diff --git a/.github/workflows/clang-cir-tests.yml b/.github/workflows/clang-cir-tests.yml new file mode 100644 index 000000000000..3f42b141c4ab --- /dev/null +++ b/.github/workflows/clang-cir-tests.yml @@ -0,0 +1,38 @@ +name: Clang CIR Tests + +permissions: + contents: read + +on: + workflow_dispatch: + push: + branches: + - 'main' + paths: + - 'clang/**' + - '.github/workflows/clang-cir-tests.yml' + - '.github/workflows/llvm-project-tests.yml' + - '!llvm/**' + pull_request: + branches: + - '**' + paths: + - 'clang/**' + - '.github/workflows/clang-cir-tests.yml' + - '.github/workflows/llvm-project-tests.yml' + - '!llvm/**' + +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + check_clang_cir: + name: Test clang-cir + uses: ./.github/workflows/llvm-project-tests.yml + with: + build_target: check-clang-cir + projects: clang;mlir + extra_cmake_args: -DCLANG_ENABLE_CIR=ON diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index d7c21394ca48..2ec5a40abda9 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -22,9 +22,6 @@ on: - 'runtimes/**' - 'cmake/**' - '.github/workflows/libcxx-build-and-test.yaml' - schedule: - # Run nightly at 08:00 UTC (aka 00:00 Pacific, aka 03:00 Eastern) - - cron: '0 8 * * *' permissions: contents: read # Default everything to read-only diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 22357e5d99e4..85edacb08094 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -11,7 +11,7 @@ on: jobs: code_formatter: runs-on: ubuntu-latest - if: github.repository == 'llvm/llvm-project' + if: github.repository == 'llvm/clangir' steps: - name: Fetch LLVM sources uses: actions/checkout@v4 diff --git a/README.md b/README.md index a9b29ecbc1a3..3dd79abc4b3e 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,3 @@ -# The LLVM Compiler Infrastructure +# ClangIR (CIR) -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/llvm/llvm-project/badge)](https://securityscorecards.dev/viewer/?uri=github.com/llvm/llvm-project) -[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8273/badge)](https://www.bestpractices.dev/projects/8273) -[![libc++](https://github.com/llvm/llvm-project/actions/workflows/libcxx-build-and-test.yaml/badge.svg?branch=main&event=schedule)](https://github.com/llvm/llvm-project/actions/workflows/libcxx-build-and-test.yaml?query=event%3Aschedule) - -Welcome to the LLVM project! - -This repository contains the source code for LLVM, a toolkit for the -construction of highly optimized compilers, optimizers, and run-time -environments. - -The LLVM project has multiple components. The core of the project is -itself called "LLVM". This contains all of the tools, libraries, and header -files needed to process intermediate representations and convert them into -object files. Tools include an assembler, disassembler, bitcode analyzer, and -bitcode optimizer. - -C-like languages use the [Clang](https://clang.llvm.org/) frontend. This -component compiles C, C++, Objective-C, and Objective-C++ code into LLVM bitcode --- and from there into object files, using LLVM. 
- -Other components include: -the [libc++ C++ standard library](https://libcxx.llvm.org), -the [LLD linker](https://lld.llvm.org), and more. - -## Getting the Source Code and Building LLVM - -Consult the -[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm) -page for information on building and running LLVM. - -For information on how to contribute to the LLVM project, please take a look at -the [Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide. - -## Getting in touch - -Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord -chat](https://discord.gg/xS7Z362), -[LLVM Office Hours](https://llvm.org/docs/GettingInvolved.html#office-hours) or -[Regular sync-ups](https://llvm.org/docs/GettingInvolved.html#online-sync-ups). - -The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for -participants to all modes of communication within the project. +Check https://clangir.org for general information, build instructions and documentation. diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 430ea4cdbb38..e52f9f9875c5 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -23,7 +23,7 @@ add_clang_library(clangTidy ClangSACheckers omp_gen ClangDriverOptions - ) +) clang_target_link_libraries(clangTidy PRIVATE @@ -77,6 +77,9 @@ add_subdirectory(performance) add_subdirectory(portability) add_subdirectory(readability) add_subdirectory(zircon) +if(CLANG_ENABLE_CIR) + add_subdirectory(cir) +endif() set(ALL_CLANG_TIDY_CHECKS clangTidyAndroidModule clangTidyAbseilModule @@ -105,6 +108,9 @@ set(ALL_CLANG_TIDY_CHECKS if(CLANG_TIDY_ENABLE_STATIC_ANALYZER) list(APPEND ALL_CLANG_TIDY_CHECKS clangTidyMPIModule) endif() +if(CLANG_ENABLE_CIR) + list(APPEND ALL_CLANG_TIDY_CHECKS clangTidyCIRModule) +endif() set(ALL_CLANG_TIDY_CHECKS ${ALL_CLANG_TIDY_CHECKS} PARENT_SCOPE) # Other subtargets. These may reference ALL_CLANG_TIDY_CHECKS diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 1cd7cdd10bc2..a40832d7238a 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -20,7 +20,10 @@ #include "ClangTidyModuleRegistry.h" #include "ClangTidyProfiling.h" #include "ExpandModularHeadersPPCallbacks.h" +#ifndef CLANG_TIDY_CONFIG_H #include "clang-tidy-config.h" +#endif +#include "utils/OptionsUtils.h" #include "clang/AST/ASTConsumer.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Format/Format.h" @@ -466,6 +469,7 @@ ClangTidyASTConsumerFactory::createASTConsumer( Consumers.push_back(std::move(AnalysisConsumer)); } #endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER + return std::make_unique( std::move(Consumers), std::move(Profiling), std::move(Finder), std::move(Checks)); diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h index 97e16a12febd..7cd7f71f0db2 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h @@ -20,6 +20,18 @@ #include "llvm/Support/Regex.h" #include +// Workaround unitests not needing to change unittests to require +// "clang-tidy-config.h" being generated. 
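+// If the generated header is absent, CLANG_ENABLE_CIR is left undefined and
+// the `#if CLANG_ENABLE_CIR` blocks below evaluate to 0, so the CIR-specific
+// members simply compile out.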
+#if __has_include("clang-tidy-config.h") +#ifndef CLANG_TIDY_CONFIG_H +#include "clang-tidy-config.h" +#endif +#endif + +#if CLANG_ENABLE_CIR +#include "clang/Basic/CodeGenOptions.h" +#endif + namespace clang { class ASTContext; @@ -137,6 +149,12 @@ class ClangTidyContext { /// Gets the language options from the AST context. const LangOptions &getLangOpts() const { return LangOpts; } +#if CLANG_ENABLE_CIR + /// Get and set CodeGenOpts + CodeGenOptions &getCodeGenOpts() { return CodeGenOpts; }; + void setCodeGenOpts(CodeGenOptions &CGO) { CodeGenOpts = CGO; } +#endif + /// Returns the name of the clang-tidy check which produced this /// diagnostic ID. std::string getCheckName(unsigned DiagnosticID) const; @@ -242,6 +260,10 @@ class ClangTidyContext { LangOptions LangOpts; +#if CLANG_ENABLE_CIR + CodeGenOptions CodeGenOpts; +#endif + ClangTidyStats Stats; std::string CurrentBuildDirectory; diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h index adde9136ff1d..6d3ffa743460 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h @@ -9,7 +9,9 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CLANGTIDYFORCELINKER_H #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CLANGTIDYFORCELINKER_H +#ifndef CLANG_TIDY_CONFIG_H #include "clang-tidy-config.h" +#endif #include "llvm/Support/Compiler.h" namespace clang::tidy { @@ -137,6 +139,13 @@ extern volatile int ZirconModuleAnchorSource; static int LLVM_ATTRIBUTE_UNUSED ZirconModuleAnchorDestination = ZirconModuleAnchorSource; +#if CLANG_ENABLE_CIR +// This anchor is used to force the linker to link the CIRModule. +extern volatile int CIRModuleAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED CIRModuleAnchorDestination = + CIRModuleAnchorSource; +#endif + } // namespace clang::tidy #endif diff --git a/clang-tools-extra/clang-tidy/cir-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/cir-tidy/tool/CMakeLists.txt new file mode 100644 index 000000000000..f31eba82228e --- /dev/null +++ b/clang-tools-extra/clang-tidy/cir-tidy/tool/CMakeLists.txt @@ -0,0 +1,50 @@ +include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsDescs + AllTargetsInfos + FrontendOpenMP + support + ) + +# Needed by LLVM's CMake checks because this file defines multiple targets. +set(LLVM_OPTIONAL_SOURCES CIRTidyMain.cpp CIRTidyToolMain.cpp) + +add_clang_library(CIRTidyMain + CIRTidyMain.cpp + + LINK_LIBS + CIRTidy + clangTidy + MLIRIR + ${ALL_CLANG_TIDY_CHECKS} + MLIRIR + + DEPENDS + omp_gen + ) + +clang_target_link_libraries(CIRTidyMain + PRIVATE + clangBasic + clangTooling + clangToolingCore + ) + +add_clang_tool(cir-tidy + CIRTidyToolMain.cpp + ) +add_dependencies(cir-tidy + clang-resource-headers + ) + +target_link_libraries(cir-tidy + PRIVATE + CIRTidyMain + CIRTidy + ) + +install(TARGETS cir-tidy + DESTINATION bin + ) diff --git a/clang-tools-extra/clang-tidy/cir/CIRTidyModule.cpp b/clang-tools-extra/clang-tidy/cir/CIRTidyModule.cpp new file mode 100644 index 000000000000..0c54cde3d0f0 --- /dev/null +++ b/clang-tools-extra/clang-tidy/cir/CIRTidyModule.cpp @@ -0,0 +1,34 @@ +//===--- CIRTidyModule.cpp - clang-tidy -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../ClangTidy.h" +#include "../ClangTidyModule.h" +#include "../ClangTidyModuleRegistry.h" +#include "Lifetime.h" + +namespace clang::tidy { +namespace cir { + +class CIRModule : public ClangTidyModule { +public: + void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { + CheckFactories.registerCheck("cir-lifetime-check"); + } +}; + +} // namespace cir + +// Register the CIRTidyModule using this statically initialized variable. +static ClangTidyModuleRegistry::Add + X("cir-module", "Adds ClangIR (CIR) based clang-tidy checks."); + +// This anchor is used to force the linker to link in the generated object file +// and thus register the CIRModule. +volatile int CIRModuleAnchorSource = 0; + +} // namespace clang::tidy diff --git a/clang-tools-extra/clang-tidy/cir/CMakeLists.txt b/clang-tools-extra/clang-tidy/cir/CMakeLists.txt new file mode 100644 index 000000000000..0b892f332790 --- /dev/null +++ b/clang-tools-extra/clang-tidy/cir/CMakeLists.txt @@ -0,0 +1,57 @@ +set(LLVM_LINK_COMPONENTS + FrontendOpenMP + Support + ) + +include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. ) +include_directories( ${LLVM_MAIN_SRC_DIR}/../mlir/include ) +include_directories( ${CMAKE_BINARY_DIR}/tools/mlir/include ) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_clang_library(clangTidyCIRModule + Lifetime.cpp + CIRTidyModule.cpp + + LINK_LIBS + clangASTMatchers + clangCIR + clangFrontend + clangSerialization + clangTidy + clangTidyUtils + ${dialect_libs} + MLIRCIR + MLIRCIRTransforms + MLIRAffineToStandard + MLIRAnalysis + MLIRIR + MLIRLLVMCommonConversion + MLIRLLVMDialect + MLIRLLVMToLLVMIRTranslation + MLIRMemRefDialect + MLIRMemRefToLLVM + MLIRParser + MLIRPass + MLIRSideEffectInterfaces + MLIRSCFToControlFlow + MLIRFuncToLLVM + MLIRSupport + MLIRMemRefDialect + MLIRTargetLLVMIRExport + MLIRTransforms + + DEPENDS + omp_gen + ) + +clang_target_link_libraries(clangTidyCIRModule + PRIVATE + clangAnalysis + clangAST + clangASTMatchers + clangBasic + clangLex + clangTooling + clangToolingCore + ) diff --git a/clang-tools-extra/clang-tidy/cir/Lifetime.cpp b/clang-tools-extra/clang-tidy/cir/Lifetime.cpp new file mode 100644 index 000000000000..c349febed734 --- /dev/null +++ b/clang-tools-extra/clang-tidy/cir/Lifetime.cpp @@ -0,0 +1,197 @@ +//===--- Lifetime.cpp - clang-tidy ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Lifetime.h" +#include "../utils/OptionsUtils.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclGroup.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/CIR/CIRGenerator.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/Format/Format.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Tooling/FixIt.h" +#include + +using namespace clang::ast_matchers; +using namespace clang; + +namespace clang::tidy::cir { + +Lifetime::Lifetime(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), codeGenOpts(Context->getCodeGenOpts()), + cirOpts{} { + auto OV = OptionsView(Name, Context->getOptions().CheckOptions, Context); + codeGenOpts.ClangIRBuildDeferredThreshold = + OV.get("CodeGenBuildDeferredThreshold", 500U); + codeGenOpts.ClangIRSkipFunctionsFromSystemHeaders = + OV.get("CodeGenSkipFunctionsFromSystemHeaders", false); + + cirOpts.RemarksList = + utils::options::parseStringList(OV.get("RemarksList", "")); + cirOpts.HistoryList = + utils::options::parseStringList(OV.get("HistoryList", "all")); + cirOpts.HistLimit = OV.get("HistLimit", 1U); +} + +void Lifetime::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(translationUnitDecl(), this); +} + +void Lifetime::setupAndRunClangIRLifetimeChecker(ASTContext &astCtx) { + auto *TU = astCtx.getTranslationUnitDecl(); + // This is the hook used to build clangir and run the lifetime checker + // pass. Perhaps in the future it's possible to come up with a better + // integration story. + + // Create an instance of CIRGenerator and use it to build CIR, followed by + // MLIR module verification. + std::unique_ptr<::cir::CIRGenerator> Gen = + std::make_unique<::cir::CIRGenerator>(astCtx.getDiagnostics(), nullptr, + codeGenOpts); + Gen->Initialize(astCtx); + Gen->HandleTopLevelDecl(DeclGroupRef(TU)); + Gen->HandleTranslationUnit(astCtx); + Gen->verifyModule(); + + mlir::ModuleOp mlirMod = Gen->getModule(); + std::unique_ptr mlirCtx = Gen->takeContext(); + + mlir::OpPrintingFlags flags; + flags.enableDebugInfo(/*prettyForm=*/false); + + clang::SourceManager &clangSrcMgr = astCtx.getSourceManager(); + FileID MainFileID = clangSrcMgr.getMainFileID(); + + // Do some big dance with diagnostics here: hijack clang's diagnostics with + // MLIR one. + llvm::MemoryBufferRef MainFileBuf = clangSrcMgr.getBufferOrFake(MainFileID); + std::unique_ptr FileBuf = + llvm::MemoryBuffer::getMemBuffer(MainFileBuf); + + llvm::SourceMgr llvmSrcMgr; + llvmSrcMgr.AddNewSourceBuffer(std::move(FileBuf), llvm::SMLoc()); + + class CIRTidyDiagnosticHandler : public mlir::SourceMgrDiagnosticHandler { + ClangTidyCheck &tidyCheck; + clang::SourceManager &clangSrcMgr; + + clang::SourceLocation getClangFromFileLineCol(mlir::FileLineColLoc loc) { + clang::SourceLocation clangLoc; + FileManager &fileMgr = clangSrcMgr.getFileManager(); + assert(loc && "not a valid mlir::FileLineColLoc"); + // The column and line may be zero to represent unknown column + // and/or unknown line/column information. 
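+      // Without a valid (1-based) line and column there is no meaningful
+      // clang::SourceLocation to translate to, so the handler currently just
+      // bails out below.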
+ if (loc.getLine() == 0 || loc.getColumn() == 0) { + llvm_unreachable("How should we workaround this?"); + return clangLoc; + } + if (auto FE = fileMgr.getFile(loc.getFilename())) { + return clangSrcMgr.translateFileLineCol(*FE, loc.getLine(), + loc.getColumn()); + } + llvm_unreachable("location doesn't map to a file?"); + } + + clang::SourceLocation getClangSrcLoc(mlir::Location loc) { + // Direct maps into a clang::SourceLocation. + if (auto fileLoc = dyn_cast(loc)) { + return getClangFromFileLineCol(fileLoc); + } + + // FusedLoc needs to be decomposed but the canonical one + // is the first location, we handle source ranges somewhere + // else. + if (auto fileLoc = dyn_cast(loc)) { + auto locArray = fileLoc.getLocations(); + assert(locArray.size() > 0 && "expected multiple locs"); + return getClangFromFileLineCol( + dyn_cast(locArray[0])); + } + + // Many loc styles are yet to be handled. + if (auto fileLoc = dyn_cast(loc)) { + llvm_unreachable("mlir::UnknownLoc not implemented!"); + } + if (auto fileLoc = dyn_cast(loc)) { + llvm_unreachable("mlir::CallSiteLoc not implemented!"); + } + llvm_unreachable("Unknown location style"); + } + + clang::DiagnosticIDs::Level + translateToClangDiagLevel(const mlir::DiagnosticSeverity &sev) { + switch (sev) { + case mlir::DiagnosticSeverity::Note: + return clang::DiagnosticIDs::Level::Note; + case mlir::DiagnosticSeverity::Warning: + return clang::DiagnosticIDs::Level::Warning; + case mlir::DiagnosticSeverity::Error: + return clang::DiagnosticIDs::Level::Error; + case mlir::DiagnosticSeverity::Remark: + return clang::DiagnosticIDs::Level::Remark; + } + llvm_unreachable("should not get here!"); + } + + public: + void emitClangTidyDiagnostic(mlir::Diagnostic &diag) { + auto clangBeginLoc = getClangSrcLoc(diag.getLocation()); + tidyCheck.diag(clangBeginLoc, diag.str(), + translateToClangDiagLevel(diag.getSeverity())); + for (const auto ¬e : diag.getNotes()) { + auto clangNoteBeginLoc = getClangSrcLoc(note.getLocation()); + tidyCheck.diag(clangNoteBeginLoc, note.str(), + translateToClangDiagLevel(note.getSeverity())); + } + } + + CIRTidyDiagnosticHandler(llvm::SourceMgr &mgr, mlir::MLIRContext *ctx, + ClangTidyCheck &tidyCheck, + clang::SourceManager &clangMgr, + ShouldShowLocFn &&shouldShowLocFn = {}) + : SourceMgrDiagnosticHandler(mgr, ctx, llvm::errs(), + std::move(shouldShowLocFn)), + tidyCheck(tidyCheck), clangSrcMgr(clangMgr) { + setHandler( + [this](mlir::Diagnostic &diag) { emitClangTidyDiagnostic(diag); }); + } + ~CIRTidyDiagnosticHandler() = default; + }; + + // Use a custom diagnostic handler that can allow both regular printing + // to stderr but also populates clang-tidy context with diagnostics (and + // allow for instance, diagnostics to be later converted to YAML). + CIRTidyDiagnosticHandler sourceMgrHandler(llvmSrcMgr, mlirCtx.get(), *this, + clangSrcMgr); + + mlir::PassManager pm(mlirCtx.get()); + + // Add pre-requisite passes to the pipeline + pm.addPass(mlir::createCIRSimplifyPass()); + + // Insert the lifetime checker. 
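+  // The remark, history and history-limit arguments below come from the
+  // check options parsed in the Lifetime constructor (RemarksList,
+  // HistoryList and HistLimit).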
+ pm.addPass(mlir::createLifetimeCheckPass( + cirOpts.RemarksList, cirOpts.HistoryList, cirOpts.HistLimit, &astCtx)); + + bool passResult = !mlir::failed(pm.run(mlirMod)); + if (!passResult) + llvm::report_fatal_error( + "The pass manager failed to run pass on the module!"); +} + +void Lifetime::check(const MatchFinder::MatchResult &Result) { + setupAndRunClangIRLifetimeChecker(*Result.Context); +} + +} // namespace clang::tidy::cir diff --git a/clang-tools-extra/clang-tidy/cir/Lifetime.h b/clang-tools-extra/clang-tidy/cir/Lifetime.h new file mode 100644 index 000000000000..fb65bbf5be80 --- /dev/null +++ b/clang-tools-extra/clang-tidy/cir/Lifetime.h @@ -0,0 +1,35 @@ +//===--- Lifetime.h - clang-tidy --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CIR_LIFETIME_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CIR_LIFETIME_H + +#include "../ClangTidyCheck.h" +#include + +namespace clang::tidy::cir { + +struct CIROpts { + std::vector RemarksList; + std::vector HistoryList; + unsigned HistLimit; +}; +class Lifetime : public ClangTidyCheck { +public: + Lifetime(StringRef Name, ClangTidyContext *Context); + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void setupAndRunClangIRLifetimeChecker(ASTContext &astCtx); + + CodeGenOptions codeGenOpts; + CIROpts cirOpts; +}; + +} // namespace clang::tidy::cir + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CIR_LIFETIME_H diff --git a/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake b/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake index f4d1a4b38004..7397c1a65249 100644 --- a/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake +++ b/clang-tools-extra/clang-tidy/clang-tidy-config.h.cmake @@ -7,4 +7,6 @@ #cmakedefine01 CLANG_TIDY_ENABLE_STATIC_ANALYZER +#cmakedefine01 CLANG_ENABLE_CIR + #endif diff --git a/clang-tools-extra/test/CMakeLists.txt b/clang-tools-extra/test/CMakeLists.txt index 0953ff2531e1..74c7b4aaa135 100644 --- a/clang-tools-extra/test/CMakeLists.txt +++ b/clang-tools-extra/test/CMakeLists.txt @@ -10,6 +10,7 @@ set(CLANG_TOOLS_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/..") llvm_canonicalize_cmake_booleans( CLANG_TIDY_ENABLE_STATIC_ANALYZER CLANG_PLUGIN_SUPPORT + CLANG_ENABLE_CIR LLVM_INSTALL_TOOLCHAIN_ONLY ) diff --git a/clang-tools-extra/test/clang-tidy/checkers/cir/lifetime-basic.cpp b/clang-tools-extra/test/clang-tidy/checkers/cir/lifetime-basic.cpp new file mode 100644 index 000000000000..c65781190663 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/cir/lifetime-basic.cpp @@ -0,0 +1,39 @@ +// RUN: %check_clang_tidy %s cir-lifetime-check %t \ +// RUN: --export-fixes=%t.yaml \ +// RUN: -config='{CheckOptions: \ +// RUN: [{key: cir-lifetime-check.RemarksList, value: "all"}, \ +// RUN: {key: cir-lifetime-check.HistLimit, value: "1"}, \ +// RUN: {key: cir-lifetime-check.CodeGenBuildDeferredThreshold, value: "500"}, \ +// RUN: {key: cir-lifetime-check.CodeGenSkipFunctionsFromSystemHeaders, value: "false"}, \ +// RUN: {key: cir-lifetime-check.HistoryList, value: "invalid;null"}]}' \ +// RUN: -- +// RUN: FileCheck -input-file=%t.yaml -check-prefix=CHECK-YAML %s + +int *p0() { + int *p = 
nullptr; + { + int x = 0; + p = &x; + *p = 42; + } + *p = 42; // CHECK-MESSAGES: :[[@LINE]]:4: warning: use of invalid pointer 'p' + return p; +} + +// CHECK-YAML: DiagnosticMessage: +// CHECK-YAML: Message: 'pset => { x }' +// CHECK-YAML: Replacements: [] +// CHECK-YAML: Level: Remark + +// CHECK-YAML: DiagnosticMessage: +// CHECK-YAML: Message: 'pset => { invalid }' +// CHECK-YAML: Replacements: [] +// CHECK-YAML: Level: Remark + +// CHECK-YAML: DiagnosticMessage: +// CHECK-YAML: Message: 'use of invalid pointer ''p''' +// CHECK-YAML: Replacements: [] +// CHECK-YAML: Notes: +// CHECK-YAML: - Message: 'pointee ''x'' invalidated at end of scope' +// CHECK-YAML: Replacements: [] +// CHECK-YAML: Level: Warning \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/cir/lit.local.cfg b/clang-tools-extra/test/clang-tidy/checkers/cir/lit.local.cfg new file mode 100644 index 000000000000..e479c3e74cb6 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/cir/lit.local.cfg @@ -0,0 +1,2 @@ +if not config.clang_enable_cir: + config.unsupported = True \ No newline at end of file diff --git a/clang-tools-extra/test/lit.site.cfg.py.in b/clang-tools-extra/test/lit.site.cfg.py.in index e6503a4c097c..fb3b1f675a20 100644 --- a/clang-tools-extra/test/lit.site.cfg.py.in +++ b/clang-tools-extra/test/lit.site.cfg.py.in @@ -11,6 +11,7 @@ config.target_triple = "@LLVM_TARGET_TRIPLE@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.clang_tidy_staticanalyzer = @CLANG_TIDY_ENABLE_STATIC_ANALYZER@ config.has_plugins = @CLANG_PLUGIN_SUPPORT@ +config.clang_enable_cir = @CLANG_ENABLE_CIR@ # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index e3f6da4a84f6..3d95a201aae4 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -453,6 +453,16 @@ CODEGENOPT(CtorDtorReturnThis, 1, 0) /// FIXME: Make DebugOptions its own top-level .def file. #include "DebugOptions.def" +/// ClangIR specific (internal): limits recursion depth for buildDeferred() +/// calls. This helps incremental progress while building large C++ TUs, once +/// CIRGen is mature we should probably remove it. +VALUE_CODEGENOPT(ClangIRBuildDeferredThreshold, 32, 500) + +/// ClangIR specific (internal): Only build deferred functions not coming from +/// system headers. This helps incremental progress while building large C++ +/// TUs, once CIRGen is mature we should probably remove it. 
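+/// This option is also exposed as the CodeGenSkipFunctionsFromSystemHeaders
+/// option of the cir-lifetime-check clang-tidy check (see
+/// clang-tools-extra/clang-tidy/cir/Lifetime.cpp).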
+CODEGENOPT(ClangIRSkipFunctionsFromSystemHeaders, 1, 0) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 1ca2cb85565a..0adaef2b04c6 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -349,6 +349,8 @@ def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' point def err_drv_omp_host_ir_file_not_found : Error< "provided host compiler IR file '%0' is required to generate code for OpenMP " "target regions but cannot be found">; +def err_drv_cir_pass_opt_parsing : Error< + "clangir pass option '%0' not recognized">; def err_drv_omp_host_target_not_supported : Error< "target '%0' is not a supported OpenMP host target">; def err_drv_expecting_fopenmp_with_fopenmp_targets : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 6dd6b5614f44..5e65b7c05475 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -426,6 +426,8 @@ LANGOPT(RetainCommentsFromSystemHeaders, 1, 0, "retain documentation comments fr LANGOPT(APINotes, 1, 0, "use external API notes") LANGOPT(APINotesModules, 1, 0, "use module-based external API notes") +LANGOPT(CIRWarnings, 1, 0, "emit warnings with ClangIR") + LANGOPT(SanitizeAddressFieldPadding, 2, 0, "controls how aggressive is ASan " "field padding (0: none, 1:least " "aggressive, 2: more aggressive)") diff --git a/clang/include/clang/CIR/.clang-tidy b/clang/include/clang/CIR/.clang-tidy new file mode 100644 index 000000000000..dfbcf9ccf7c2 --- /dev/null +++ b/clang/include/clang/CIR/.clang-tidy @@ -0,0 +1,61 @@ +InheritParentConfig: true +Checks: > + -misc-const-correctness, + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-identifier-naming, + readability-inconsistent-declaration-parameter-name, + readability-misleading-indentation, + readability-redundant-control-flow, + readability-redundant-smartptr-get, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof + + +CheckOptions: + - key: 
readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.ParameterCase + value: camelBack + - key: readability-identifier-naming.VariableCase + value: camelBack diff --git a/clang/include/clang/CIR/ABIArgInfo.h b/clang/include/clang/CIR/ABIArgInfo.h new file mode 100644 index 000000000000..d330b2c3e24d --- /dev/null +++ b/clang/include/clang/CIR/ABIArgInfo.h @@ -0,0 +1,272 @@ +//==-- ABIArgInfo.h - Abstract info regarding ABI-specific arguments -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines ABIArgInfo and associated types used by CIR to track information +// regarding ABI-coerced types for function arguments and return values. This +// was moved to the common library as it might be used by both CIRGen and +// passes. +// +//===----------------------------------------------------------------------===// + +#ifndef CIR_COMMON_ABIARGINFO_H +#define CIR_COMMON_ABIARGINFO_H + +#include "mlir/IR/Types.h" +#include "clang/AST/Type.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include + +namespace cir { + +/// Helper class to encapsulate information about how a specific C +/// type should be passed to or returned from a function. +class ABIArgInfo { +public: + enum Kind : uint8_t { + /// Pass the argument directly using the normal converted CIR type, + /// or by coercing to another specified type stored in 'CoerceToType'). If + /// an offset is specified (in UIntData), then the argument passed is offset + /// by some number of bytes in the memory representation. A dummy argument + /// is emitted before the real argument if the specified type stored in + /// "PaddingType" is not zero. + Direct, + + /// Valid only for integer argument types. Same as 'direct' but + /// also emit a zer/sign extension attribute. + Extend, + + /// Pass the argument indirectly via a hidden pointer with the + /// specified alignment (0 indicates default alignment) and address space. + Indirect, + + /// Similar to Indirect, but the pointer may be to an + /// object that is otherwise referenced. The object is known to not be + /// modified through any other references for the duration of the call, and + /// the callee must not itself modify the object. Because C allows parameter + /// variables to be modified and guarantees that they have unique addresses, + /// the callee must defensively copy the object into a local variable if it + /// might be modified or its address might be compared. Since those are + /// uncommon, in principle this convention allows programs to avoid copies + /// in more situations. However, it may introduce *extra* copies if the + /// callee fails to prove that a copy is unnecessary and the caller + /// naturally produces an unaliased object for the argument. + IndirectAliased, + + /// Ignore the argument (treat as void). Useful for void and empty + /// structs. + Ignore, + + /// Only valid for aggregate argument types. The structure should + /// be expanded into consecutive arguments for its constituent fields. + /// Currently expand is only allowed on structures whose fields are all + /// scalar types or are themselves expandable types. + Expand, + + /// Only valid for aggregate argument types. 
The structure + /// should be expanded into consecutive arguments corresponding to the + /// non-array elements of the type stored in CoerceToType. + /// Array elements in the type are assumed to be padding and skipped. + CoerceAndExpand, + + // TODO: translate this idea to CIR! Define it for now just to ensure that + // we can assert it not being used + InAlloca, + KindFirst = Direct, + KindLast = InAlloca + }; + +private: + mlir::Type TypeData; // canHaveCoerceToType(); + union { + mlir::Type PaddingType; // canHavePaddingType() + mlir::Type UnpaddedCoerceAndExpandType; // isCoerceAndExpand() + }; + struct DirectAttrInfo { + unsigned Offset; + unsigned Align; + }; + struct IndirectAttrInfo { + unsigned Align; + unsigned AddrSpace; + }; + union { + DirectAttrInfo DirectAttr; // isDirect() || isExtend() + IndirectAttrInfo IndirectAttr; // isIndirect() + unsigned AllocaFieldIndex; // isInAlloca() + }; + Kind TheKind; + bool InReg : 1; // isDirect() || isExtend() || isIndirect() + bool CanBeFlattened : 1; // isDirect() + bool SignExt : 1; // isExtend() + + bool canHavePaddingType() const { + return isDirect() || isExtend() || isIndirect() || isIndirectAliased() || + isExpand(); + } + + void setPaddingType(mlir::Type T) { + assert(canHavePaddingType()); + PaddingType = T; + } + +public: + ABIArgInfo(Kind K = Direct) + : TypeData(nullptr), PaddingType(nullptr), DirectAttr{0, 0}, TheKind(K), + InReg(false), CanBeFlattened(false), SignExt(false) {} + + static ABIArgInfo getDirect(mlir::Type T = nullptr, unsigned Offset = 0, + mlir::Type Padding = nullptr, + bool CanBeFlattened = true, unsigned Align = 0) { + auto AI = ABIArgInfo(Direct); + AI.setCoerceToType(T); + AI.setPaddingType(Padding); + AI.setDirectOffset(Offset); + AI.setDirectAlign(Align); + AI.setCanBeFlattened(CanBeFlattened); + return AI; + } + + static ABIArgInfo getSignExtend(clang::QualType Ty, mlir::Type T = nullptr) { + assert(Ty->isIntegralOrEnumerationType() && "Unexpected QualType"); + auto AI = ABIArgInfo(Extend); + AI.setCoerceToType(T); + AI.setPaddingType(nullptr); + AI.setDirectOffset(0); + AI.setDirectAlign(0); + AI.setSignExt(true); + return AI; + } + static ABIArgInfo getSignExtend(mlir::Type Ty, mlir::Type T = nullptr) { + // NOTE(cir): Enumerations are IntTypes in CIR. + auto AI = ABIArgInfo(Extend); + AI.setCoerceToType(T); + AI.setPaddingType(nullptr); + AI.setDirectOffset(0); + AI.setDirectAlign(0); + AI.setSignExt(true); + return AI; + } + + static ABIArgInfo getZeroExtend(clang::QualType Ty, mlir::Type T = nullptr) { + assert(Ty->isIntegralOrEnumerationType() && "Unexpected QualType"); + auto AI = ABIArgInfo(Extend); + AI.setCoerceToType(T); + AI.setPaddingType(nullptr); + AI.setDirectOffset(0); + AI.setDirectAlign(0); + AI.setSignExt(false); + return AI; + } + static ABIArgInfo getZeroExtend(mlir::Type Ty, mlir::Type T = nullptr) { + // NOTE(cir): Enumerations are IntTypes in CIR. + assert(mlir::isa(Ty) || + mlir::isa(Ty)); + auto AI = ABIArgInfo(Extend); + AI.setCoerceToType(T); + AI.setPaddingType(nullptr); + AI.setDirectOffset(0); + AI.setDirectAlign(0); + AI.setSignExt(false); + return AI; + } + + // ABIArgInfo will record the argument as being extended based on the sign of + // it's type. 
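+  // For example, a signed integral type is routed to getSignExtend() and an
+  // unsigned one to getZeroExtend().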
+ static ABIArgInfo getExtend(clang::QualType Ty, mlir::Type T = nullptr) { + assert(Ty->isIntegralOrEnumerationType() && "Unexpected QualType"); + if (Ty->hasSignedIntegerRepresentation()) + return getSignExtend(Ty, T); + return getZeroExtend(Ty, T); + } + static ABIArgInfo getExtend(mlir::Type Ty, mlir::Type T = nullptr) { + // NOTE(cir): The original can apply this method on both integers and + // enumerations, but in CIR, these two types are one and the same. Booleans + // will also fall into this category, but they have their own type. + if (mlir::isa(Ty) && + mlir::cast(Ty).isSigned()) + return getSignExtend(mlir::cast(Ty), T); + return getZeroExtend(Ty, T); + } + + static ABIArgInfo getIgnore() { return ABIArgInfo(Ignore); } + + Kind getKind() const { return TheKind; } + bool isDirect() const { return TheKind == Direct; } + bool isInAlloca() const { return TheKind == InAlloca; } + bool isExtend() const { return TheKind == Extend; } + bool isIndirect() const { return TheKind == Indirect; } + bool isIndirectAliased() const { return TheKind == IndirectAliased; } + bool isExpand() const { return TheKind == Expand; } + bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; } + + bool isSignExt() const { + assert(isExtend() && "Invalid kind!"); + return SignExt; + } + void setSignExt(bool SExt) { + assert(isExtend() && "Invalid kind!"); + SignExt = SExt; + } + + bool getInReg() const { + assert((isDirect() || isExtend() || isIndirect()) && "Invalid kind!"); + return InReg; + } + void setInReg(bool IR) { + assert((isDirect() || isExtend() || isIndirect()) && "Invalid kind!"); + InReg = IR; + } + + bool canHaveCoerceToType() const { + return isDirect() || isExtend() || isCoerceAndExpand(); + } + + // Direct/Extend accessors + unsigned getDirectOffset() const { + assert((isDirect() || isExtend()) && "Not a direct or extend kind"); + return DirectAttr.Offset; + } + + void setDirectOffset(unsigned Offset) { + assert((isDirect() || isExtend()) && "Not a direct or extend kind"); + DirectAttr.Offset = Offset; + } + + void setDirectAlign(unsigned Align) { + assert((isDirect() || isExtend()) && "Not a direct or extend kind"); + DirectAttr.Align = Align; + } + + void setCanBeFlattened(bool Flatten) { + assert(isDirect() && "Invalid kind!"); + CanBeFlattened = Flatten; + } + + bool getCanBeFlattened() const { + assert(isDirect() && "Invalid kind!"); + return CanBeFlattened; + } + + mlir::Type getPaddingType() const { + return (canHavePaddingType() ? PaddingType : nullptr); + } + + mlir::Type getCoerceToType() const { + assert(canHaveCoerceToType() && "Invalid kind!"); + return TypeData; + } + + void setCoerceToType(mlir::Type T) { + assert(canHaveCoerceToType() && "Invalid kind!"); + TypeData = T; + } +}; + +} // namespace cir + +#endif // CIR_COMMON_ABIARGINFO_H diff --git a/clang/include/clang/CIR/CIRGenerator.h b/clang/include/clang/CIR/CIRGenerator.h new file mode 100644 index 000000000000..2dedb3b66385 --- /dev/null +++ b/clang/include/clang/CIR/CIRGenerator.h @@ -0,0 +1,107 @@ +//===- CIRGenerator.h - CIR Generation from Clang AST ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares a simple interface to perform CIR generation from Clang +// AST +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIRGENERATOR_H_ +#define CLANG_CIRGENERATOR_H_ + +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/Decl.h" +#include "clang/Basic/CodeGenOptions.h" + +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VirtualFileSystem.h" + +#include + +namespace mlir { +class MLIRContext; +class ModuleOp; +class OwningModuleRef; +} // namespace mlir + +namespace clang { +class ASTContext; +class DeclGroupRef; +class FunctionDecl; +} // namespace clang + +namespace cir { +class CIRGenModule; +class CIRGenTypes; + +class CIRGenerator : public clang::ASTConsumer { + virtual void anchor(); + clang::DiagnosticsEngine &Diags; + clang::ASTContext *astCtx; + llvm::IntrusiveRefCntPtr + fs; // Only used for debug info. + + const clang::CodeGenOptions codeGenOpts; // Intentionally copied in. + + unsigned HandlingTopLevelDecls; + + /// Use this when emitting decls to block re-entrant decl emission. It will + /// emit all deferred decls on scope exit. Set EmitDeferred to false if decl + /// emission must be deferred longer, like at the end of a tag definition. + struct HandlingTopLevelDeclRAII { + CIRGenerator &Self; + bool EmitDeferred; + HandlingTopLevelDeclRAII(CIRGenerator &Self, bool EmitDeferred = true) + : Self{Self}, EmitDeferred{EmitDeferred} { + ++Self.HandlingTopLevelDecls; + } + ~HandlingTopLevelDeclRAII() { + unsigned Level = --Self.HandlingTopLevelDecls; + if (Level == 0 && EmitDeferred) + Self.buildDeferredDecls(); + } + }; + +protected: + std::unique_ptr mlirCtx; + std::unique_ptr CGM; + +private: + llvm::SmallVector DeferredInlineMemberFuncDefs; + +public: + CIRGenerator(clang::DiagnosticsEngine &diags, + llvm::IntrusiveRefCntPtr FS, + const clang::CodeGenOptions &CGO); + ~CIRGenerator(); + void Initialize(clang::ASTContext &Context) override; + bool EmitFunction(const clang::FunctionDecl *FD); + + bool HandleTopLevelDecl(clang::DeclGroupRef D) override; + void HandleTranslationUnit(clang::ASTContext &Ctx) override; + void HandleInlineFunctionDefinition(clang::FunctionDecl *D) override; + void HandleTagDeclDefinition(clang::TagDecl *D) override; + void HandleTagDeclRequiredDefinition(const clang::TagDecl *D) override; + void HandleCXXStaticMemberVarInstantiation(clang::VarDecl *D) override; + void CompleteTentativeDefinition(clang::VarDecl *D) override; + + mlir::ModuleOp getModule(); + std::unique_ptr takeContext() { + return std::move(mlirCtx); + }; + + bool verifyModule(); + + void buildDeferredDecls(); + void buildDefaultMethods(); +}; + +} // namespace cir + +#endif // CLANG_CIRGENERATOR_H_ diff --git a/clang/include/clang/CIR/CIRToCIRPasses.h b/clang/include/clang/CIR/CIRToCIRPasses.h new file mode 100644 index 000000000000..4ad4aeebb22e --- /dev/null +++ b/clang/include/clang/CIR/CIRToCIRPasses.h @@ -0,0 +1,42 @@ +//====- CIRToCIRPasses.h- Lowering from CIR to LLVM -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares an interface for converting CIR modules to LLVM IR. +// +//===----------------------------------------------------------------------===// +#ifndef CLANG_CIR_CIRTOCIRPASSES_H +#define CLANG_CIR_CIRTOCIRPASSES_H + +#include "mlir/Pass/Pass.h" + +#include + +namespace clang { +class ASTContext; +} + +namespace mlir { +class MLIRContext; +class ModuleOp; +} // namespace mlir + +namespace cir { + +// Run set of cleanup/prepare/etc passes CIR <-> CIR. +mlir::LogicalResult runCIRToCIRPasses( + mlir::ModuleOp theModule, mlir::MLIRContext *mlirCtx, + clang::ASTContext &astCtx, bool enableVerifier, bool enableLifetime, + llvm::StringRef lifetimeOpts, bool enableIdiomRecognizer, + llvm::StringRef idiomRecognizerOpts, bool enableLibOpt, + llvm::StringRef libOptOpts, std::string &passOptParsingFailure, + bool enableCIRSimplify, bool flattenCIR, bool emitMLIR, + bool enableCallConvLowering, bool enableMem2reg); + +} // namespace cir + +#endif // CLANG_CIR_CIRTOCIRPASSES_H_ diff --git a/clang/include/clang/CIR/CMakeLists.txt b/clang/include/clang/CIR/CMakeLists.txt index f8d6f407a03d..25497fc222d1 100644 --- a/clang/include/clang/CIR/CMakeLists.txt +++ b/clang/include/clang/CIR/CMakeLists.txt @@ -1,6 +1,8 @@ +set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root set(MLIR_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --includedir set(MLIR_TABLEGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/tools/mlir/include) include_directories(${MLIR_INCLUDE_DIR}) include_directories(${MLIR_TABLEGEN_OUTPUT_DIR}) add_subdirectory(Dialect) +add_subdirectory(Interfaces) diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h new file mode 100644 index 000000000000..bd4c60bb1a61 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -0,0 +1,748 @@ +//===-- CIRBaseBuilder.h - CIRBuilder implementation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIRBASEBUILDER_H +#define LLVM_CLANG_LIB_CIRBASEBUILDER_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Dialect/IR/FPEnv.h" +#include "clang/CIR/MissingFeatures.h" + +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Types.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include + +namespace cir { + +class CIRBaseBuilderTy : public mlir::OpBuilder { + +public: + CIRBaseBuilderTy(mlir::MLIRContext &C) : mlir::OpBuilder(&C) {} + + mlir::Value getConstAPSInt(mlir::Location loc, const llvm::APSInt &val) { + auto ty = mlir::cir::IntType::get(getContext(), val.getBitWidth(), + val.isSigned()); + return create(loc, ty, + getAttr(ty, val)); + } + + mlir::Value getConstAPInt(mlir::Location loc, mlir::Type typ, + const llvm::APInt &val) { + return create(loc, typ, + getAttr(typ, val)); + } + + mlir::cir::ConstantOp getConstant(mlir::Location loc, mlir::TypedAttr attr) { + return create(loc, attr.getType(), attr); + } + + // Creates constant null value for integral type ty. + mlir::cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc) { + return create(loc, ty, getZeroInitAttr(ty)); + } + + mlir::cir::ConstantOp getBool(bool state, mlir::Location loc) { + return create(loc, getBoolTy(), + getCIRBoolAttr(state)); + } + mlir::cir::ConstantOp getFalse(mlir::Location loc) { + return getBool(false, loc); + } + mlir::cir::ConstantOp getTrue(mlir::Location loc) { + return getBool(true, loc); + } + + mlir::cir::BoolType getBoolTy() { + return ::mlir::cir::BoolType::get(getContext()); + } + + mlir::cir::VoidType getVoidTy() { + return ::mlir::cir::VoidType::get(getContext()); + } + + mlir::cir::IntType getUIntNTy(int N) { + return mlir::cir::IntType::get(getContext(), N, false); + } + + mlir::cir::IntType getSIntNTy(int N) { + return mlir::cir::IntType::get(getContext(), N, true); + } + + mlir::cir::AddressSpaceAttr getAddrSpaceAttr(clang::LangAS langAS) { + if (langAS == clang::LangAS::Default) + return {}; + return mlir::cir::AddressSpaceAttr::get(getContext(), langAS); + } + + mlir::cir::PointerType getPointerTo(mlir::Type ty, + mlir::cir::AddressSpaceAttr cirAS = {}) { + return mlir::cir::PointerType::get(getContext(), ty, cirAS); + } + + mlir::cir::PointerType getPointerTo(mlir::Type ty, clang::LangAS langAS) { + return getPointerTo(ty, getAddrSpaceAttr(langAS)); + } + + mlir::cir::PointerType + getVoidPtrTy(clang::LangAS langAS = clang::LangAS::Default) { + return getPointerTo(::mlir::cir::VoidType::get(getContext()), langAS); + } + + mlir::cir::PointerType getVoidPtrTy(mlir::cir::AddressSpaceAttr cirAS) { + return getPointerTo(::mlir::cir::VoidType::get(getContext()), cirAS); + } + + mlir::cir::BoolAttr getCIRBoolAttr(bool state) { + return mlir::cir::BoolAttr::get(getContext(), getBoolTy(), state); + } + + mlir::TypedAttr getZeroAttr(mlir::Type t) { + return 
mlir::cir::ZeroAttr::get(getContext(), t); + } + + mlir::TypedAttr getZeroInitAttr(mlir::Type ty) { + if (mlir::isa(ty)) + return mlir::cir::IntAttr::get(ty, 0); + if (auto fltType = mlir::dyn_cast(ty)) + return mlir::cir::FPAttr::getZero(fltType); + if (auto fltType = mlir::dyn_cast(ty)) + return mlir::cir::FPAttr::getZero(fltType); + if (auto fltType = mlir::dyn_cast(ty)) + return mlir::cir::FPAttr::getZero(fltType); + if (auto fltType = mlir::dyn_cast(ty)) + return mlir::cir::FPAttr::getZero(fltType); + if (auto complexType = mlir::dyn_cast(ty)) + return getZeroAttr(complexType); + if (auto arrTy = mlir::dyn_cast(ty)) + return getZeroAttr(arrTy); + if (auto ptrTy = mlir::dyn_cast(ty)) + return getConstNullPtrAttr(ptrTy); + if (auto structTy = mlir::dyn_cast(ty)) + return getZeroAttr(structTy); + if (mlir::isa(ty)) { + return getCIRBoolAttr(false); + } + llvm_unreachable("Zero initializer for given type is NYI"); + } + + mlir::Value createLoad(mlir::Location loc, mlir::Value ptr, + bool isVolatile = false, uint64_t alignment = 0) { + mlir::IntegerAttr intAttr; + if (alignment) + intAttr = mlir::IntegerAttr::get( + mlir::IntegerType::get(ptr.getContext(), 64), alignment); + + return create(loc, ptr, /*isDeref=*/false, isVolatile, + /*alignment=*/intAttr, + /*mem_order=*/mlir::cir::MemOrderAttr{}); + } + + mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, + uint64_t alignment) { + return createLoad(loc, ptr, /*isVolatile=*/false, alignment); + } + + mlir::Value createNot(mlir::Value value) { + return create(value.getLoc(), value.getType(), + mlir::cir::UnaryOpKind::Not, value); + } + + mlir::cir::CmpOp createCompare(mlir::Location loc, mlir::cir::CmpOpKind kind, + mlir::Value lhs, mlir::Value rhs) { + return create(loc, getBoolTy(), kind, lhs, rhs); + } + + mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) { + return createCompare(loc, mlir::cir::CmpOpKind::ne, operand, operand); + } + + mlir::Value createUnaryOp(mlir::Location loc, mlir::cir::UnaryOpKind kind, + mlir::Value operand) { + return create(loc, kind, operand); + } + + mlir::Value createBinop(mlir::Value lhs, mlir::cir::BinOpKind kind, + const llvm::APInt &rhs) { + return create( + lhs.getLoc(), lhs.getType(), kind, lhs, + getConstAPInt(lhs.getLoc(), lhs.getType(), rhs)); + } + + mlir::Value createBinop(mlir::Value lhs, mlir::cir::BinOpKind kind, + mlir::Value rhs) { + return create(lhs.getLoc(), lhs.getType(), kind, lhs, + rhs); + } + + mlir::Value createBinop(mlir::Location loc, mlir::Value lhs, + mlir::cir::BinOpKind kind, mlir::Value rhs) { + return create(loc, lhs.getType(), kind, lhs, rhs); + } + + mlir::Value createShift(mlir::Value lhs, const llvm::APInt &rhs, + bool isShiftLeft) { + return create( + lhs.getLoc(), lhs.getType(), lhs, + getConstAPInt(lhs.getLoc(), lhs.getType(), rhs), isShiftLeft); + } + + mlir::Value createShift(mlir::Value lhs, unsigned bits, bool isShiftLeft) { + auto width = mlir::dyn_cast(lhs.getType()).getWidth(); + auto shift = llvm::APInt(width, bits); + return createShift(lhs, shift, isShiftLeft); + } + + mlir::Value createShiftLeft(mlir::Value lhs, unsigned bits) { + return createShift(lhs, bits, true); + } + + mlir::Value createShiftRight(mlir::Value lhs, unsigned bits) { + return createShift(lhs, bits, false); + } + + mlir::Value createLowBitsSet(mlir::Location loc, unsigned size, + unsigned bits) { + auto val = llvm::APInt::getLowBitsSet(size, bits); + auto typ = mlir::cir::IntType::get(getContext(), size, false); + return getConstAPInt(loc, typ, val); + } + + 
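+  // Bitwise helpers. The llvm::APInt overloads below first materialize the
+  // constant operand via getConstAPInt, e.g. (sketch) masking the low 8 bits
+  // of a 32-bit value: createAnd(v, llvm::APInt(32, 0xFF)).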
mlir::Value createAnd(mlir::Value lhs, llvm::APInt rhs) { + auto val = getConstAPInt(lhs.getLoc(), lhs.getType(), rhs); + return createBinop(lhs, mlir::cir::BinOpKind::And, val); + } + + mlir::Value createAnd(mlir::Value lhs, mlir::Value rhs) { + return createBinop(lhs, mlir::cir::BinOpKind::And, rhs); + } + + mlir::Value createAnd(mlir::Location loc, mlir::Value lhs, mlir::Value rhs) { + return createBinop(loc, lhs, mlir::cir::BinOpKind::And, rhs); + } + + mlir::Value createOr(mlir::Value lhs, llvm::APInt rhs) { + auto val = getConstAPInt(lhs.getLoc(), lhs.getType(), rhs); + return createBinop(lhs, mlir::cir::BinOpKind::Or, val); + } + + mlir::Value createOr(mlir::Value lhs, mlir::Value rhs) { + return createBinop(lhs, mlir::cir::BinOpKind::Or, rhs); + } + + mlir::Value createMul(mlir::Value lhs, mlir::Value rhs, bool hasNUW = false, + bool hasNSW = false) { + auto op = create(lhs.getLoc(), lhs.getType(), + mlir::cir::BinOpKind::Mul, lhs, rhs); + if (hasNUW) + op.setNoUnsignedWrap(true); + if (hasNSW) + op.setNoSignedWrap(true); + return op; + } + mlir::Value createNSWMul(mlir::Value lhs, mlir::Value rhs) { + return createMul(lhs, rhs, false, true); + } + mlir::Value createNUWAMul(mlir::Value lhs, mlir::Value rhs) { + return createMul(lhs, rhs, true, false); + } + + mlir::Value createMul(mlir::Value lhs, llvm::APInt rhs) { + auto val = getConstAPInt(lhs.getLoc(), lhs.getType(), rhs); + return createBinop(lhs, mlir::cir::BinOpKind::Mul, val); + } + + mlir::Value createSelect(mlir::Location loc, mlir::Value condition, + mlir::Value trueValue, mlir::Value falseValue) { + assert(trueValue.getType() == falseValue.getType() && + "trueValue and falseValue should have the same type"); + return create(loc, trueValue.getType(), condition, + trueValue, falseValue); + } + + mlir::Value createLogicalAnd(mlir::Location loc, mlir::Value lhs, + mlir::Value rhs) { + return createSelect(loc, lhs, rhs, getBool(false, loc)); + } + + mlir::Value createLogicalOr(mlir::Location loc, mlir::Value lhs, + mlir::Value rhs) { + return createSelect(loc, lhs, getBool(true, loc), rhs); + } + + mlir::Value createComplexCreate(mlir::Location loc, mlir::Value real, + mlir::Value imag) { + auto resultComplexTy = + mlir::cir::ComplexType::get(getContext(), real.getType()); + return create(loc, resultComplexTy, real, imag); + } + + mlir::Value createComplexReal(mlir::Location loc, mlir::Value operand) { + auto operandTy = mlir::cast(operand.getType()); + return create(loc, operandTy.getElementTy(), + operand); + } + + mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand) { + auto operandTy = mlir::cast(operand.getType()); + return create(loc, operandTy.getElementTy(), + operand); + } + + mlir::Value createComplexBinOp(mlir::Location loc, mlir::Value lhs, + mlir::cir::ComplexBinOpKind kind, + mlir::Value rhs, + mlir::cir::ComplexRangeKind range, + bool promoted) { + return create(loc, kind, lhs, rhs, range, + promoted); + } + + mlir::Value createComplexAdd(mlir::Location loc, mlir::Value lhs, + mlir::Value rhs) { + return createBinop(loc, lhs, mlir::cir::BinOpKind::Add, rhs); + } + + mlir::Value createComplexSub(mlir::Location loc, mlir::Value lhs, + mlir::Value rhs) { + return createBinop(loc, lhs, mlir::cir::BinOpKind::Sub, rhs); + } + + mlir::Value createComplexMul(mlir::Location loc, mlir::Value lhs, + mlir::Value rhs, + mlir::cir::ComplexRangeKind range, + bool promoted) { + return createComplexBinOp(loc, lhs, mlir::cir::ComplexBinOpKind::Mul, rhs, + range, promoted); + } + + mlir::Value 
createComplexDiv(mlir::Location loc, mlir::Value lhs, + mlir::Value rhs, + mlir::cir::ComplexRangeKind range, + bool promoted) { + return createComplexBinOp(loc, lhs, mlir::cir::ComplexBinOpKind::Div, rhs, + range, promoted); + } + + mlir::cir::StoreOp createStore(mlir::Location loc, mlir::Value val, + mlir::Value dst, bool _volatile = false, + ::mlir::IntegerAttr align = {}, + ::mlir::cir::MemOrderAttr order = {}) { + if (mlir::cast(dst.getType()).getPointee() != + val.getType()) + dst = createPtrBitcast(dst, val.getType()); + return create(loc, val, dst, _volatile, align, order); + } + + mlir::Value createAlloca(mlir::Location loc, mlir::cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + mlir::IntegerAttr alignment, + mlir::Value dynAllocSize) { + return create(loc, addrType, type, name, alignment, + dynAllocSize); + } + + mlir::Value createAlloca(mlir::Location loc, mlir::cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + clang::CharUnits alignment, + mlir::Value dynAllocSize) { + auto alignmentIntAttr = getSizeFromCharUnits(getContext(), alignment); + return createAlloca(loc, addrType, type, name, alignmentIntAttr, + dynAllocSize); + } + + mlir::Value createAlloca(mlir::Location loc, mlir::cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + mlir::IntegerAttr alignment) { + return create(loc, addrType, type, name, alignment); + } + + mlir::Value createAlloca(mlir::Location loc, mlir::cir::PointerType addrType, + mlir::Type type, llvm::StringRef name, + clang::CharUnits alignment) { + auto alignmentIntAttr = getSizeFromCharUnits(getContext(), alignment); + return createAlloca(loc, addrType, type, name, alignmentIntAttr); + } + + mlir::Value createSub(mlir::Value lhs, mlir::Value rhs, bool hasNUW = false, + bool hasNSW = false) { + auto op = create(lhs.getLoc(), lhs.getType(), + mlir::cir::BinOpKind::Sub, lhs, rhs); + if (hasNUW) + op.setNoUnsignedWrap(true); + if (hasNSW) + op.setNoSignedWrap(true); + return op; + } + + mlir::Value createNSWSub(mlir::Value lhs, mlir::Value rhs) { + return createSub(lhs, rhs, false, true); + } + + mlir::Value createNUWSub(mlir::Value lhs, mlir::Value rhs) { + return createSub(lhs, rhs, true, false); + } + + mlir::Value createAdd(mlir::Value lhs, mlir::Value rhs, bool hasNUW = false, + bool hasNSW = false) { + auto op = create(lhs.getLoc(), lhs.getType(), + mlir::cir::BinOpKind::Add, lhs, rhs); + if (hasNUW) + op.setNoUnsignedWrap(true); + if (hasNSW) + op.setNoSignedWrap(true); + return op; + } + + mlir::Value createNSWAdd(mlir::Value lhs, mlir::Value rhs) { + return createAdd(lhs, rhs, false, true); + } + mlir::Value createNUWAdd(mlir::Value lhs, mlir::Value rhs) { + return createAdd(lhs, rhs, true, false); + } + + struct BinOpOverflowResults { + mlir::Value result; + mlir::Value overflow; + }; + + BinOpOverflowResults createBinOpOverflowOp(mlir::Location loc, + mlir::cir::IntType resultTy, + mlir::cir::BinOpOverflowKind kind, + mlir::Value lhs, mlir::Value rhs) { + auto op = create(loc, resultTy, kind, lhs, rhs); + return {op.getResult(), op.getOverflow()}; + } + + //===--------------------------------------------------------------------===// + // Cast/Conversion Operators + //===--------------------------------------------------------------------===// + + mlir::Value createCast(mlir::Location loc, mlir::cir::CastKind kind, + mlir::Value src, mlir::Type newTy) { + if (newTy == src.getType()) + return src; + return create(loc, newTy, kind, src); + } + + mlir::Value createCast(mlir::cir::CastKind 
kind, mlir::Value src, + mlir::Type newTy) { + if (newTy == src.getType()) + return src; + return createCast(src.getLoc(), kind, src, newTy); + } + + mlir::Value createIntCast(mlir::Value src, mlir::Type newTy) { + return createCast(mlir::cir::CastKind::integral, src, newTy); + } + + mlir::Value createIntToPtr(mlir::Value src, mlir::Type newTy) { + return createCast(mlir::cir::CastKind::int_to_ptr, src, newTy); + } + + mlir::Value createGetMemberOp(mlir::Location &loc, mlir::Value structPtr, + const char *fldName, unsigned idx) { + + assert(mlir::isa(structPtr.getType())); + auto structBaseTy = + mlir::cast(structPtr.getType()).getPointee(); + assert(mlir::isa(structBaseTy)); + auto fldTy = + mlir::cast(structBaseTy).getMembers()[idx]; + auto fldPtrTy = ::mlir::cir::PointerType::get(getContext(), fldTy); + return create(loc, fldPtrTy, structPtr, fldName, + idx); + } + + mlir::Value createPtrToInt(mlir::Value src, mlir::Type newTy) { + return createCast(mlir::cir::CastKind::ptr_to_int, src, newTy); + } + + mlir::Value createPtrToBoolCast(mlir::Value v) { + return createCast(mlir::cir::CastKind::ptr_to_bool, v, getBoolTy()); + } + + // TODO(cir): the following function was introduced to keep in sync with LLVM + // codegen. CIR does not have "zext" operations. It should eventually be + // renamed or removed. For now, we just add whatever cast is required here. + mlir::Value createZExtOrBitCast(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + auto srcTy = src.getType(); + + if (srcTy == newTy) + return src; + + if (mlir::isa(srcTy) && + mlir::isa(newTy)) + return createBoolToInt(src, newTy); + + llvm_unreachable("unhandled extension cast"); + } + + mlir::Value createBoolToInt(mlir::Value src, mlir::Type newTy) { + return createCast(mlir::cir::CastKind::bool_to_int, src, newTy); + } + + mlir::Value createBitcast(mlir::Value src, mlir::Type newTy) { + return createCast(mlir::cir::CastKind::bitcast, src, newTy); + } + + mlir::Value createBitcast(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + return createCast(loc, mlir::cir::CastKind::bitcast, src, newTy); + } + + mlir::Value createPtrBitcast(mlir::Value src, mlir::Type newPointeeTy) { + assert(mlir::isa(src.getType()) && + "expected ptr src"); + return createBitcast(src, getPointerTo(newPointeeTy)); + } + + mlir::Value createAddrSpaceCast(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + return createCast(loc, mlir::cir::CastKind::address_space, src, newTy); + } + + mlir::Value createAddrSpaceCast(mlir::Value src, mlir::Type newTy) { + return createAddrSpaceCast(src.getLoc(), src, newTy); + } + + mlir::Value createPtrIsNull(mlir::Value ptr) { + return createNot(createPtrToBoolCast(ptr)); + } + + // + // Block handling helpers + // ---------------------- + // + OpBuilder::InsertPoint getBestAllocaInsertPoint(mlir::Block *block) { + auto last = + std::find_if(block->rbegin(), block->rend(), [](mlir::Operation &op) { + return mlir::isa(&op); + }); + + if (last != block->rend()) + return OpBuilder::InsertPoint(block, + ++mlir::Block::iterator(&*last)); + return OpBuilder::InsertPoint(block, block->begin()); + }; + + mlir::IntegerAttr getSizeFromCharUnits(mlir::MLIRContext *ctx, + clang::CharUnits size) { + // Note that mlir::IntegerType is used instead of mlir::cir::IntType here + // because we don't need sign information for this to be useful, so keep + // it simple. + return mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 64), + size.getQuantity()); + } + + /// Create a do-while operation. 
+ mlir::cir::DoWhileOp createDoWhile( + mlir::Location loc, + llvm::function_ref condBuilder, + llvm::function_ref bodyBuilder) { + return create(loc, condBuilder, bodyBuilder); + } + + /// Create a while operation. + mlir::cir::WhileOp createWhile( + mlir::Location loc, + llvm::function_ref condBuilder, + llvm::function_ref bodyBuilder) { + return create(loc, condBuilder, bodyBuilder); + } + + /// Create a for operation. + mlir::cir::ForOp createFor( + mlir::Location loc, + llvm::function_ref condBuilder, + llvm::function_ref bodyBuilder, + llvm::function_ref stepBuilder) { + return create(loc, condBuilder, bodyBuilder, stepBuilder); + } + + mlir::TypedAttr getConstPtrAttr(mlir::Type t, int64_t v) { + auto val = + mlir::IntegerAttr::get(mlir::IntegerType::get(t.getContext(), 64), v); + return mlir::cir::ConstPtrAttr::get( + getContext(), mlir::cast(t), val); + } + + mlir::TypedAttr getConstNullPtrAttr(mlir::Type t) { + assert(mlir::isa(t) && "expected cir.ptr"); + return getConstPtrAttr(t, 0); + } + + // Creates constant nullptr for pointer type ty. + mlir::cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) { + assert(!MissingFeatures::targetCodeGenInfoGetNullPointer()); + return create(loc, ty, getConstPtrAttr(ty, 0)); + } + + /// Create a loop condition. + mlir::cir::ConditionOp createCondition(mlir::Value condition) { + return create(condition.getLoc(), condition); + } + + /// Create a yield operation. + mlir::cir::YieldOp createYield(mlir::Location loc, + mlir::ValueRange value = {}) { + return create(loc, value); + } + + mlir::cir::PtrStrideOp createPtrStride(mlir::Location loc, mlir::Value base, + mlir::Value stride) { + return create(loc, base.getType(), base, stride); + } + + mlir::cir::CallOp + createCallOp(mlir::Location loc, + mlir::SymbolRefAttr callee = mlir::SymbolRefAttr(), + mlir::Type returnType = mlir::cir::VoidType(), + mlir::ValueRange operands = mlir::ValueRange(), + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C, + mlir::cir::ExtraFuncAttributesAttr extraFnAttr = {}) { + + mlir::cir::CallOp callOp = create( + loc, callee, returnType, operands, callingConv); + + if (extraFnAttr) { + callOp->setAttr("extra_attrs", extraFnAttr); + } else { + mlir::NamedAttrList empty; + callOp->setAttr("extra_attrs", + mlir::cir::ExtraFuncAttributesAttr::get( + getContext(), empty.getDictionary(getContext()))); + } + return callOp; + } + + mlir::cir::CallOp + createCallOp(mlir::Location loc, mlir::cir::FuncOp callee, + mlir::ValueRange operands = mlir::ValueRange(), + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C, + mlir::cir::ExtraFuncAttributesAttr extraFnAttr = {}) { + return createCallOp(loc, mlir::SymbolRefAttr::get(callee), + callee.getFunctionType().getReturnType(), operands, + callingConv, extraFnAttr); + } + + mlir::cir::CallOp createIndirectCallOp( + mlir::Location loc, mlir::Value ind_target, mlir::cir::FuncType fn_type, + mlir::ValueRange operands = mlir::ValueRange(), + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C, + mlir::cir::ExtraFuncAttributesAttr extraFnAttr = {}) { + + llvm::SmallVector resOperands({ind_target}); + resOperands.append(operands.begin(), operands.end()); + + return createCallOp(loc, mlir::SymbolRefAttr(), fn_type.getReturnType(), + resOperands, callingConv, extraFnAttr); + } + + mlir::cir::CallOp + createCallOp(mlir::Location loc, mlir::SymbolRefAttr callee, + mlir::ValueRange operands = mlir::ValueRange(), + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C, + 
mlir::cir::ExtraFuncAttributesAttr extraFnAttr = {}) { + return createCallOp(loc, callee, mlir::cir::VoidType(), operands, + callingConv, extraFnAttr); + } + + mlir::cir::CallOp createTryCallOp( + mlir::Location loc, mlir::SymbolRefAttr callee = mlir::SymbolRefAttr(), + mlir::Type returnType = mlir::cir::VoidType(), + mlir::ValueRange operands = mlir::ValueRange(), + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C, + mlir::cir::ExtraFuncAttributesAttr extraFnAttr = {}) { + mlir::cir::CallOp tryCallOp = + create(loc, callee, returnType, operands, + callingConv, /*exception=*/getUnitAttr()); + if (extraFnAttr) { + tryCallOp->setAttr("extra_attrs", extraFnAttr); + } else { + mlir::NamedAttrList empty; + tryCallOp->setAttr("extra_attrs", + mlir::cir::ExtraFuncAttributesAttr::get( + getContext(), empty.getDictionary(getContext()))); + } + return tryCallOp; + } + + mlir::cir::CallOp createTryCallOp( + mlir::Location loc, mlir::cir::FuncOp callee, mlir::ValueRange operands, + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C, + mlir::cir::ExtraFuncAttributesAttr extraFnAttr = {}) { + return createTryCallOp(loc, mlir::SymbolRefAttr::get(callee), + callee.getFunctionType().getReturnType(), operands, + callingConv, extraFnAttr); + } + + mlir::cir::CallOp createIndirectTryCallOp( + mlir::Location loc, mlir::Value ind_target, mlir::cir::FuncType fn_type, + mlir::ValueRange operands, + mlir::cir::CallingConv callingConv = mlir::cir::CallingConv::C) { + llvm::SmallVector resOperands({ind_target}); + resOperands.append(operands.begin(), operands.end()); + return createTryCallOp(loc, mlir::SymbolRefAttr(), fn_type.getReturnType(), + resOperands, callingConv); + } + + struct GetMethodResults { + mlir::Value callee; + mlir::Value adjustedThis; + }; + + GetMethodResults createGetMethod(mlir::Location loc, mlir::Value method, + mlir::Value objectPtr) { + // Build the callee function type. + auto methodFuncTy = + mlir::cast(method.getType()).getMemberFuncTy(); + auto methodFuncInputTypes = methodFuncTy.getInputs(); + + auto objectPtrTy = mlir::cast(objectPtr.getType()); + auto objectPtrAddrSpace = + mlir::cast_if_present( + objectPtrTy.getAddrSpace()); + auto adjustedThisTy = getVoidPtrTy(objectPtrAddrSpace); + + llvm::SmallVector calleeFuncInputTypes{adjustedThisTy}; + calleeFuncInputTypes.insert(calleeFuncInputTypes.end(), + methodFuncInputTypes.begin(), + methodFuncInputTypes.end()); + auto calleeFuncTy = + methodFuncTy.clone(calleeFuncInputTypes, methodFuncTy.getReturnType()); + // TODO(cir): consider the address space of the callee. + assert(!MissingFeatures::addressSpace()); + auto calleeTy = getPointerTo(calleeFuncTy); + + auto op = create(loc, calleeTy, adjustedThisTy, + method, objectPtr); + return {op.getCallee(), op.getAdjustedThis()}; + } +}; + +} // namespace cir +#endif diff --git a/clang/include/clang/CIR/Dialect/CMakeLists.txt b/clang/include/clang/CIR/Dialect/CMakeLists.txt index f33061b2d87c..cd837615e82f 100644 --- a/clang/include/clang/CIR/Dialect/CMakeLists.txt +++ b/clang/include/clang/CIR/Dialect/CMakeLists.txt @@ -1 +1,28 @@ +add_custom_target(clang-cir-doc) + +# This replicates part of the add_mlir_doc cmake function from MLIR that cannot +# be used here. This happens because it expects to be run inside MLIR directory +# which is not the case for CIR (and also FIR, both have similar workarounds). 
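+# A sketch of the expected call shape, inferred from the parameter list below
+# (doc_filename, output_file, output_directory, tablegen command); see the
+# pass-doc invocation at the end of this file:
+#   add_clang_mlir_doc(Passes CIRPasses ./ -gen-pass-doc)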
+function(add_clang_mlir_doc doc_filename output_file output_directory command) + set(LLVM_TARGET_DEFINITIONS ${doc_filename}.td) + tablegen(MLIR ${output_file}.md ${command} ${ARGN} "-I${MLIR_MAIN_SRC_DIR}" "-I${MLIR_INCLUDE_DIR}") + set(GEN_DOC_FILE ${CLANG_BINARY_DIR}/docs/${output_directory}${output_file}.md) + add_custom_command( + OUTPUT ${GEN_DOC_FILE} + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_BINARY_DIR}/${output_file}.md + ${GEN_DOC_FILE} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${output_file}.md) + add_custom_target(${output_file}DocGen DEPENDS ${GEN_DOC_FILE}) + add_dependencies(clang-cir-doc ${output_file}DocGen) +endfunction() + add_subdirectory(IR) + +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name CIR) +mlir_tablegen(Passes.capi.h.inc -gen-pass-capi-header --prefix CIR) +mlir_tablegen(Passes.capi.cpp.inc -gen-pass-capi-impl --prefix CIR) +add_public_tablegen_target(MLIRCIRPassIncGen) + +add_clang_mlir_doc(Passes CIRPasses ./ -gen-pass-doc) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrDefs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrDefs.td new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h new file mode 100644 index 000000000000..5961f77629b5 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.h @@ -0,0 +1,49 @@ +//===- CIRAttrs.h - MLIR CIR Attrs ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the attributes in the CIR dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_CIR_IR_CIRATTRS_H_ +#define MLIR_DIALECT_CIR_IR_CIRATTRS_H_ + +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" + +#include "llvm/ADT/SmallVector.h" + +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" + +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" + +//===----------------------------------------------------------------------===// +// CIR Dialect Attrs +//===----------------------------------------------------------------------===// + +namespace clang { +class FunctionDecl; +class VarDecl; +class RecordDecl; +} // namespace clang + +namespace mlir { +namespace cir { +class ArrayType; +class StructType; +class BoolType; +} // namespace cir +} // namespace mlir + +#define GET_ATTRDEF_CLASSES +#include "clang/CIR/Dialect/IR/CIROpsAttributes.h.inc" + +#endif // MLIR_DIALECT_CIR_IR_CIRATTRS_H_ diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td new file mode 100644 index 000000000000..5e39663bd906 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -0,0 +1,1181 @@ +//===- CIRAttrs.td - CIR dialect types ---------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the CIR dialect attributes. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CIR_DIALECT_CIR_ATTRS +#define MLIR_CIR_DIALECT_CIR_ATTRS + +include "mlir/IR/BuiltinAttributeInterfaces.td" +include "mlir/IR/EnumAttr.td" + +include "clang/CIR/Dialect/IR/CIRDialect.td" + +include "clang/CIR/Interfaces/ASTAttrInterfaces.td" + +//===----------------------------------------------------------------------===// +// CIR Attrs +//===----------------------------------------------------------------------===// + +class CIR_Attr traits = []> + : AttrDef { + let mnemonic = attrMnemonic; +} + +class CIRUnitAttr traits = []> + : CIR_Attr { + let returnType = "bool"; + let defaultValue = "false"; + let valueType = NoneType; + let isOptional = 1; +} + +//===----------------------------------------------------------------------===// +// LangAttr +//===----------------------------------------------------------------------===// + +def C : I32EnumAttrCase<"C", 1, "c">; +def CXX : I32EnumAttrCase<"CXX", 2, "cxx">; +def OpenCLC : I32EnumAttrCase<"OpenCLC", 3, "opencl_c">; + +def SourceLanguage : I32EnumAttr<"SourceLanguage", "Source language", [ + C, CXX, OpenCLC +]> { + let cppNamespace = "::mlir::cir"; + let genSpecializedAttr = 0; +} + +def LangAttr : CIR_Attr<"Lang", "lang"> { + let summary = "Module source language"; + let parameters = (ins SourceLanguage:$lang); + let description = [{ + Represents the source language used to generate the module. + + Example: + ``` + // Module compiled from C. + module attributes {cir.lang = cir.lang} {} + // Module compiled from C++. + module attributes {cir.lang = cir.lang} {} + ``` + }]; + let hasCustomAssemblyFormat = 1; + let extraClassDeclaration = [{ + bool isC() const { return getLang() == SourceLanguage::C; }; + bool isCXX() const { return getLang() == SourceLanguage::CXX; }; + }]; +} + +//===----------------------------------------------------------------------===// +// BoolAttr +//===----------------------------------------------------------------------===// + +def CIR_BoolAttr : CIR_Attr<"Bool", "bool", [TypedAttrInterface]> { + let summary = "Represent true/false for !cir.bool types"; + let description = [{ + The BoolAttr represents a 'true' or 'false' value. + }]; + + let parameters = (ins AttributeSelfTypeParameter< + "", "mlir::cir::BoolType">:$type, + "bool":$value); + + let assemblyFormat = [{ + `<` $value `>` + }]; +} + +//===----------------------------------------------------------------------===// +// ZeroAttr +//===----------------------------------------------------------------------===// + +def ZeroAttr : CIR_Attr<"Zero", "zero", [TypedAttrInterface]> { + let summary = "Attribute to represent zero initialization"; + let description = [{ + The ZeroAttr is used to indicate zero initialization on structs. 
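+
+    Illustrative example of zero-initializing a global (the struct type
+    spelling is abbreviated here and not meant to be exact):
+    ```
+    cir.global external @s = #cir.zero : !cir.struct<"S", i8, i64>
+    ```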
+  }];
+
+  let parameters = (ins AttributeSelfTypeParameter<"">:$type);
+  let assemblyFormat = [{}];
+}
+
+//===----------------------------------------------------------------------===//
+// ConstArrayAttr
+//===----------------------------------------------------------------------===//
+
+def ConstArrayAttr : CIR_Attr<"ConstArray", "const_array", [TypedAttrInterface]> {
+  let summary = "A constant array from ArrayAttr or StringRefAttr";
+  let description = [{
+    A CIR array attribute is an array of literals of the specified attribute
+    types.
+  }];
+
+  let parameters = (ins AttributeSelfTypeParameter<"">:$type,
+                        "Attribute":$elts,
+                        "int":$trailingZerosNum);
+
+  // Define a custom builder for the type that removes the need to pass
+  // in an MLIRContext instance, as it can be inferred from the `type`.
+  let builders = [
+    AttrBuilderWithInferredContext<(ins "mlir::cir::ArrayType":$type,
+                                        "Attribute":$elts), [{
+      int zeros = 0;
+      auto typeSize = mlir::cast<mlir::cir::ArrayType>(type).getSize();
+      if (auto str = mlir::dyn_cast<mlir::StringAttr>(elts))
+        zeros = typeSize - str.size();
+      else
+        zeros = typeSize - mlir::cast<mlir::ArrayAttr>(elts).size();
+
+      return $_get(type.getContext(), type, elts, zeros);
+    }]>
+  ];
+
+  // Printing and parsing available in CIRDialect.cpp
+  let hasCustomAssemblyFormat = 1;
+
+  // Enable verifier.
+  let genVerifyDecl = 1;
+
+  let extraClassDeclaration = [{
+    bool hasTrailingZeros() const { return getTrailingZerosNum() != 0; };
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// ConstVectorAttr
+//===----------------------------------------------------------------------===//
+
+def ConstVectorAttr : CIR_Attr<"ConstVector", "const_vector",
+                               [TypedAttrInterface]> {
+  let summary = "A constant vector from ArrayAttr";
+  let description = [{
+    A CIR vector attribute is an array of literals of the specified attribute
+    types.
+  }];
+
+  let parameters = (ins AttributeSelfTypeParameter<"">:$type,
+                        "ArrayAttr":$elts);
+
+  // Define a custom builder for the type that removes the need to pass in an
+  // MLIRContext instance, as it can be inferred from the `type`.
+  let builders = [
+    AttrBuilderWithInferredContext<(ins "mlir::cir::VectorType":$type,
+                                        "ArrayAttr":$elts), [{
+      return $_get(type.getContext(), type, elts);
+    }]>
+  ];
+
+  // Printing and parsing available in CIRDialect.cpp
+  let hasCustomAssemblyFormat = 1;
+
+  // Enable verifier.
+  let genVerifyDecl = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// ConstStructAttr
+//===----------------------------------------------------------------------===//
+
+def ConstStructAttr : CIR_Attr<"ConstStruct", "const_struct",
+                               [TypedAttrInterface]> {
+  let summary = "Represents a constant struct";
+  let description = [{
+    Effectively supports "struct-like" constants. It must be built from
+    an `mlir::ArrayAttr` instance where each element is a typed attribute
+    (`mlir::TypedAttribute`).
+
+    Example:
+    ```
+    cir.global external @rgb2 = #cir.const_struct<{0 : i8,
+      5 : i64, #cir.null : !cir.ptr
+      }> : !cir.struct<"", i8, i64, !cir.ptr>
+    ```
+  }];
+
+  let parameters = (ins AttributeSelfTypeParameter<"">:$type,
+                        "ArrayAttr":$members);
+
+  let builders = [
+    AttrBuilderWithInferredContext<(ins "mlir::cir::StructType":$type,
+                                        "ArrayAttr":$members), [{
+      return $_get(type.getContext(), type, members);
+    }]>
+  ];
+
+  let assemblyFormat = [{
+    `<` custom($members) `>`
+  }];
+
+  let genVerifyDecl = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// IntegerAttr
+//===----------------------------------------------------------------------===//
+
+def IntAttr : CIR_Attr<"Int", "int", [TypedAttrInterface]> {
+  let summary = "An attribute containing an integer value";
+  let description = [{
+    An integer attribute is a literal attribute that represents an integral
+    value of the specified integer type.
+  }];
+  let parameters = (ins AttributeSelfTypeParameter<"">:$type, "APInt":$value);
+  let builders = [
+    AttrBuilderWithInferredContext<(ins "Type":$type,
+                                        "const APInt &":$value), [{
+      return $_get(type.getContext(), type, value);
+    }]>,
+    AttrBuilderWithInferredContext<(ins "Type":$type, "int64_t":$value), [{
+      IntType intType = mlir::cast<IntType>(type);
+      mlir::APInt apValue(intType.getWidth(), value, intType.isSigned());
+      return $_get(intType.getContext(), intType, apValue);
+    }]>,
+  ];
+  let extraClassDeclaration = [{
+    int64_t getSInt() const { return getValue().getSExtValue(); }
+    uint64_t getUInt() const { return getValue().getZExtValue(); }
+    bool isNullValue() const { return getValue() == 0; }
+    uint64_t getBitWidth() const { return mlir::cast<IntType>(getType()).getWidth(); }
+  }];
+  let genVerifyDecl = 1;
+  let hasCustomAssemblyFormat = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FPAttr
+//===----------------------------------------------------------------------===//
+
+def FPAttr : CIR_Attr<"FP", "fp", [TypedAttrInterface]> {
+  let summary = "An attribute containing a floating-point value";
+  let description = [{
+    An fp attribute is a literal attribute that represents a floating-point
+    value of the specified floating-point type.
+  }];
+  let parameters = (ins AttributeSelfTypeParameter<"">:$type, "APFloat":$value);
+  let builders = [
+    AttrBuilderWithInferredContext<(ins "Type":$type,
+                                        "const APFloat &":$value), [{
+      return $_get(type.getContext(), type, value);
+    }]>,
+  ];
+  let extraClassDeclaration = [{
+    static FPAttr getZero(mlir::Type type);
+  }];
+  let genVerifyDecl = 1;
+
+  let assemblyFormat = [{
+    `<` custom($value, ref($type)) `>`
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// ComplexAttr
+//===----------------------------------------------------------------------===//
+
+def ComplexAttr : CIR_Attr<"Complex", "complex", [TypedAttrInterface]> {
+  let summary = "An attribute that contains a constant complex value";
+  let description = [{
+    The `#cir.complex` attribute contains a constant value of complex number
+    type. The `real` parameter gives the real part of the complex number and
+    the `imag` parameter gives the imaginary part of the complex number.
+
+    The `real` and `imag` parameters must both be either an IntAttr or an
+    FPAttr holding values of the same CIR type.
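+
+    A hypothetical rendering of a constant `1 + 2i` over a signed 32-bit
+    integer element type (the alias spelling `!s32i` is illustrative and
+    depends on context):
+    ```
+    #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+    ```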
+ }]; + + let parameters = (ins + AttributeSelfTypeParameter<"", "mlir::cir::ComplexType">:$type, + "mlir::TypedAttr":$real, "mlir::TypedAttr":$imag); + + let builders = [ + AttrBuilderWithInferredContext<(ins "mlir::cir::ComplexType":$type, + "mlir::TypedAttr":$real, + "mlir::TypedAttr":$imag), [{ + return $_get(type.getContext(), type, real, imag); + }]>, + ]; + + let genVerifyDecl = 1; + + let assemblyFormat = [{ + `<` qualified($real) `,` qualified($imag) `>` + }]; +} + +//===----------------------------------------------------------------------===// +// ConstPointerAttr +//===----------------------------------------------------------------------===// + +def ConstPtrAttr : CIR_Attr<"ConstPtr", "ptr", [TypedAttrInterface]> { + let summary = "Holds a constant pointer value"; + let parameters = (ins + AttributeSelfTypeParameter<"", "::mlir::cir::PointerType">:$type, + "mlir::IntegerAttr":$value); + let description = [{ + A pointer attribute is a literal attribute that represents an integral + value of a pointer type. + }]; + let builders = [ + AttrBuilderWithInferredContext<(ins "Type":$type, "mlir::IntegerAttr":$value), [{ + return $_get(type.getContext(), mlir::cast(type), value); + }]>, + AttrBuilder<(ins "Type":$type, + "mlir::IntegerAttr":$value), [{ + return $_get($_ctxt, mlir::cast(type), value); + }]>, + ]; + let extraClassDeclaration = [{ + bool isNullValue() const { return getValue().getInt() == 0; } + }]; + + let assemblyFormat = [{ + `<` custom($value) `>` + }]; +} + +//===----------------------------------------------------------------------===// +// CmpThreeWayInfoAttr +//===----------------------------------------------------------------------===// + +def CmpOrdering_Strong : I32EnumAttrCase<"Strong", 1, "strong">; +def CmpOrdering_Partial : I32EnumAttrCase<"Partial", 2, "partial">; + +def CmpOrdering : I32EnumAttr< + "CmpOrdering", "three-way comparison ordering kind", + [CmpOrdering_Strong, CmpOrdering_Partial] +> { + let cppNamespace = "::mlir::cir"; +} + +def CmpThreeWayInfoAttr : CIR_Attr<"CmpThreeWayInfo", "cmp3way_info"> { + let summary = "Holds information about a three-way comparison operation"; + let description = [{ + The `#cmp3way_info` attribute contains information about a three-way + comparison operation `cir.cmp3way`. + + The `ordering` parameter gives the ordering kind of the three-way comparison + operation. It may be either strong ordering or partial ordering. + + Given the two input operands of the three-way comparison operation `lhs` and + `rhs`, the `lt`, `eq`, `gt`, and `unordered` parameters gives the result + value that should be produced by the three-way comparison operation when the + ordering between `lhs` and `rhs` is `lhs < rhs`, `lhs == rhs`, `lhs > rhs`, + or neither, respectively. + }]; + + let parameters = (ins "CmpOrdering":$ordering, "int64_t":$lt, "int64_t":$eq, + "int64_t":$gt, + OptionalParameter<"std::optional">:$unordered); + + let builders = [ + AttrBuilder<(ins "int64_t":$lt, "int64_t":$eq, "int64_t":$gt), [{ + return $_get($_ctxt, CmpOrdering::Strong, lt, eq, gt, std::nullopt); + }]>, + AttrBuilder<(ins "int64_t":$lt, "int64_t":$eq, "int64_t":$gt, + "int64_t":$unordered), [{ + return $_get($_ctxt, CmpOrdering::Partial, lt, eq, gt, unordered); + }]>, + ]; + + let extraClassDeclaration = [{ + /// Get attribute alias name for this attribute. + std::string getAlias() const; + }]; + + let assemblyFormat = [{ + `<` + $ordering `,` + `lt` `=` $lt `,` + `eq` `=` $eq `,` + `gt` `=` $gt + (`,` `unordered` `=` $unordered^)? 
+ `>` + }]; + + let genVerifyDecl = 1; +} + +//===----------------------------------------------------------------------===// +// DataMemberAttr +//===----------------------------------------------------------------------===// + +def DataMemberAttr : CIR_Attr<"DataMember", "data_member", + [TypedAttrInterface]> { + let summary = "Holds a constant data member pointer value"; + let parameters = (ins AttributeSelfTypeParameter< + "", "mlir::cir::DataMemberType">:$type, + OptionalParameter< + "std::optional">:$member_index); + let description = [{ + A data member attribute is a literal attribute that represents a constant + pointer-to-data-member value. + + The `member_index` parameter represents the index of the pointed-to member + within its containing struct. It is an optional parameter; lack of this + parameter indicates a null pointer-to-data-member value. + + Example: + ``` + #ptr = #cir.data_member<1> : !cir.data_member + + #null = #cir.data_member : !cir.data_member + ``` + }]; + + let genVerifyDecl = 1; + + let assemblyFormat = [{ + `<` ($member_index^):(`null`)? `>` + }]; + + let extraClassDeclaration = [{ + bool isNullPtr() const { + return !getMemberIndex().has_value(); + } + }]; +} + +//===----------------------------------------------------------------------===// +// MethodAttr +//===----------------------------------------------------------------------===// + +def MethodAttr : CIR_Attr<"Method", "method", [TypedAttrInterface]> { + let summary = "Holds a constant pointer-to-member-function value"; + let description = [{ + A method attribute is a literal attribute that represents a constant + pointer-to-member-function value. + + If the member function is a non-virtual function, the `symbol` parameter + gives the global symbol for the non-virtual member function. + + If the member function is a virtual function, the `vtable_offset` parameter + gives the offset of the vtable entry corresponding to the virtual member + function. + + `symbol` and `vtable_offset` cannot be present at the same time. If both of + `symbol` and `vtable_offset` are not present, the attribute represents a + null pointer constant. 
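+
+    Sketched examples (the concrete syntax comes from the custom printer, and
+    the method/function type spellings are abbreviated here):
+    ```
+    // Non-virtual member function, referenced through its global symbol.
+    #cir.method<@_ZN3Foo3barEv> : !cir.method
+    // Null pointer-to-member-function constant.
+    #cir.method<null> : !cir.method
+    ```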
+ }]; + + let parameters = (ins AttributeSelfTypeParameter< + "", "mlir::cir::MethodType">:$type, + OptionalParameter< + "std::optional">:$symbol, + OptionalParameter< + "std::optional">:$vtable_offset); + + let builders = [ + AttrBuilderWithInferredContext<(ins "mlir::cir::MethodType":$type), [{ + return $_get(type.getContext(), type, std::nullopt, std::nullopt); + }]>, + AttrBuilderWithInferredContext<(ins "mlir::cir::MethodType":$type, + "FlatSymbolRefAttr":$symbol), [{ + return $_get(type.getContext(), type, symbol, std::nullopt); + }]>, + AttrBuilderWithInferredContext<(ins "mlir::cir::MethodType":$type, + "uint64_t":$vtable_offset), [{ + return $_get(type.getContext(), type, std::nullopt, vtable_offset); + }]>, + ]; + + let hasCustomAssemblyFormat = 1; + + let genVerifyDecl = 1; +} + +//===----------------------------------------------------------------------===// +// SignedOverflowBehaviorAttr +//===----------------------------------------------------------------------===// + +def SignedOverflowBehaviorAttr : AttrDef { + let mnemonic = "signed_overflow_behavior"; + let parameters = (ins + "sob::SignedOverflowBehavior":$behavior + ); + let hasCustomAssemblyFormat = 1; +} + +//===----------------------------------------------------------------------===// +// GlobalViewAttr +//===----------------------------------------------------------------------===// + +def GlobalViewAttr : CIR_Attr<"GlobalView", "global_view", [TypedAttrInterface]> { + let summary = "Provides constant access to a global address"; + let description = [{ + Get constant address of global `symbol` and optionally apply offsets to + access existing subelements. It provides a way to access globals from other + global and always produces a pointer. + + The type of the input symbol can be different from `#cir.global_view` + output type, since a given view of the global might require a static + cast for initializing other globals. + + A list of indices can be optionally passed and each element subsequently + indexes underlying types. For `symbol` types like `!cir.array` + and `!cir.struct`, it leads to the constant address of sub-elements, while + for `!cir.ptr`, an offset is applied. The first index is relative to the + original symbol type, not the produced one. + + Example: + + ``` + cir.global external @s = @".str2": !cir.ptr + cir.global external @x = #cir.global_view<@s> : !cir.ptr + + cir.global external @rgb = #cir.const_array<[0 : i8, -23 : i8, 33 : i8] : !cir.array> + cir.global external @elt_ptr = #cir.global_view<@rgb, [1]> : !cir.ptr + cir.global external @table_of_ptrs = #cir.const_array<[#cir.global_view<@rgb, [1]> : !cir.ptr] : !cir.array x 1>> + ``` + }]; + + let parameters = (ins AttributeSelfTypeParameter<"">:$type, + "FlatSymbolRefAttr":$symbol, + OptionalParameter<"ArrayAttr">:$indices); + + let builders = [ + AttrBuilderWithInferredContext<(ins "Type":$type, + "FlatSymbolRefAttr":$symbol, + CArg<"ArrayAttr", "{}">:$indices), [{ + return $_get(type.getContext(), type, symbol, indices); + }]> + ]; + + // let genVerifyDecl = 1; + let assemblyFormat = [{ + `<` + $symbol + (`,` $indices^)? + `>` + }]; +} + +//===----------------------------------------------------------------------===// +// TypeInfoAttr +//===----------------------------------------------------------------------===// + +def TypeInfoAttr : CIR_Attr<"TypeInfo", "typeinfo", [TypedAttrInterface]> { + let summary = "Represents a typeinfo used for RTTI"; + let description = [{ + The typeinfo data for a given class is stored into an ArrayAttr. 
The + layout is determined by the C++ ABI used (clang only implements + itanium on CIRGen). + + The verifier enforces that the output type is always a `!cir.struct`, + and that the ArrayAttr element types match the equivalent member type + for the resulting struct, i.e, a GlobalViewAttr for symbol reference or + an IntAttr for flags. + + Example: + + ``` + cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr + + cir.global external @type_info_B = #cir.typeinfo<< + {#cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2]> : !cir.ptr} + >> : !cir.struct<"", !cir.ptr> + ``` + }]; + + let parameters = (ins AttributeSelfTypeParameter<"">:$type, + "mlir::ArrayAttr":$data); + + let builders = [ + AttrBuilderWithInferredContext<(ins "Type":$type, + "mlir::ArrayAttr":$data), [{ + return $_get(type.getContext(), type, data); + }]> + ]; + + // Checks struct element types should match the array for every equivalent + // element type. + let genVerifyDecl = 1; + let assemblyFormat = [{ + `<` custom($data) `>` + }]; +} + +//===----------------------------------------------------------------------===// +// VTableAttr +//===----------------------------------------------------------------------===// + +def VTableAttr : CIR_Attr<"VTable", "vtable", [TypedAttrInterface]> { + let summary = "Represents a C++ vtable"; + let description = [{ + Wraps a #cir.const_struct containing vtable data. + + Example: + ``` + cir.global linkonce_odr @_ZTV1B = #cir.vtable<< + {#cir.const_array<[#cir.null : !cir.ptr, + #cir.global_view<@_ZTI1B> : !cir.ptr, + #cir.global_view<@_ZN1BD1Ev> : !cir.ptr, + #cir.global_view<@_ZN1BD0Ev> : !cir.ptr, + #cir.global_view<@_ZNK1A5quackEv> : !cir.ptr]> + : !cir.array x 5>}>> + : !cir.struct<"", !cir.array x 5>> + ``` + }]; + + // `vtable_data` is const struct with one element, containing an array of + // vtable information. + let parameters = (ins AttributeSelfTypeParameter<"">:$type, + "ArrayAttr":$vtable_data); + + let builders = [ + AttrBuilderWithInferredContext<(ins "Type":$type, + "ArrayAttr":$vtable_data), [{ + return $_get(type.getContext(), type, vtable_data); + }]> + ]; + + let genVerifyDecl = 1; + let assemblyFormat = [{ + `<` custom($vtable_data) `>` + }]; +} + +//===----------------------------------------------------------------------===// +// StructLayoutAttr +//===----------------------------------------------------------------------===// + +// Used to decouple layout information from the struct type. StructType's +// uses this attribute to cache that information. + +def StructLayoutAttr : CIR_Attr<"StructLayout", "struct_layout"> { + let summary = "ABI specific information about a struct layout"; + let description = [{ + Holds layout information often queried by !cir.struct users + during lowering passes and optimizations. 
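+
+    Following the declarative assembly format declared below, an instance is
+    expected to print roughly as (integer and type spellings are illustrative):
+    ```
+    #cir.struct_layout<size = 12, alignment = 4, padded = false,
+                       largest_member = !u32i, offsets = [0, 4, 8]>
+    ```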
+ }]; + + let parameters = (ins "unsigned":$size, + "unsigned":$alignment, + "bool":$padded, + "mlir::Type":$largest_member, + "mlir::ArrayAttr":$offsets); + + let builders = [ + AttrBuilderWithInferredContext<(ins "unsigned":$size, + "unsigned":$alignment, + "bool":$padded, + "mlir::Type":$largest_member, + "mlir::ArrayAttr":$offsets), [{ + return $_get(largest_member.getContext(), size, alignment, padded, + largest_member, offsets); + }]>, + ]; + + let genVerifyDecl = 1; + let assemblyFormat = [{ + `<` + struct($size, $alignment, $padded, $largest_member, $offsets) + `>` + }]; +} + +//===----------------------------------------------------------------------===// +// DynamicCastInfoAttr +//===----------------------------------------------------------------------===// + +def DynamicCastInfoAttr + : CIR_Attr<"DynamicCastInfo", "dyn_cast_info"> { + let summary = "ABI specific information about a dynamic cast"; + let description = [{ + Provide ABI specific information about a dynamic cast operation. + + The `srcRtti` and the `destRtti` parameters give the RTTI of the source + struct type and the destination struct type, respectively. + + The `runtimeFunc` parameter gives the `__dynamic_cast` function which is + provided by the runtime. The `badCastFunc` parameter gives the + `__cxa_bad_cast` function which is also provided by the runtime. + + The `offsetHint` parameter gives the hint value that should be passed to the + `__dynamic_cast` runtime function. + }]; + + let parameters = (ins GlobalViewAttr:$srcRtti, + GlobalViewAttr:$destRtti, + "FlatSymbolRefAttr":$runtimeFunc, + "FlatSymbolRefAttr":$badCastFunc, + IntAttr:$offsetHint); + + let builders = [ + AttrBuilderWithInferredContext<(ins "GlobalViewAttr":$srcRtti, + "GlobalViewAttr":$destRtti, + "FlatSymbolRefAttr":$runtimeFunc, + "FlatSymbolRefAttr":$badCastFunc, + "IntAttr":$offsetHint), [{ + return $_get(srcRtti.getContext(), srcRtti, destRtti, runtimeFunc, + badCastFunc, offsetHint); + }]>, + ]; + + let genVerifyDecl = 1; + let assemblyFormat = [{ + `<` + qualified($srcRtti) `,` qualified($destRtti) `,` + $runtimeFunc `,` $badCastFunc `,` qualified($offsetHint) + `>` + }]; + + let extraClassDeclaration = [{ + /// Get attribute alias name for this attribute. + std::string getAlias() const; + }]; +} + +//===----------------------------------------------------------------------===// +// AddressSpaceAttr +//===----------------------------------------------------------------------===// + +def AS_OffloadPrivate : I32EnumAttrCase<"offload_private", 1>; +def AS_OffloadLocal : I32EnumAttrCase<"offload_local", 2>; +def AS_OffloadGlobal : I32EnumAttrCase<"offload_global", 3>; +def AS_OffloadConstant : I32EnumAttrCase<"offload_constant", 4>; +def AS_OffloadGeneric : I32EnumAttrCase<"offload_generic", 5>; +def AS_Target : I32EnumAttrCase<"target", 6>; + +def AddressSpaceAttr : CIR_Attr<"AddressSpace", "addrspace"> { + + let summary = "Address space attribute for pointer types"; + let description = [{ + The address space attribute is used in pointer types. It essentially + provides a unified model on top of `clang::LangAS`, rather than LLVM address + spaces. + + The representation is further simplified: `LangAS::Default` is encoded as + a null attribute; many address spaces from different offloading languages + are unified as `offload_*`; etc. + + The meaning of `value` parameter is defined as an extensible enum `Kind`, + which encodes target AS as offset to the last language AS. 
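+
+    As a rough illustration, a pointer type carrying this attribute is
+    expected to render along the lines of (the `!s32i` alias and the exact
+    spelling are illustrative):
+    ```
+    !cir.ptr<!s32i, addrspace(offload_global)>
+    !cir.ptr<!s32i, addrspace(target<1>)>
+    ```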
+ }]; + + let parameters = (ins "int32_t":$value); + + let assemblyFormat = [{ + `<` $value `>` + }]; + + let builders = [ + AttrBuilder<(ins "clang::LangAS":$langAS), [{ + assert(langAS != clang::LangAS::Default && + "Default address space is encoded as null attribute"); + return $_get($_ctxt, getValueFromLangAS(langAS).value()); + }]> + ]; + + let cppNamespace = "::mlir::cir"; + + // The following codes implement these conversions: + // clang::LangAS -> int32_t <-> text-form CIR + + // CIR_PointerType manipulates the parse- and stringify- methods to provide + // simplified assembly format `custom`. + + list langASCases = [ + AS_OffloadPrivate, AS_OffloadLocal, AS_OffloadGlobal, AS_OffloadConstant, + AS_OffloadGeneric + ]; + + I32EnumAttrCase targetASCase = AS_Target; + + let extraClassDeclaration = [{ + static constexpr char kTargetKeyword[] = "}]#targetASCase.symbol#[{"; + static constexpr int32_t kFirstTargetASValue = }]#targetASCase.value#[{; + + bool isLang() const; + bool isTarget() const; + unsigned getTargetValue() const; + + /// Convert a clang LangAS to its corresponding CIR AS storage value. This + /// helper does not perform any language-specific mappings (e.g. determining + /// the default AS for offloading languages), so these must be handled in + /// the caller. + static std::optional getValueFromLangAS(clang::LangAS v); + + /// Helper methods for the assembly format `custom`. + static std::optional parseValueFromString(llvm::StringRef s); + static std::optional stringifyValue(int32_t v); + + struct Kind { + }]#!interleave( + !foreach(case, langASCases, + "static constexpr int32_t "#case.symbol#" = "#case.value#";" + ), "\n" + )#[{ + }; + }]; + + let extraClassDefinition = [{ + bool $cppClass::isLang() const { + return !isTarget(); + } + + bool $cppClass::isTarget() const { + return getValue() >= kFirstTargetASValue; + } + + unsigned $cppClass::getTargetValue() const { + assert(isTarget() && "Not a target address space"); + return getValue() - kFirstTargetASValue; + } + + std::optional + $cppClass::parseValueFromString(llvm::StringRef str) { + return llvm::StringSwitch<::std::optional>(str) + }] + # + !interleave( + !foreach(case, langASCases, + ".Case(\""#case.symbol# "\", "#case.value # ")\n" + ), + "\n" + ) + # + [{ + // Target address spaces are not parsed here + .Default(std::nullopt); + } + + std::optional + $cppClass::stringifyValue(int32_t value) { + switch (value) { + }] + # + !interleave( + !foreach(case, langASCases, + "case "#case.value + # ": return \""#case.symbol # "\";" ), + "\n" + ) + # + [{ + default: + // Target address spaces are not processed here + return std::nullopt; + } + } + }]; +} + +//===----------------------------------------------------------------------===// +// AST Wrappers +//===----------------------------------------------------------------------===// + +class AST traits = []> + : CIR_Attr { + string clang_name = !strconcat("const clang::", name, " *"); + + let summary = !strconcat("Wraps a '", clang_name, "' AST node."); + let description = [{ + Operations optionally refer to this node, they could be available depending + on the CIR lowering stage. Whether it's attached to the appropriated + CIR operation is delegated to the operation verifier. + + This always implies a non-null AST reference (verified). + }]; + let parameters = (ins clang_name:$ast); + + // Printing and parsing available in CIRDialect.cpp + let hasCustomAssemblyFormat = 1; + + // Enable verifier. 
+ let genVerifyDecl = 1; + + let extraClassDefinition = [{ + ::mlir::Attribute $cppClass::parse(::mlir::AsmParser &parser, + ::mlir::Type type) { + // We cannot really parse anything AST related at this point + // since we have no serialization/JSON story. + return $cppClass::get(parser.getContext(), nullptr); + } + + void $cppClass::print(::mlir::AsmPrinter &printer) const { + // Nothing to print besides the mnemonics. + } + + LogicalResult $cppClass::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + }] # clang_name # [{ decl) { + return success(); + } + }]; +} + +def ASTDeclAttr : AST<"Decl", "decl", [ASTDeclInterface]>; + +def ASTFunctionDeclAttr : AST<"FunctionDecl", "function.decl", + [ASTFunctionDeclInterface]>; + +def ASTCXXMethodDeclAttr : AST<"CXXMethodDecl", "cxxmethod.decl", + [ASTCXXMethodDeclInterface]>; + +def ASTCXXConstructorDeclAttr : AST<"CXXConstructorDecl", + "cxxconstructor.decl", [ASTCXXConstructorDeclInterface]>; + +def ASTCXXConversionDeclAttr : AST<"CXXConversionDecl", + "cxxconversion.decl", [ASTCXXConversionDeclInterface]>; + +def ASTCXXDestructorDeclAttr : AST<"CXXDestructorDecl", + "cxxdestructor.decl", [ASTCXXDestructorDeclInterface]>; + +def ASTVarDeclAttr : AST<"VarDecl", "var.decl", + [ASTVarDeclInterface]>; + +def ASTTypeDeclAttr: AST<"TypeDecl", "type.decl", + [ASTTypeDeclInterface]>; + +def ASTTagDeclAttr : AST<"TagDecl", "tag.decl", + [ASTTagDeclInterface]>; + +def ASTRecordDeclAttr : AST<"RecordDecl", "record.decl", + [ASTRecordDeclInterface]>; + +def ASTExprAttr : AST<"Expr", "expr", + [ASTExprInterface]>; + +def ASTCallExprAttr : AST<"CallExpr", "call.expr", + [ASTCallExprInterface]>; + + +//===----------------------------------------------------------------------===// +// VisibilityAttr +//===----------------------------------------------------------------------===// + +def VK_Default : I32EnumAttrCase<"Default", 1, "default">; +def VK_Hidden : I32EnumAttrCase<"Hidden", 2, "hidden">; +def VK_Protected : I32EnumAttrCase<"Protected", 3, "protected">; + +def VisibilityKind : I32EnumAttr<"VisibilityKind", "C/C++ visibility", [ + VK_Default, VK_Hidden, VK_Protected +]> { + let cppNamespace = "::mlir::cir"; +} + +def VisibilityAttr : CIR_Attr<"Visibility", "visibility"> { + let summary = "Visibility attribute"; + let description = [{ + Visibility attributes. + }]; + let parameters = (ins "VisibilityKind":$value); + + let assemblyFormat = [{ + $value + }]; + + let builders = [ + AttrBuilder<(ins CArg<"VisibilityKind", "cir::VisibilityKind::Default">:$value), [{ + return $_get($_ctxt, value); + }]> + ]; + + let skipDefaultBuilders = 1; + + let extraClassDeclaration = [{ + bool isDefault() const { return getValue() == VisibilityKind::Default; }; + bool isHidden() const { return getValue() == VisibilityKind::Hidden; }; + bool isProtected() const { return getValue() == VisibilityKind::Protected; }; + }]; +} + + +//===----------------------------------------------------------------------===// +// ExtraFuncAttr +//===----------------------------------------------------------------------===// + +def ExtraFuncAttr : CIR_Attr<"ExtraFuncAttributes", "extra"> { + let summary = "Represents aggregated attributes for a function"; + let description = [{ + This is a wrapper of dictionary attrbiute that contains extra attributes of + a function. 
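+
+    A sketch of how this typically appears attached to a function (the alias
+    name and the chosen entries are illustrative):
+    ```
+    #fn_attr = #cir<extra({inline = #cir.inline<no>, nothrow = #cir.nothrow})>
+    cir.func @f() extra(#fn_attr) {
+      cir.return
+    }
+    ```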
+  }];
+
+  let parameters = (ins "DictionaryAttr":$elements);
+
+  let assemblyFormat = [{ `(` $elements `)` }];
+
+  // Printing and parsing also available in CIRDialect.cpp
+}
+
+def NoInline : I32EnumAttrCase<"NoInline", 1, "no">;
+def AlwaysInline : I32EnumAttrCase<"AlwaysInline", 2, "always">;
+def InlineHint : I32EnumAttrCase<"InlineHint", 3, "hint">;
+
+def InlineKind : I32EnumAttr<"InlineKind", "inlineKind", [
+  NoInline, AlwaysInline, InlineHint
+]> {
+  let cppNamespace = "::mlir::cir";
+}
+
+def InlineAttr : CIR_Attr<"Inline", "inline"> {
+  let summary = "Inline attribute";
+  let description = [{
+    Inline attributes represent user directives.
+  }];
+
+  let parameters = (ins "InlineKind":$value);
+
+  let assemblyFormat = [{
+    `<` $value `>`
+  }];
+
+  let extraClassDeclaration = [{
+    bool isNoInline() const { return getValue() == InlineKind::NoInline; };
+    bool isAlwaysInline() const { return getValue() == InlineKind::AlwaysInline; };
+    bool isInlineHint() const { return getValue() == InlineKind::InlineHint; };
+  }];
+}
+
+def OptNoneAttr : CIRUnitAttr<"OptNone", "optnone"> {
+  let storageType = [{ OptNoneAttr }];
+}
+
+def NoThrowAttr : CIRUnitAttr<"NoThrow", "nothrow"> {
+  let storageType = [{ NoThrowAttr }];
+}
+
+def ConvergentAttr : CIRUnitAttr<"Convergent", "convergent"> {
+  let storageType = [{ ConvergentAttr }];
+}
+
+class CIR_GlobalCtorDtor<string name, string attrMnemonic, string sum,
+                         string desc>
+    : CIR_Attr<"Global" # name, "global_" # attrMnemonic> {
+  let summary = sum;
+  let description = desc;
+
+  let parameters = (ins "StringAttr":$name, "int":$priority);
+  let assemblyFormat = [{
+    `<`
+      $name `,` $priority
+    `>`
+  }];
+  let builders = [
+    AttrBuilder<(ins "StringRef":$name,
+                     CArg<"int", "65536">:$priority), [{
+      return $_get($_ctxt, StringAttr::get($_ctxt, name), priority);
+    }]>
+  ];
+  let extraClassDeclaration = [{
+    bool isDefaultPriority() const { return getPriority() == 65536; };
+  }];
+  let skipDefaultBuilders = 1;
+}
+
+def GlobalCtorAttr : CIR_GlobalCtorDtor<"Ctor", "ctor",
+  "Marks a function as a global constructor",
+  "A function with this attribute executes before main()"
+>;
+def GlobalDtorAttr : CIR_GlobalCtorDtor<"Dtor", "dtor",
+  "Marks a function as a global destructor",
+  "A function with this attribute executes before module unloading"
+>;
+
+def BitfieldInfoAttr : CIR_Attr<"BitfieldInfo", "bitfield_info"> {
+  let summary = "Represents bitfield information";
+  let description = [{
+    Holds the following information about a bitfield: its name, the storage
+    type, the bitfield size and position within the storage, and whether the
+    bitfield is signed.
+  }];
+  let parameters = (ins "StringAttr":$name,
+                        "Type":$storage_type,
+                        "uint64_t":$size,
+                        "uint64_t":$offset,
+                        "bool":$is_signed);
+
+  let assemblyFormat = "`<` struct($name, $storage_type, $size, $offset, $is_signed) `>`";
+
+  let builders = [
+    AttrBuilder<(ins "StringRef":$name,
+                     "Type":$storage_type,
+                     "uint64_t":$size,
+                     "uint64_t":$offset,
+                     "bool":$is_signed
+                 ), [{
+      return $_get($_ctxt, StringAttr::get($_ctxt, name), storage_type, size, offset, is_signed);
+    }]>
+  ];
+}
+
+//===----------------------------------------------------------------------===//
+// AnnotationAttr
+//===----------------------------------------------------------------------===//
+
+def AnnotationAttr : CIR_Attr<"Annotation", "annotation"> {
+  let summary = "Annotation attribute for global variables and functions";
+  let description = [{
+    Represents the C/C++ `annotate` attribute in CIR.
+ Example C code: + ``` + int *a __attribute__((annotate("testptr", "21", 12 ))); + ``` + In this example code, the `AnnotationAttr` has annotation name "testptr", + and arguments "21" and 12 constitutes an `ArrayAttr` type parameter `args` + for global variable `a`. + In CIR, the attribute for above annotation looks like: + ``` + [#cir.annotation] + ``` + }]; + + // The parameter args is empty when there is no arg. + let parameters = (ins "StringAttr":$name, + "ArrayAttr":$args); + + let assemblyFormat = "`<` struct($name, $args) `>`"; + + let extraClassDeclaration = [{ + bool isNoArgs() const { return getArgs().empty(); }; + }]; +} + +//===----------------------------------------------------------------------===// +// GlobalAnnotationValuesAttr +//===----------------------------------------------------------------------===// + +def GlobalAnnotationValuesAttr : CIR_Attr<"GlobalAnnotationValues", + "global_annotations"> { + let summary = "Array of annotations, each element consists of name of" + "a global var or func and one of its annotations"; + let description = [{ + This is annotation value array, which holds the annotation + values for all global variables and functions in a module. + This array is used to create the initial value of a global annotation + metadata variable in LLVM IR. + Example C code: + ``` + double *a __attribute__((annotate("withargs", "21", 12 ))); + int *b __attribute__((annotate("withargs", "21", 12 ))); + void *c __attribute__((annotate("noargvar"))); + void foo(int i) __attribute__((annotate("noargfunc"))) {} + ``` + After CIR lowering prepare pass, compiler generates a + `GlobalAnnotationValuesAttr` like the following: + ``` + #cir], + ["b", #cir.annotation], + ["c", #cir.annotation], + ["foo", #cir.annotation]]> + ``` + }]; + + let parameters = (ins "ArrayAttr":$annotations); + + let assemblyFormat = [{ $annotations }]; + + // Enable verifier. + let genVerifyDecl = 1; +} + +include "clang/CIR/Dialect/IR/CIROpenCLAttrs.td" + +#endif // MLIR_CIR_DIALECT_CIR_ATTRS diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h b/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h new file mode 100644 index 000000000000..55320895db91 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h @@ -0,0 +1,165 @@ +//===--- CIRDataLayout.h - CIR Data Layout Information ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Provides a LLVM-like API wrapper to DLTI and MLIR layout queries. This makes +// it easier to port some of LLVM codegen layout logic to CIR. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CIR_DIALECT_IR_CIRDATALAYOUT_H +#define LLVM_CLANG_CIR_DIALECT_IR_CIRDATALAYOUT_H + +#include "mlir/Dialect/DLTI/DLTI.h" +#include "mlir/IR/BuiltinOps.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/TypeSize.h" + +namespace cir { + +class StructLayout; + +// FIXME(cir): This might be replaced by a CIRDataLayout interface which can +// provide the same functionalities. +class CIRDataLayout { + bool bigEndian = false; + + /// Primitive type alignment data. This is sorted by type and bit + /// width during construction. 
+ llvm::LayoutAlignElem StructAlignment; + + // The StructType -> StructLayout map. + mutable void *LayoutMap = nullptr; + +public: + mlir::DataLayout layout; + + /// Constructs a DataLayout the module's data layout attribute. + CIRDataLayout(mlir::ModuleOp modOp); + + /// Parse a data layout string (with fallback to default values). + void reset(mlir::DataLayoutSpecInterface spec); + + // Free all internal data structures. + void clear(); + + bool isBigEndian() const { return bigEndian; } + + /// Returns a StructLayout object, indicating the alignment of the + /// struct, its size, and the offsets of its fields. + /// + /// Note that this information is lazily cached. + const StructLayout *getStructLayout(mlir::cir::StructType Ty) const; + + /// Internal helper method that returns requested alignment for type. + llvm::Align getAlignment(mlir::Type Ty, bool abiOrPref) const; + + llvm::Align getABITypeAlign(mlir::Type ty) const { + return getAlignment(ty, true); + } + + llvm::Align getPrefTypeAlign(mlir::Type Ty) const { + return getAlignment(Ty, false); + } + + /// Returns the maximum number of bytes that may be overwritten by + /// storing the specified type. + /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// + /// For example, returns 5 for i36 and 10 for x86_fp80. + llvm::TypeSize getTypeStoreSize(mlir::Type Ty) const { + llvm::TypeSize BaseSize = getTypeSizeInBits(Ty); + return {llvm::divideCeil(BaseSize.getKnownMinValue(), 8), + BaseSize.isScalable()}; + } + + /// Returns the offset in bytes between successive objects of the + /// specified type, including alignment padding. + /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// + /// This is the amount that alloca reserves for this type. For example, + /// returns 12 or 16 for x86_fp80, depending on alignment. + llvm::TypeSize getTypeAllocSize(mlir::Type Ty) const { + // Round up to the next alignment boundary. + return llvm::alignTo(getTypeStoreSize(Ty), getABITypeAlign(Ty).value()); + } + + llvm::TypeSize getPointerTypeSizeInBits(mlir::Type Ty) const { + assert(mlir::isa(Ty) && + "This should only be called with a pointer type"); + return layout.getTypeSizeInBits(Ty); + } + + llvm::TypeSize getTypeSizeInBits(mlir::Type Ty) const; + + mlir::Type getIntPtrType(mlir::Type Ty) const { + assert(mlir::isa(Ty) && "Expected pointer type"); + auto IntTy = mlir::cir::IntType::get(Ty.getContext(), + getPointerTypeSizeInBits(Ty), false); + return IntTy; + } +}; + +/// Used to lazily calculate structure layout information for a target machine, +/// based on the DataLayout structure. +class StructLayout final + : public llvm::TrailingObjects { + llvm::TypeSize StructSize; + llvm::Align StructAlignment; + unsigned IsPadded : 1; + unsigned NumElements : 31; + +public: + llvm::TypeSize getSizeInBytes() const { return StructSize; } + + llvm::TypeSize getSizeInBits() const { return 8 * StructSize; } + + llvm::Align getAlignment() const { return StructAlignment; } + + /// Returns whether the struct has padding or not between its fields. + /// NB: Padding in nested element is not taken into account. + bool hasPadding() const { return IsPadded; } + + /// Given a valid byte offset into the structure, returns the structure + /// index that contains it. 
+ unsigned getElementContainingOffset(uint64_t FixedOffset) const; + + llvm::MutableArrayRef getMemberOffsets() { + return llvm::MutableArrayRef(getTrailingObjects(), + NumElements); + } + + llvm::ArrayRef getMemberOffsets() const { + return llvm::ArrayRef(getTrailingObjects(), NumElements); + } + + llvm::TypeSize getElementOffset(unsigned Idx) const { + assert(Idx < NumElements && "Invalid element idx!"); + return getMemberOffsets()[Idx]; + } + + llvm::TypeSize getElementOffsetInBits(unsigned Idx) const { + return getElementOffset(Idx) * 8; + } + +private: + friend class CIRDataLayout; // Only DataLayout can create this class + + StructLayout(mlir::cir::StructType ST, const CIRDataLayout &DL); + + size_t numTrailingObjects(OverloadToken) const { + return NumElements; + } +}; + +} // namespace cir + +#endif diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.h b/clang/include/clang/CIR/Dialect/IR/CIRDialect.h index d53e5d1663d6..d59b4ede3091 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.h +++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.h @@ -13,4 +13,86 @@ #ifndef LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H #define LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/Interfaces/CallInterfaces.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Interfaces/LoopLikeInterface.h" +#include "mlir/Interfaces/MemorySlotInterfaces.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" + +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIROpsDialect.h.inc" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIROpsStructs.h.inc" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "clang/CIR/Interfaces/CIRLoopOpInterface.h" +#include "clang/CIR/Interfaces/CIROpInterfaces.h" + +namespace mlir { +namespace OpTrait { + +namespace impl { +// These functions are out-of-line implementations of the methods in the +// corresponding trait classes. This avoids them being template +// instantiated/duplicated. +LogicalResult verifySameFirstOperandAndResultType(Operation *op); +LogicalResult verifySameSecondOperandAndResultType(Operation *op); +LogicalResult verifySameFirstSecondOperandAndResultType(Operation *op); +} // namespace impl + +/// This class provides verification for ops that are known to have the same +/// first operand and result type. +/// +template +class SameFirstOperandAndResultType + : public TraitBase { +public: + static LogicalResult verifyTrait(Operation *op) { + return impl::verifySameFirstOperandAndResultType(op); + } +}; + +/// This class provides verification for ops that are known to have the same +/// second operand and result type. +/// +template +class SameSecondOperandAndResultType + : public TraitBase { +public: + static LogicalResult verifyTrait(Operation *op) { + return impl::verifySameSecondOperandAndResultType(op); + } +}; + +/// This class provides verification for ops that are known to have the same +/// first, second operand and result type. 
+/// +template +class SameFirstSecondOperandAndResultType + : public TraitBase { +public: + static LogicalResult verifyTrait(Operation *op) { + return impl::verifySameFirstSecondOperandAndResultType(op); + } +}; + +} // namespace OpTrait + +namespace cir { +void buildTerminatedBody(OpBuilder &builder, Location loc); +} // namespace cir + +} // namespace mlir + +#define GET_OP_CLASSES +#include "clang/CIR/Dialect/IR/CIROps.h.inc" + #endif // LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td index 69d6e9774942..fc87df7c86a2 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td @@ -27,7 +27,14 @@ def CIR_Dialect : Dialect { let useDefaultAttributePrinterParser = 0; let useDefaultTypePrinterParser = 0; + let hasConstantMaterializer = 1; + let extraClassDeclaration = [{ + + // Names of CIR parameter attributes. + static StringRef getSExtAttrName() { return "cir.signext"; } + static StringRef getZExtAttrName() { return "cir.zeroext"; } + void registerAttributes(); void registerTypes(); diff --git a/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td new file mode 100644 index 000000000000..576d619fcf7a --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td @@ -0,0 +1,188 @@ +//===- CIROpenCLAttrs.td - CIR dialect attrs for OpenCL ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the CIR dialect attributes for OpenCL. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CIR_DIALECT_CIR_OPENCL_ATTRS +#define MLIR_CIR_DIALECT_CIR_OPENCL_ATTRS + +//===----------------------------------------------------------------------===// +// OpenCLKernelMetadataAttr +//===----------------------------------------------------------------------===// + +def OpenCLKernelMetadataAttr + : CIR_Attr<"OpenCLKernelMetadata", "cl.kernel_metadata"> { + + let summary = "OpenCL kernel metadata"; + let description = [{ + Provide the required information of an OpenCL kernel for the SPIR-V backend. + + The `work_group_size_hint` and `reqd_work_group_size` parameter are integer + arrays with 3 elements that provide hints for the work-group size and the + required work-group size, respectively. + + The `vec_type_hint` parameter is a type attribute that provides a hint for + the vectorization. It can be a CIR or LLVM type, depending on the lowering + stage. + + The `vec_type_hint_signedness` parameter is a boolean that indicates the + signedness of the vector type hint. It's useful when LLVM type is set in + `vec_type_hint`, which is signless by design. It should be set if and only + if the `vec_type_hint` is present. + + The `intel_reqd_sub_group_size` parameter is an integer that restricts the + sub-group size to the specified value. 
+ + Example: + ``` + #fn_attr = #cir})> + + cir.func @kernel(%arg0: !s32i) extra(#fn_attr) { + cir.return + } + ``` + }]; + + let parameters = (ins + OptionalParameter<"ArrayAttr">:$work_group_size_hint, + OptionalParameter<"ArrayAttr">:$reqd_work_group_size, + OptionalParameter<"TypeAttr">:$vec_type_hint, + OptionalParameter<"std::optional">:$vec_type_hint_signedness, + OptionalParameter<"IntegerAttr">:$intel_reqd_sub_group_size + ); + + let assemblyFormat = "`<` struct(params) `>`"; + + let genVerifyDecl = 1; + + let extraClassDeclaration = [{ + /// Extract the signedness from int or int vector types. + static std::optional isSignedHint(mlir::Type vecTypeHint); + }]; + + let extraClassDefinition = [{ + std::optional $cppClass::isSignedHint(mlir::Type hintQTy) { + // Only types in CIR carry signedness + if (!mlir::isa(hintQTy.getDialect())) + return std::nullopt; + + // See also clang::CodeGen::CodeGenFunction::EmitKernelMetadata + auto hintEltQTy = mlir::dyn_cast(hintQTy); + auto isCIRSignedIntType = [](mlir::Type t) { + return mlir::isa(t) && + mlir::cast(t).isSigned(); + }; + return isCIRSignedIntType(hintQTy) || + (hintEltQTy && isCIRSignedIntType(hintEltQTy.getEltType())); + } + }]; + +} + +//===----------------------------------------------------------------------===// +// OpenCLKernelArgMetadataAttr +//===----------------------------------------------------------------------===// + +def OpenCLKernelArgMetadataAttr + : CIR_Attr<"OpenCLKernelArgMetadata", "cl.kernel_arg_metadata"> { + + let summary = "OpenCL kernel argument metadata"; + let description = [{ + Provide the required information of an OpenCL kernel argument for the SPIR-V + backend. + + All parameters are arrays, containing the information of the argument in + the same order as they appear in the source code. + + The `addr_space` parameter is an array of I32 that provides the address + space of the argument. It's useful for special types like `image`, which + have implicit global address space. + + Other parameters are arrays of strings that pass through the information + from the source code correspondingly. + + All the fields are mandatory except for `name`, which is optional. + + Example: + ``` + #fn_attr = #cir})> + + cir.func @kernel(%arg0: !s32i) extra(#fn_attr) { + cir.return + } + ``` + }]; + + let parameters = (ins + "ArrayAttr":$addr_space, + "ArrayAttr":$access_qual, + "ArrayAttr":$type, + "ArrayAttr":$base_type, + "ArrayAttr":$type_qual, + OptionalParameter<"ArrayAttr">:$name + ); + + let assemblyFormat = "`<` struct(params) `>`"; + + let genVerifyDecl = 1; +} + +//===----------------------------------------------------------------------===// +// OpenCLVersionAttr +//===----------------------------------------------------------------------===// + +def OpenCLVersionAttr : CIR_Attr<"OpenCLVersion", "cl.version"> { + let summary = "OpenCL version"; + let parameters = (ins "int32_t":$major_version, "int32_t":$minor_version); + let description = [{ + Represents the version of OpenCL. + + Example: + ``` + // Module compiled from OpenCL 1.2. + module attributes {cir.cl.version = cir.cl.version<1, 2>} {} + // Module compiled from OpenCL 3.0. 
+ module attributes {cir.cl.version = cir.cl.version<3, 0>} {} + ``` + }]; + let assemblyFormat = "`<` $major_version `,` $minor_version `>`"; +} + + +//===----------------------------------------------------------------------===// +// OpenCLKernelAttr +//===----------------------------------------------------------------------===// + +// TODO: It might be worthwhile to introduce a generic attribute applicable to +// all offloading languages. +def OpenCLKernelAttr : CIRUnitAttr< + "OpenCLKernel", "cl.kernel"> { + let summary = "OpenCL kernel"; + let description = [{ + Indicate the function is a OpenCL kernel. + }]; + + let storageType = [{ OpenCLKernelAttr }]; +} + +#endif // MLIR_CIR_DIALECT_CIR_OPENCL_ATTRS diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 7311c8db783e..00d771a75c63 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -15,5 +15,4644 @@ #define LLVM_CLANG_CIR_DIALECT_IR_CIROPS include "clang/CIR/Dialect/IR/CIRDialect.td" +include "clang/CIR/Dialect/IR/CIRTypes.td" +include "clang/CIR/Dialect/IR/CIRAttrs.td" + +include "clang/CIR/Interfaces/ASTAttrInterfaces.td" +include "clang/CIR/Interfaces/CIROpInterfaces.td" +include "clang/CIR/Interfaces/CIRLoopOpInterface.td" + +include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/FunctionInterfaces.td" +include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Interfaces/LoopLikeInterface.td" +include "mlir/Interfaces/MemorySlotInterfaces.td" +include "mlir/Interfaces/SideEffectInterfaces.td" + +include "mlir/IR/BuiltinAttributeInterfaces.td" +include "mlir/IR/EnumAttr.td" +include "mlir/IR/SymbolInterfaces.td" +include "mlir/IR/CommonAttrConstraints.td" + +//===----------------------------------------------------------------------===// +// CIR Ops +//===----------------------------------------------------------------------===// + +// LLVMLoweringInfo is used by cir-tablegen to generate LLVM lowering logic +// automatically for CIR operations. The `llvmOp` field gives the name of the +// LLVM IR dialect operation that the CIR operation will be lowered to. The +// input arguments of the CIR operation will be passed in the same order to the +// lowered LLVM IR operation. +// +// Example: +// +// For the following CIR operation definition: +// +// def FooOp : CIR_Op<"foo"> { +// // ... +// let arguments = (ins CIR_AnyType:$arg1, CIR_AnyType:$arg2); +// let llvmOp = "BarOp"; +// } +// +// cir-tablegen will generate LLVM lowering code for the FooOp similar to the +// following: +// +// class CIRFooOpLowering +// : public mlir::OpConversionPattern { +// public: +// using OpConversionPattern::OpConversionPattern; +// +// mlir::LogicalResult matchAndRewrite( +// mlir::cir::FooOp op, +// OpAdaptor adaptor, +// mlir::ConversionPatternRewriter &rewriter) const override { +// rewriter.replaceOpWithNewOp( +// op, adaptor.getOperands()[0], adaptor.getOperands()[1]); +// return mlir::success(); +// } +// } +// +// If you want fully customized LLVM IR lowering logic, simply exclude the +// `llvmOp` field from your CIR operation definition. 
+class LLVMLoweringInfo { + string llvmOp = ""; +} + +class CIR_Op traits = []> : + Op, LLVMLoweringInfo; + +//===----------------------------------------------------------------------===// +// CIR Op Traits +//===----------------------------------------------------------------------===// + +def SameFirstOperandAndResultType : + NativeOpTrait<"SameFirstOperandAndResultType">; +def SameSecondOperandAndResultType : + NativeOpTrait<"SameSecondOperandAndResultType">; +def SameFirstSecondOperandAndResultType : + NativeOpTrait<"SameFirstSecondOperandAndResultType">; + +//===----------------------------------------------------------------------===// +// CastOp +//===----------------------------------------------------------------------===// + +// The enumaration value isn't in sync with clang. +def CK_IntegralToBoolean : I32EnumAttrCase<"int_to_bool", 1>; +def CK_ArrayToPointerDecay : I32EnumAttrCase<"array_to_ptrdecay", 2>; +def CK_IntegralCast : I32EnumAttrCase<"integral", 3>; +def CK_BitCast : I32EnumAttrCase<"bitcast", 4>; +def CK_FloatingCast : I32EnumAttrCase<"floating", 5>; +def CK_PtrToBoolean : I32EnumAttrCase<"ptr_to_bool", 6>; +def CK_FloatToIntegral : I32EnumAttrCase<"float_to_int", 7>; +def CK_IntegralToPointer : I32EnumAttrCase<"int_to_ptr", 8>; +def CK_PointerToIntegral : I32EnumAttrCase<"ptr_to_int", 9>; +def CK_FloatToBoolean : I32EnumAttrCase<"float_to_bool", 10>; +def CK_BooleanToIntegral : I32EnumAttrCase<"bool_to_int", 11>; +def CK_IntegralToFloat : I32EnumAttrCase<"int_to_float", 12>; +def CK_BooleanToFloat : I32EnumAttrCase<"bool_to_float", 13>; +def CK_AddressSpaceConversion : I32EnumAttrCase<"address_space", 14>; +def CK_FloatToComplex : I32EnumAttrCase<"float_to_complex", 15>; +def CK_IntegralToComplex : I32EnumAttrCase<"int_to_complex", 16>; +def CK_FloatComplexToReal : I32EnumAttrCase<"float_complex_to_real", 17>; +def CK_IntegralComplexToReal : I32EnumAttrCase<"int_complex_to_real", 18>; +def CK_FloatComplexToBoolean : I32EnumAttrCase<"float_complex_to_bool", 19>; +def CK_IntegralComplexToBoolean : I32EnumAttrCase<"int_complex_to_bool", 20>; +def CK_FloatComplexCast : I32EnumAttrCase<"float_complex", 21>; +def CK_FloatComplexToIntegralComplex + : I32EnumAttrCase<"float_complex_to_int_complex", 22>; +def CK_IntegralComplexCast : I32EnumAttrCase<"int_complex", 23>; +def CK_IntegralComplexToFloatComplex + : I32EnumAttrCase<"int_complex_to_float_complex", 24>; + +def CastKind : I32EnumAttr< + "CastKind", + "cast kind", + [CK_IntegralToBoolean, CK_ArrayToPointerDecay, CK_IntegralCast, + CK_BitCast, CK_FloatingCast, CK_PtrToBoolean, CK_FloatToIntegral, + CK_IntegralToPointer, CK_PointerToIntegral, CK_FloatToBoolean, + CK_BooleanToIntegral, CK_IntegralToFloat, CK_BooleanToFloat, + CK_AddressSpaceConversion, CK_FloatToComplex, CK_IntegralToComplex, + CK_FloatComplexToReal, CK_IntegralComplexToReal, CK_FloatComplexToBoolean, + CK_IntegralComplexToBoolean, CK_FloatComplexCast, + CK_FloatComplexToIntegralComplex, CK_IntegralComplexCast, + CK_IntegralComplexToFloatComplex]> { + let cppNamespace = "::mlir::cir"; +} + +def CastOp : CIR_Op<"cast", + [Pure, + DeclareOpInterfaceMethods]> { + // FIXME: not all conversions are free of side effects. + let summary = "Conversion between values of different types"; + let description = [{ + Apply C/C++ usual conversions rules between values. 
Currently supported kinds: + + - `array_to_ptrdecay` + - `bitcast` + - `integral` + - `int_to_bool` + - `int_to_float` + - `floating` + - `float_to_int` + - `float_to_bool` + - `ptr_to_int` + - `ptr_to_bool` + - `bool_to_int` + - `bool_to_float` + - `address_space` + - `float_to_complex` + - `int_to_complex` + - `float_complex_to_real` + - `int_complex_to_real` + - `float_complex_to_bool` + - `int_complex_to_bool` + - `float_complex` + - `float_complex_to_int_complex` + - `int_complex` + - `int_complex_to_float_complex` + + This is effectively a subset of the rules from + `llvm-project/clang/include/clang/AST/OperationKinds.def`; but note that some + of the conversions aren't implemented in terms of `cir.cast`, `lvalue-to-rvalue` + for instance is modeled as a regular `cir.load`. + + ```mlir + %4 = cir.cast (int_to_bool, %3 : i32), !cir.bool + ... + %x = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr + ``` + }]; + + let arguments = (ins CastKind:$kind, CIR_AnyType:$src); + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + `(` $kind `,` $src `:` type($src) `)` + `,` type($result) attr-dict + }]; + + // The input and output types should match the cast kind. + let hasVerifier = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// DynamicCastOp +//===----------------------------------------------------------------------===// + +def DCK_PtrCast : I32EnumAttrCase<"ptr", 1>; +def DCK_RefCast : I32EnumAttrCase<"ref", 2>; + +def DynamicCastKind : I32EnumAttr< + "DynamicCastKind", "dynamic cast kind", [DCK_PtrCast, DCK_RefCast]> { + let cppNamespace = "::mlir::cir"; +} + +def DynamicCastOp : CIR_Op<"dyn_cast"> { + let summary = "Perform dynamic cast on struct pointers"; + let description = [{ + The `cir.dyn_cast` operation models part of the semantics of the + `dynamic_cast` operator in C++. It can be used to perform 3 kinds of casts + on struct pointers: + + - Down-cast, which casts a base class pointer to a derived class pointer; + - Side-cast, which casts a class pointer to a sibling class pointer; + - Cast-to-complete, which casts a class pointer to a void pointer. + + The input of the operation must be a struct pointer. The result of the + operation is either a struct pointer or a void pointer. + + The parameter `kind` specifies the semantics of this operation. If its value + is `ptr`, then the operation models dynamic casts on pointers. Otherwise, if + its value is `ref`, the operation models dynamic casts on references. + Specifically: + + - When the input pointer is a null pointer value: + - If `kind` is `ref`, the operation will invoke undefined behavior. A + sanitizer check will be emitted if sanitizer is on. + - Otherwise, the operation will return a null pointer value as its result. + - When the runtime type check fails: + - If `kind` is `ref`, the operation will throw a `bad_cast` exception. + - Otherwise, the operation will return a null pointer value as its result. + + The `info` argument gives detailed information about the requested dynamic + cast operation. It is an optional `#cir.dyn_cast_info` attribute that is + only present when the operation models a down-cast or a side-cast. + + The `relative_layout` argument specifies whether the Itanium C++ ABI vtable + uses relative layout. It is only meaningful when the operation models a + cast-to-complete operation. 
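For orientation, the C++ below sketches the three source-level `dynamic_cast` forms that the description distinguishes (down-cast/side-cast on pointers, reference casts, and cast-to-complete). The type and function names are invented for illustration; the behavior notes in the comments simply restate the semantics given above.

```cpp
// Illustrative C++ only: the dynamic_cast forms discussed above.
struct Base { virtual ~Base() {} };
struct Derived : Base {};

void casts(Base *bp, Base &br) {
  // Down-cast on a pointer: kind `ptr`, yields a null pointer on failure.
  Derived *dp = dynamic_cast<Derived *>(bp);
  // Cast on a reference: kind `ref`, throws std::bad_cast on failure.
  Derived &dr = dynamic_cast<Derived &>(br);
  // Cast-to-complete: the result is a void pointer to the most derived object.
  void *complete = dynamic_cast<void *>(bp);
  (void)dp; (void)dr; (void)complete;
}
```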
+ }]; + + let arguments = (ins DynamicCastKind:$kind, + StructPtr:$src, + OptionalAttr:$info, + UnitAttr:$relative_layout); + let results = (outs CIR_PointerType:$result); + + let assemblyFormat = [{ + `(` + $kind `,` $src `:` type($src) + (`,` qualified($info)^)? + (`relative_layout` $relative_layout^)? + `)` + `->` qualified(type($result)) attr-dict + }]; + + let extraClassDeclaration = [{ + /// Determine whether this operation models reference casting in C++. + bool isRefcast() { + return getKind() == ::mlir::cir::DynamicCastKind::ref; + } + + /// Determine whether this operation represents a dynamic cast to a void + /// pointer. + bool isCastToVoid() { + return getType().isVoidPtr(); + } + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// ObjSizeOp +//===----------------------------------------------------------------------===// + +def SizeInfoTypeMin : I32EnumAttrCase<"min", 0>; +def SizeInfoTypeMax : I32EnumAttrCase<"max", 1>; + +def SizeInfoType : I32EnumAttr< + "SizeInfoType", + "size info type", + [SizeInfoTypeMin, SizeInfoTypeMax]> { + let cppNamespace = "::mlir::cir"; +} + +def ObjSizeOp : CIR_Op<"objsize", [Pure]> { + let summary = "Conversion between values of different types"; + let description = [{ + }]; + + let arguments = (ins CIR_PointerType:$ptr, SizeInfoType:$kind, + UnitAttr:$dynamic); + let results = (outs PrimitiveInt:$result); + + let assemblyFormat = [{ + `(` + $ptr `:` type($ptr) `,` + $kind + (`,` `dynamic` $dynamic^)? + `)` + `->` type($result) attr-dict + }]; + + // Nothing to verify that isn't already covered by constraints. + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// PtrDiffOp +//===----------------------------------------------------------------------===// + +def PtrDiffOp : CIR_Op<"ptr_diff", [Pure, SameTypeOperands]> { + + let summary = "Pointer subtraction arithmetic"; + let description = [{ + `cir.ptr_diff` performs a subtraction between two pointer types with the + same element type and produces a `mlir::cir::IntType` result. + + Note that the result considers the pointer size according to the ABI for + the pointee sizes, e.g. the subtraction between two `!cir.ptr` might + yield 1, meaning 8 bytes, whereas for `void` or function type pointees, + yielding 8 means 8 bytes. + + ```mlir + %7 = "cir.ptr_diff"(%0, %1) : !cir.ptr -> !u64i + ``` + }]; + + let results = (outs PrimitiveInt:$result); + let arguments = (ins CIR_PointerType:$lhs, CIR_PointerType:$rhs); + + let assemblyFormat = [{ + `(` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `->` qualified(type($result)) attr-dict + }]; + + // Already covered by the traits + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// PtrStrideOp +//===----------------------------------------------------------------------===// + +def PtrStrideOp : CIR_Op<"ptr_stride", + [Pure, SameFirstOperandAndResultType]> { + let summary = "Pointer access with stride"; + let description = [{ + Given a base pointer as first operand, provides a new pointer after applying + a stride (second operand). 
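In C terms, the pointer operations in this section correspond to ordinary pointer arithmetic. The snippet below is only meant to show the shapes involved; the mapping comments are illustrative rather than taken from this patch.

```cpp
#include <cstddef>

// Illustrative: pointer subtraction and base-plus-stride arithmetic,
// cf. the cir.ptr_diff and cir.ptr_stride operations described here.
std::ptrdiff_t distance(int *a, int *b) { return a - b; } // counted in elements, not bytes
int *advance(int *base, int i) { return base + i; }       // new pointer derived from base
```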
+ + ```mlir + %3 = cir.const 0 : i32 + %4 = cir.ptr_stride(%2 : !cir.ptr, %3 : i32), !cir.ptr + ``` + }]; + + let arguments = (ins CIR_PointerType:$base, PrimitiveInt:$stride); + let results = (outs CIR_PointerType:$result); + + let assemblyFormat = [{ + `(` $base `:` qualified(type($base)) `,` $stride `:` qualified(type($stride)) `)` + `,` qualified(type($result)) attr-dict + }]; + + let extraClassDeclaration = [{ + // Get type pointed by the base pointer. + mlir::Type getElementTy() { + return mlir::cast(getBase().getType()).getPointee(); + } + }]; + + // SameFirstOperandAndResultType already checks all we need. + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// ConstantOp +//===----------------------------------------------------------------------===// + +def ConstantOp : CIR_Op<"const", + [ConstantLike, Pure, AllTypesMatch<["value", "res"]>]> { + // FIXME: Use SameOperandsAndResultType or similar and prevent eye bleeding + // type repetition in the assembly form. + + let summary = "Defines a CIR constant"; + let description = [{ + The `cir.const` operation turns a literal into an SSA value. The data is + attached to the operation as an attribute. + + ```mlir + %0 = cir.const 42 : i32 + %1 = cir.const 4.2 : f32 + %2 = cir.const nullptr : !cir.ptr + ``` + }]; + + // The constant operation takes an attribute as the only input. + let arguments = (ins TypedAttrInterface:$value); + + // The constant operation returns a single value of CIR_AnyType. + let results = (outs CIR_AnyType:$res); + + let assemblyFormat = "attr-dict $value"; + + let hasVerifier = 1; + + let extraClassDeclaration = [{ + bool isNullPtr() { + if (const auto ptrAttr = mlir::dyn_cast(getValue())) + return ptrAttr.isNullValue(); + return false; + } + }]; + + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// C/C++ memory order definitions +//===----------------------------------------------------------------------===// + +def MemOrderRelaxed : I32EnumAttrCase<"Relaxed", 0, "relaxed">; +def MemOrderConsume : I32EnumAttrCase<"Consume", 1, "consume">; +def MemOrderAcquire : I32EnumAttrCase<"Acquire", 2, "acquire">; +def MemOrderRelease : I32EnumAttrCase<"Release", 3, "release">; +def MemOrderAcqRel : I32EnumAttrCase<"AcquireRelease", 4, "acq_rel">; +def MemOrderSeqCst : I32EnumAttrCase<"SequentiallyConsistent", 5, "seq_cst">; + +def MemOrder : I32EnumAttr< + "MemOrder", + "Memory order according to C++11 memory model", + [MemOrderRelaxed, MemOrderConsume, MemOrderAcquire, + MemOrderRelease, MemOrderAcqRel, MemOrderSeqCst]> { + let cppNamespace = "::mlir::cir"; +} + +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +class AllocaTypesMatchWith + : PredOpTrait> { + string lhs = lhsArg; + string rhs = rhsArg; + string transformer = transform; +} + +def AllocaOp : CIR_Op<"alloca", [ + AllocaTypesMatchWith<"'allocaType' matches pointee type of 'addr'", + "addr", "allocaType", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + let summary = "Defines a scope-local variable"; + let description = [{ + The `cir.alloca` operation defines a scope-local variable. + + The presence `init` attribute indicates that the local variable represented + by this alloca was originally initialized in C/C++ source code. 
In such + cases, the first use contains the initialization (a cir.store, a cir.call + to a ctor, etc). + + The `dynAllocSize` specifies the size to dynamically allocate on the stack + and ignores the allocation size based on the original type. This is useful + when handling VLAs and is omitted when declaring regular local variables. + + The result type is a pointer to the input's type. + + Example: + + ```mlir + // int count = 3; + %0 = cir.alloca i32, !cir.ptr, ["count", init] {alignment = 4 : i64} + + // int *ptr; + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["ptr"] {alignment = 8 : i64} + ... + ``` + }]; + + let arguments = (ins + Optional:$dynAllocSize, + TypeAttr:$allocaType, + StrAttr:$name, + UnitAttr:$init, + ConfinedAttr, [IntMinValue<0>]>:$alignment, + OptionalAttr:$ast + ); + + let results = (outs Res]>:$addr); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "Type":$addr, "Type":$allocaType, + "StringRef":$name, + "IntegerAttr":$alignment)>, + + OpBuilder<(ins "Type":$addr, + "Type":$allocaType, + "StringRef":$name, + "IntegerAttr":$alignment, + "Value":$dynAllocSize), + [{ + if (dynAllocSize) + $_state.addOperands(dynAllocSize); + build($_builder, $_state, addr, allocaType, name, alignment); + }]> + ]; + + let extraClassDeclaration = [{ + // Whether the alloca input type is a pointer. + bool isPointerType() { return ::mlir::isa<::mlir::cir::PointerType>(getAllocaType()); } + + bool isDynamic() { return (bool)getDynAllocSize(); } + }]; + + let assemblyFormat = [{ + $allocaType `,` qualified(type($addr)) `,` + ($dynAllocSize^ `:` type($dynAllocSize) `,`)? + `[` $name + (`,` `init` $init^)? + `]` + (`ast` $ast^)? attr-dict + }]; + + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// LoadOp +//===----------------------------------------------------------------------===// + +def LoadOp : CIR_Op<"load", [ + TypesMatchWith<"type of 'result' matches pointee type of 'addr'", + "addr", "result", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + + let summary = "Load value from memory adddress"; + let description = [{ + `cir.load` reads a value (lvalue to rvalue conversion) given an address + backed up by a `cir.ptr` type. A unit attribute `deref` can be used to + mark the resulting value as used by another operation to dereference + a pointer. A unit attribute `volatile` can be used to indicate a volatile + loading. Load can be marked atomic by using `atomic()`. + + `align` can be used to specify an alignment that's different from the + default, which is computed from `result`'s type ABI data layout. + + Example: + + ```mlir + + // Read from local variable, address in %0. + %1 = cir.load %0 : !cir.ptr, i32 + + // Load address from memory at address %0. %3 is used by at least one + // operation that dereferences a pointer. + %3 = cir.load deref %0 : !cir.ptr> + + // Perform a volatile load from address in %0. + %4 = cir.load volatile %0 : !cir.ptr, i32 + + // Others + %x = cir.load align(16) atomic(seq_cst) %0 : !cir.ptr, i32 + ``` + }]; + + let arguments = (ins Arg:$addr, UnitAttr:$isDeref, + UnitAttr:$is_volatile, + OptionalAttr:$alignment, + OptionalAttr:$mem_order + ); + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + (`deref` $isDeref^)? + (`volatile` $is_volatile^)? + (`align` `(` $alignment^ `)`)? + (`atomic` `(` $mem_order^ `)`)? + $addr `:` qualified(type($addr)) `,` type($result) attr-dict + }]; + + // FIXME: add verifier. 
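As a source-level reference for the load flavors listed above (plain, volatile, atomic), the snippet below shows one way each can arise; using `__atomic_load_n` to stand in for the atomic case is an assumption made here for illustration only.

```cpp
// Illustrative C/C++ sources for the load flavors described above.
int plain_load(int *p)             { return *p; }  // ordinary load
int volatile_load(volatile int *p) { return *p; }  // volatile load
// Atomic load with sequentially consistent ordering (Clang/GCC builtin).
int atomic_load_seq_cst(int *p) { return __atomic_load_n(p, __ATOMIC_SEQ_CST); }
```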
+} + +//===----------------------------------------------------------------------===// +// StoreOp +//===----------------------------------------------------------------------===// + +def StoreOp : CIR_Op<"store", [ + TypesMatchWith<"type of 'value' matches pointee type of 'addr'", + "addr", "value", + "cast($_self).getPointee()">, + DeclareOpInterfaceMethods]> { + + let summary = "Store value to memory address"; + let description = [{ + `cir.store` stores a value (first operand) to the memory address specified + in the second operand. A unit attribute `volatile` can be used to indicate + a volatile store. Store's can be marked atomic by using + `atomic()`. + + `align` can be used to specify an alignment that's different from the + default, which is computed from `result`'s type ABI data layout. + + Example: + + ```mlir + // Store a function argument to local storage, address in %0. + cir.store %arg0, %0 : i32, !cir.ptr + + // Perform a volatile store into memory location at the address in %0. + cir.store volatile %arg0, %0 : i32, !cir.ptr + + // Others + cir.store align(16) atomic(seq_cst) %x, %addr : i32, !cir.ptr + ``` + }]; + + let builders = [ + OpBuilder<(ins "Value":$value, "Value":$addr), [{ + $_state.addOperands({value, addr}); + }]> + ]; + + let arguments = (ins CIR_AnyType:$value, + Arg:$addr, + UnitAttr:$is_volatile, + OptionalAttr:$alignment, + OptionalAttr:$mem_order); + + let assemblyFormat = [{ + (`volatile` $is_volatile^)? + (`align` `(` $alignment^ `)`)? + (`atomic` `(` $mem_order^ `)`)? + $value `,` $addr attr-dict `:` type($value) `,` qualified(type($addr)) + }]; + + // FIXME: add verifier. +} + +//===----------------------------------------------------------------------===// +// ReturnOp +//===----------------------------------------------------------------------===// + +def ReturnOp : CIR_Op<"return", [ParentOneOf<["FuncOp", "ScopeOp", "IfOp", + "SwitchOp", "DoWhileOp", + "WhileOp", "ForOp"]>, + Terminator]> { + let summary = "Return from function"; + let description = [{ + The "return" operation represents a return operation within a function. + The operation takes an optional operand and produces no results. + The operand type must match the signature of the function that contains + the operation. + + ```mlir + func @foo() -> i32 { + ... + cir.return %0 : i32 + } + ``` + }]; + + // The return operation takes an optional input operand to return. This + // value must match the return type of the enclosing function. + let arguments = (ins Variadic:$input); + + // The return operation only emits the input in the format if it is present. + let assemblyFormat = "($input^ `:` type($input))? attr-dict "; + + // Allow building a ReturnOp with no return operand. + let builders = [ + OpBuilder<(ins), [{ build($_builder, $_state, std::nullopt); }]> + ]; + + // Provide extra utility definitions on the c++ operation class definition. + let extraClassDeclaration = [{ + bool hasOperand() { return getNumOperands() != 0; } + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// IfOp +//===----------------------------------------------------------------------===// + +def IfOp : CIR_Op<"if", + [DeclareOpInterfaceMethods, + RecursivelySpeculatable, AutomaticAllocationScope, NoRegionArguments]> { + let summary = "The if-then-else operation"; + let description = [{ + The `cir.if` operation represents an if-then-else construct for + conditionally executing two regions of code. The operand is a `cir.bool` + type. 
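In source terms, these are the if/then/else shapes being modeled. Using an `int` condition below to hint at the boolean conversion (cf. the `int_to_bool` cast kind earlier in this file) is an illustrative assumption.

```cpp
// Illustrative C++ for the forms covered by this operation.
void branches(int n) {
  if (n) {
    // then region
  } else {
    // else region
  }
  if (n > 0) {
    // 'else' region omitted
  }
}
```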
+ + Examples: + + ```mlir + cir.if %b { + ... + } else { + ... + } + + cir.if %c { + ... + } + + cir.if %c { + ... + cir.br ^a + ^a: + cir.yield + } + ``` + + `cir.if` defines no values and the 'else' can be omitted. `cir.yield` must + explicitly terminate the region if it has more than one block. + }]; + let arguments = (ins CIR_BoolType:$condition); + let regions = (region AnyRegion:$thenRegion, AnyRegion:$elseRegion); + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "Value":$cond, "bool":$withElseRegion, + CArg<"function_ref", + "buildTerminatedBody">:$thenBuilder, + CArg<"function_ref", + "nullptr">:$elseBuilder)> + ]; +} + +//===----------------------------------------------------------------------===// +// TernaryOp +//===----------------------------------------------------------------------===// + +def TernaryOp : CIR_Op<"ternary", + [DeclareOpInterfaceMethods, + RecursivelySpeculatable, AutomaticAllocationScope, NoRegionArguments]> { + let summary = "The `cond ? a : b` C/C++ ternary operation"; + let description = [{ + The `cir.ternary` operation represents C/C++ ternary, much like a `select` + operation. First argument is a `cir.bool` condition to evaluate, followed + by two regions to execute (true or false). This is different from `cir.if` + since each region is one block sized and the `cir.yield` closing the block + scope should have one argument. + + Example: + + ```mlir + // x = cond ? a : b; + + %x = cir.ternary (%cond, true_region { + ... + cir.yield %a : i32 + }, false_region { + ... + cir.yield %b : i32 + }) -> i32 + ``` + }]; + let arguments = (ins CIR_BoolType:$cond); + let regions = (region SizedRegion<1>:$trueRegion, + SizedRegion<1>:$falseRegion); + let results = (outs Optional:$result); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "Value":$cond, + "function_ref":$trueBuilder, + "function_ref":$falseBuilder) + > + ]; + + // All constraints already verified elsewhere. + let hasVerifier = 0; + + let assemblyFormat = [{ + `(` $cond `,` + `true` $trueRegion `,` + `false` $falseRegion + `)` `:` functional-type(operands, results) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// SelectOp +//===----------------------------------------------------------------------===// + +def SelectOp : CIR_Op<"select", [Pure, + AllTypesMatch<["true_value", "false_value", "result"]>]> { + let summary = "Yield one of two values based on a boolean value"; + let description = [{ + The `cir.select` operation takes three operands. The first operand + `condition` is a boolean value of type `!cir.bool`. The second and the third + operand can be of any CIR types, but their types must be the same. If the + first operand is `true`, the operation yields its second operand. Otherwise, + the operation yields its third operand. 
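For comparison with the region-based ternary above, the value-level pattern below is the kind of side-effect-free conditional a select models; whether the frontend picks the region form or the value form for a given expression is not specified here.

```cpp
// Illustrative: a side-effect-free conditional expression. Both arms are
// plain values, so no region or terminator structure is needed to model it.
int pick(bool cond, int a, int b) {
  return cond ? a : b;
}
```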
+ + Example: + + ```mlir + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.const #cir.int<42> : !s32i + %2 = cir.const #cir.int<72> : !s32i + %3 = cir.select if %0 then %1 else %2 : (!cir.bool, !s32i, !s32i) -> !s32i + ``` + }]; + + let arguments = (ins CIR_BoolType:$condition, CIR_AnyType:$true_value, + CIR_AnyType:$false_value); + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + `if` $condition `then` $true_value `else` $false_value + `:` `(` + qualified(type($condition)) `,` + qualified(type($true_value)) `,` + qualified(type($false_value)) + `)` `->` qualified(type($result)) attr-dict + }]; + + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// ConditionOp +//===----------------------------------------------------------------------===// + +def ConditionOp : CIR_Op<"condition", [ + Terminator, + DeclareOpInterfaceMethods +]> { + let summary = "Loop continuation condition."; + let description = [{ + The `cir.condition` terminates conditional regions. It takes a single + `cir.bool` operand and, depending on its value, may branch to different + regions: + + - When in the `cond` region of a `cir.loop`, it continues the loop + if true, or exits it if false. + - When in the `ready` region of a `cir.await`, it branches to the `resume` + region when true, and to the `suspend` region when false. + + Example: + + ```mlir + cir.loop for(cond : { + cir.condition(%arg0) // Branches to `step` region or exits. + }, step : { + [...] + }) { + [...] + } + + cir.await(user, ready : { + cir.condition(%arg0) // Branches to `resume` or `suspend` region. + }, suspend : { + [...] + }, resume : { + [...] + },) + ``` + }]; + let arguments = (ins CIR_BoolType:$condition); + let assemblyFormat = " `(` $condition `)` attr-dict "; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// YieldOp +//===----------------------------------------------------------------------===// + +def YieldOp : CIR_Op<"yield", [ReturnLike, Terminator, + ParentOneOf<["IfOp", "ScopeOp", "SwitchOp", "WhileOp", "ForOp", "AwaitOp", + "TernaryOp", "GlobalOp", "DoWhileOp", "TryOp", "ArrayCtor", + "ArrayDtor", "CallOp"]>]> { + let summary = "Represents the default branching behaviour of a region"; + let description = [{ + The `cir.yield` operation terminates regions on different CIR operations, + and it is used to represent the default branching behaviour of a region. + Said branching behaviour is determinted by the parent operation. For + example, a yield in a `switch-case` region implies a fallthrough, while + a yield in a `cir.if` region implies a branch to the exit block, and so + on. + + In some cases, it might yield an SSA value and the semantics of how the + values are yielded is defined by the parent operation. For example, a + `cir.ternary` operation yields a value from one of its regions. + + As a general rule, `cir.yield` must be explicitly used whenever a region has + more than one block and no terminator, or within `cir.switch` regions not + `cir.return` terminated. + + Examples: + ```mlir + cir.if %4 { + ... + cir.yield + } + + cir.switch (%5) [ + case (equal, 3) { + ... + cir.yield + }, ... + ] + + cir.scope { + ... + cir.yield + } + + %x = cir.scope { + ... + cir.yield %val + } + + %y = cir.ternary { + ... + cir.yield %val : i32 + } : i32 + ``` + }]; + + let arguments = (ins Variadic:$args); + let assemblyFormat = "($args^ `:` type($args))? 
attr-dict"; + let builders = [ + OpBuilder<(ins), [{ /* nothing to do */ }]>, + ]; +} + +//===----------------------------------------------------------------------===// +// BreakOp +//===----------------------------------------------------------------------===// + +def BreakOp : CIR_Op<"break", [Terminator]> { + let summary = "C/C++ `break` statement equivalent"; + let description = [{ + The `cir.break` operation is used to cease the control flow to the parent + operation, exiting its region's control flow. It is only allowed if it is + within a breakable operation (loops and `switch`). + }]; + let assemblyFormat = "attr-dict"; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// ContinueOp +//===----------------------------------------------------------------------===// + +def ContinueOp : CIR_Op<"continue", [Terminator]> { + let summary = "C/C++ `continue` statement equivalent"; + let description = [{ + The `cir.continue` operation is used to continue execution to the next + iteration of a loop. It is only allowed within `cir.loop` regions. + }]; + let assemblyFormat = "attr-dict"; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// Resume +//===----------------------------------------------------------------------===// + +def ResumeOp : CIR_Op<"resume", [ReturnLike, Terminator, + AttrSizedOperandSegments]> { + let summary = "Resumes execution after not catching exceptions"; + let description = [{ + The `cir.resume` operation handles an uncaught exception scenario and + behaves in two different modes: + + - As the terminator of a `CatchUnwind` region of `cir.try`, where it + does not receive any arguments (implied from the `cir.try` scope), or + - The terminator of a regular basic block without an enclosing `cir.try` + operation, where it requires an `exception_ptr` and a `type_id`. + + The `rethrow` attribute is used to denote rethrowing behavior for the + resume operation (versus default terminaton). + ``` + }]; + + let arguments = (ins Optional:$exception_ptr, + Optional:$type_id, + UnitAttr:$rethrow); + let assemblyFormat = [{ + ($rethrow^)? + ($exception_ptr^)? + (`,` $type_id^)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// ScopeOp +//===----------------------------------------------------------------------===// + +def ScopeOp : CIR_Op<"scope", [ + DeclareOpInterfaceMethods, + RecursivelySpeculatable, AutomaticAllocationScope, + NoRegionArguments]> { + let summary = "Represents a C/C++ scope"; + let description = [{ + `cir.scope` contains one region and defines a strict "scope" for all new + values produced within its blocks. + + The region can contain an arbitrary number of blocks but usually defaults + to one and can optionally return a value (useful for representing values + coming out of C++ full-expressions) via `cir.yield`: + + + ```mlir + %rvalue = cir.scope { + ... + cir.yield %value + } + ``` + + If `cir.scope` yields no value, the `cir.yield` can be left out, and + will be inserted implicitly. + }]; + + let results = (outs Optional:$results); + let regions = (region AnyRegion:$scopeRegion); + + let hasVerifier = 1; + let skipDefaultBuilders = 1; + let assemblyFormat = [{ + custom($scopeRegion) (`:` type($results)^)? attr-dict + }]; + + let builders = [ + // Scopes for yielding values. + OpBuilder<(ins + "function_ref":$scopeBuilder)>, + // Scopes without yielding values. 
+ OpBuilder<(ins "function_ref":$scopeBuilder)> + ]; +} + +//===----------------------------------------------------------------------===// +// UnaryOp +//===----------------------------------------------------------------------===// + +def UnaryOpKind_Inc : I32EnumAttrCase<"Inc", 1, "inc">; +def UnaryOpKind_Dec : I32EnumAttrCase<"Dec", 2, "dec">; +def UnaryOpKind_Plus : I32EnumAttrCase<"Plus", 3, "plus">; +def UnaryOpKind_Minus : I32EnumAttrCase<"Minus", 4, "minus">; +def UnaryOpKind_Not : I32EnumAttrCase<"Not", 5, "not">; + +def UnaryOpKind : I32EnumAttr< + "UnaryOpKind", + "unary operation kind", + [UnaryOpKind_Inc, + UnaryOpKind_Dec, + UnaryOpKind_Plus, + UnaryOpKind_Minus, + UnaryOpKind_Not, + ]> { + let cppNamespace = "::mlir::cir"; +} + +// FIXME: Pure won't work when we add overloading. +def UnaryOp : CIR_Op<"unary", [Pure, SameOperandsAndResultType]> { + let summary = "Unary operations"; + let description = [{ + `cir.unary` performs the unary operation according to + the specified opcode kind: [inc, dec, plus, minus, not]. + + It requires one input operand and has one result, both types + should be the same. + + ```mlir + %7 = cir.unary(inc, %1) : i32 -> i32 + %8 = cir.unary(dec, %2) : i32 -> i32 + ``` + }]; + + let results = (outs CIR_AnyType:$result); + let arguments = (ins Arg:$kind, Arg:$input); + + let assemblyFormat = [{ + `(` $kind `,` $input `)` `:` type($input) `,` type($result) attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// BinOp +//===----------------------------------------------------------------------===// + +// FIXME: represent Commutative, Idempotent traits for appropriate binops +def BinOpKind_Mul : I32EnumAttrCase<"Mul", 1, "mul">; +def BinOpKind_Div : I32EnumAttrCase<"Div", 2, "div">; +def BinOpKind_Rem : I32EnumAttrCase<"Rem", 3, "rem">; +def BinOpKind_Add : I32EnumAttrCase<"Add", 4, "add">; +def BinOpKind_Sub : I32EnumAttrCase<"Sub", 5, "sub">; +def BinOpKind_And : I32EnumAttrCase<"And", 8, "and">; +def BinOpKind_Xor : I32EnumAttrCase<"Xor", 9, "xor">; +def BinOpKind_Or : I32EnumAttrCase<"Or", 10, "or">; + +def BinOpKind : I32EnumAttr< + "BinOpKind", + "binary operation (arith and logic) kind", + [BinOpKind_Mul, BinOpKind_Div, BinOpKind_Rem, + BinOpKind_Add, BinOpKind_Sub, + BinOpKind_And, BinOpKind_Xor, + BinOpKind_Or]> { + let cppNamespace = "::mlir::cir"; +} + +// FIXME: Pure won't work when we add overloading. +def BinOp : CIR_Op<"binop", [Pure, + SameTypeOperands, SameOperandsAndResultType]> { + + let summary = "Binary operations (arith and logic)"; + let description = [{ + cir.binop performs the binary operation according to + the specified opcode kind: [mul, div, rem, add, sub, + and, xor, or]. + + It requires two input operands and has one result, all types + should be the same. + + ```mlir + %7 = cir.binop(add, %1, %2) : !s32i + %7 = cir.binop(mul, %1, %2) : !u8i + ``` + }]; + + // TODO: get more accurate than CIR_AnyType + let results = (outs CIR_AnyType:$result); + let arguments = (ins Arg:$kind, + CIR_AnyType:$lhs, CIR_AnyType:$rhs, + UnitAttr:$no_unsigned_wrap, + UnitAttr:$no_signed_wrap); + + let assemblyFormat = [{ + `(` $kind `,` $lhs `,` $rhs `)` + (`nsw` $no_signed_wrap^)? + (`nuw` $no_unsigned_wrap^)? 
+ `:` type($lhs) attr-dict + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// ShiftOp +//===----------------------------------------------------------------------===// + +def ShiftOp : CIR_Op<"shift", [Pure]> { + let summary = "Shift"; + let description = [{ + Shift `left` or `right`, according to the first operand. Second operand is + the shift target and the third the amount. + + ```mlir + %7 = cir.shift(left, %1 : !u64i, %4 : !s32i) -> !u64i + ``` + }]; + + let results = (outs CIR_IntType:$result); + let arguments = (ins CIR_IntType:$value, CIR_IntType:$amount, + UnitAttr:$isShiftleft); + + let assemblyFormat = [{ + `(` + (`left` $isShiftleft^) : (`right`)? + `,` $value `:` type($value) + `,` $amount `:` type($amount) + `)` `->` type($result) attr-dict + }]; + + // Already covered by the traits + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// CmpOp +//===----------------------------------------------------------------------===// + +def CmpOpKind_LT : I32EnumAttrCase<"lt", 1>; +def CmpOpKind_LE : I32EnumAttrCase<"le", 2>; +def CmpOpKind_GT : I32EnumAttrCase<"gt", 3>; +def CmpOpKind_GE : I32EnumAttrCase<"ge", 4>; +def CmpOpKind_EQ : I32EnumAttrCase<"eq", 5>; +def CmpOpKind_NE : I32EnumAttrCase<"ne", 6>; + +def CmpOpKind : I32EnumAttr< + "CmpOpKind", + "compare operation kind", + [CmpOpKind_LT, CmpOpKind_LE, CmpOpKind_GT, + CmpOpKind_GE, CmpOpKind_EQ, CmpOpKind_NE]> { + let cppNamespace = "::mlir::cir"; +} + +// FIXME: Pure might not work when we add overloading. +def CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> { + + let summary = "Compare values two values and produce a boolean result"; + let description = [{ + `cir.cmp` compares two input operands of the same type and produces a + `cir.bool` result. The kinds of comparison available are: + [lt,gt,ge,eq,ne] + + ```mlir + %7 = cir.cmp(gt, %1, %2) : i32, !cir.bool + ``` + }]; + + // TODO: get more accurate than CIR_AnyType + let results = (outs CIR_AnyType:$result); + let arguments = (ins Arg:$kind, + CIR_AnyType:$lhs, CIR_AnyType:$rhs); + + let assemblyFormat = [{ + `(` $kind `,` $lhs `,` $rhs `)` `:` type($lhs) `,` type($result) attr-dict + }]; + + // Already covered by the traits + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// BinOpOverflowOp +//===----------------------------------------------------------------------===// + +def BinOpOverflowKind : I32EnumAttr< + "BinOpOverflowKind", + "checked binary arithmetic operation kind", + [BinOpKind_Add, BinOpKind_Sub, BinOpKind_Mul]> { + let cppNamespace = "::mlir::cir"; +} + +def BinOpOverflowOp : CIR_Op<"binop.overflow", [Pure, SameTypeOperands]> { + let summary = "Perform binary integral arithmetic with overflow checking"; + let description = [{ + `cir.binop.overflow` performs binary arithmetic operations with overflow + checking on integral operands. + + The `kind` argument specifies the kind of arithmetic operation to perform. + It can be either `add`, `sub`, or `mul`. The `lhs` and `rhs` arguments + specify the input operands of the arithmetic operation. The types of `lhs` + and `rhs` must be the same. + + `cir.binop.overflow` produces two SSA values. `result` is the result of the + arithmetic operation truncated to its specified type. `overflow` is a + boolean value indicating whether overflow happens during the operation. 
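At the source level, checked arithmetic of this shape is what the `__builtin_*_overflow` family exposes; tying the operation to that builtin below is an illustrative assumption, and the exact set of builtins handled is not stated in this patch.

```cpp
// Illustrative: a checked addition producing both the truncated result and
// an overflow flag, mirroring the (result, overflow) pair described above.
bool add_checked(int a, int b, int *out) {
  return __builtin_add_overflow(a, b, out); // true if the result overflowed
}
```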
+ + The exact semantic of this operation is as follows: + + - `lhs` and `rhs` are promoted to an imaginary integral type that has + infinite precision. + - The arithmetic operation is performed on the promoted operands. + - The infinite-precision result is truncated to the type of `result`. The + truncated result is assigned to `result`. + - If the truncated result is equal to the un-truncated result, `overflow` + is assigned to false. Otherwise, `overflow` is assigned to true. + }]; + + let arguments = (ins Arg:$kind, + CIR_IntType:$lhs, CIR_IntType:$rhs); + let results = (outs CIR_IntType:$result, CIR_BoolType:$overflow); + + let assemblyFormat = [{ + `(` $kind `,` $lhs `,` $rhs `)` `:` type($lhs) `,` + `(` type($result) `,` type($overflow) `)` + attr-dict + }]; + + let builders = [ + OpBuilder<(ins "mlir::cir::IntType":$resultTy, + "mlir::cir::BinOpOverflowKind":$kind, + "mlir::Value":$lhs, + "mlir::Value":$rhs), [{ + auto overflowTy = mlir::cir::BoolType::get($_builder.getContext()); + build($_builder, $_state, resultTy, overflowTy, kind, lhs, rhs); + }]> + ]; +} + +//===----------------------------------------------------------------------===// +// ComplexCreateOp +//===----------------------------------------------------------------------===// + +def ComplexCreateOp : CIR_Op<"complex.create", [Pure, SameTypeOperands]> { + let summary = "Create a complex value from its real and imaginary parts"; + let description = [{ + `cir.complex.create` operation takes two operands that represent the real + and imaginary part of a complex number, and yields the complex number. + + Example: + + ```mlir + %0 = cir.const #cir.fp<1.000000e+00> : !cir.double + %1 = cir.const #cir.fp<2.000000e+00> : !cir.double + %2 = cir.complex.create %0, %1 : !cir.complex + ``` + }]; + + let results = (outs CIR_ComplexType:$result); + let arguments = (ins CIR_AnyIntOrFloat:$real, CIR_AnyIntOrFloat:$imag); + + let assemblyFormat = [{ + $real `,` $imag + `:` qualified(type($real)) `->` qualified(type($result)) attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// ComplexRealOp and ComplexImagOp +//===----------------------------------------------------------------------===// + +def ComplexRealOp : CIR_Op<"complex.real", [Pure]> { + let summary = "Extract the real part of a complex value"; + let description = [{ + `cir.complex.real` operation takes an operand of `!cir.complex` type and + yields the real part of it. + + Example: + + ```mlir + %1 = cir.complex.real %0 : !cir.complex -> !cir.float + ``` + }]; + + let results = (outs CIR_AnyIntOrFloat:$result); + let arguments = (ins CIR_ComplexType:$operand); + + let assemblyFormat = [{ + $operand `:` qualified(type($operand)) `->` qualified(type($result)) + attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + +def ComplexImagOp : CIR_Op<"complex.imag", [Pure]> { + let summary = "Extract the imaginary part of a complex value"; + let description = [{ + `cir.complex.imag` operation takes an operand of `!cir.complex` type and + yields the imaginary part of it. 
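In C terms, the accessors in this section correspond to constructs such as the ones below; `_Complex`, `__real__`, and `__imag__` are Clang/GCC extensions and are used here only as an illustrative source form.

```cpp
// Illustrative (Clang extensions): reading the parts of a complex value,
// cf. cir.complex.real and cir.complex.imag described in this section.
float real_part(float _Complex z) { return __real__ z; }
float imag_part(float _Complex z) { return __imag__ z; }
```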
+ + Example: + + ```mlir + %1 = cir.complex.imag %0 : !cir.complex -> !cir.float + ``` + }]; + + let results = (outs CIR_AnyIntOrFloat:$result); + let arguments = (ins CIR_ComplexType:$operand); + + let assemblyFormat = [{ + $operand `:` qualified(type($operand)) `->` qualified(type($result)) + attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// ComplexRealPtrOp and ComplexImagPtrOp +//===----------------------------------------------------------------------===// + +def ComplexRealPtrOp : CIR_Op<"complex.real_ptr", [Pure]> { + let summary = "Derive a pointer to the real part of a complex value"; + let description = [{ + `cir.complex.real_ptr` operation takes a pointer operand that points to a + complex value of type `!cir.complex` and yields a pointer to the real part + of the operand. + + Example: + + ```mlir + %1 = cir.complex.real_ptr %0 : !cir.ptr> -> !cir.ptr + ``` + }]; + + let results = (outs PrimitiveIntOrFPPtr:$result); + let arguments = (ins ComplexPtr:$operand); + + let assemblyFormat = [{ + $operand `:` + qualified(type($operand)) `->` qualified(type($result)) attr-dict + }]; + + let hasVerifier = 1; +} + +def ComplexImagPtrOp : CIR_Op<"complex.imag_ptr", [Pure]> { + let summary = "Derive a pointer to the imaginary part of a complex value"; + let description = [{ + `cir.complex.imag_ptr` operation takes a pointer operand that points to a + complex value of type `!cir.complex` and yields a pointer to the imaginary + part of the operand. + + Example: + + ```mlir + %1 = cir.complex.imag_ptr %0 : !cir.ptr> -> !cir.ptr + ``` + }]; + + let results = (outs PrimitiveIntOrFPPtr:$result); + let arguments = (ins ComplexPtr:$operand); + + let assemblyFormat = [{ + $operand `:` + qualified(type($operand)) `->` qualified(type($result)) attr-dict + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// ComplexBinOp +//===----------------------------------------------------------------------===// + +def ComplexBinOpKind : I32EnumAttr< + "ComplexBinOpKind", + "complex number binary operation kind", + [BinOpKind_Mul, BinOpKind_Div]> { + let cppNamespace = "::mlir::cir"; +} + +def ComplexRangeKind_Full : I32EnumAttrCase<"Full", 1, "full">; +def ComplexRangeKind_Improved : I32EnumAttrCase<"Improved", 2, "improved">; +def ComplexRangeKind_Promoted : I32EnumAttrCase<"Promoted", 3, "promoted">; +def ComplexRangeKind_Basic : I32EnumAttrCase<"Basic", 4, "basic">; +def ComplexRangeKind_None : I32EnumAttrCase<"None", 5, "none">; + +def ComplexRangeKind : I32EnumAttr< + "ComplexRangeKind", + "complex multiplication and division implementation", + [ComplexRangeKind_Full, ComplexRangeKind_Improved, + ComplexRangeKind_Promoted, ComplexRangeKind_Basic, + ComplexRangeKind_None]> { + let cppNamespace = "::mlir::cir"; +} + +def ComplexBinOp : CIR_Op<"complex.binop", + [Pure, SameTypeOperands, SameOperandsAndResultType]> { + let summary = "Binary operations on operands of complex type"; + let description = [{ + The `cir.complex.binop` operation represents a binary operation on operands + of C complex type (e.g. `float _Complex`). The operation can only represent + binary multiplication or division on complex numbers; other binary + operations, such as addition and subtraction, are represented by the + `cir.binop` operation. + + The operation requires two input operands and has one result. 
The types of + all the operands and the result should be of the same `!cir.complex` type. + + The operation also takes a `range` attribute that specifies the complex + range of the binary operation. + + Examples: + + ```mlir + %2 = cir.complex.binop add %0, %1 : !cir.complex + %2 = cir.complex.binop mul %0, %1 : !cir.complex + ``` + }]; + + let results = (outs CIR_ComplexType:$result); + let arguments = (ins Arg:$kind, + CIR_ComplexType:$lhs, CIR_ComplexType:$rhs, + Arg:$range, + UnitAttr:$promoted); + + let assemblyFormat = [{ + $kind $lhs `,` $rhs `range` `(` $range `)` (`promoted` $promoted^)? + `:` qualified(type($lhs)) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// BitsOp +//===----------------------------------------------------------------------===// + +class CIR_BitOp + : CIR_Op { + let arguments = (ins inputTy:$input); + let results = (outs SInt32:$result); + + let assemblyFormat = [{ + `(` $input `:` type($input) `)` `:` type($result) attr-dict + }]; +} + +def BitClrsbOp : CIR_BitOp<"bit.clrsb", AnyTypeOf<[SInt32, SInt64]>> { + let summary = "Get the number of leading redundant sign bits in the input"; + let description = [{ + Compute the number of leading redundant sign bits in the input integer. + + The input integer must be a signed integer. The most significant bit of the + input integer is the sign bit. The `cir.bit.clrsb` operation returns the + number of redundant sign bits in the input, that is, the number of bits + following the most significant bit that are identical to it. + + The bit width of the input integer must be either 32 or 64. + + Examples: + + ```mlir + !s32i = !cir.int + + // %0 = 0xDEADBEEF, 0b1101_1110_1010_1101_1011_1110_1110_1111 + %0 = cir.const #cir.int<3735928559> : !s32i + // %1 will be 1 because there is 1 bit following the most significant bit + // that is identical to it. + %1 = cir.bit.clrsb(%0 : !s32i) : !s32i + + // %2 = 1, 0b0000_0000_0000_0000_0000_0000_0000_0001 + %2 = cir.const #cir.int<1> : !s32i + // %3 will be 30 + %3 = cir.bit.clrsb(%2 : !s32i) : !s32i + ``` + }]; +} + +def BitClzOp : CIR_BitOp<"bit.clz", AnyTypeOf<[UInt16, UInt32, UInt64]>> { + let summary = "Get the number of leading 0-bits in the input"; + let description = [{ + Compute the number of leading 0-bits in the input. + + The input integer must be an unsigned integer. The `cir.bit.clz` operation + returns the number of consecutive 0-bits at the most significant bit + position in the input. + + This operation invokes undefined behavior if the input value is 0. + + Example: + + ```mlir + !s32i = !cir.int + !u32i = !cir.int + + // %0 = 0b0000_0000_0000_0000_0000_0000_0000_1000 + %0 = cir.const #cir.int<8> : !u32i + // %1 will be 28 + %1 = cir.bit.clz(%0 : !u32i) : !s32i + ``` + }]; +} + +def BitCtzOp : CIR_BitOp<"bit.ctz", AnyTypeOf<[UInt16, UInt32, UInt64]>> { + let summary = "Get the number of trailing 0-bits in the input"; + let description = [{ + Compute the number of trailing 0-bits in the input. + + The input integer must be an unsigned integer. The `cir.bit.ctz` operation + returns the number of consecutive 0-bits at the least significant bit + position in the input. + + This operation invokes undefined behavior if the input value is 0. 
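These operations read like the classic bit-scan builtins; the pairing suggested in the comments below is an assumption made for illustration, not a mapping taken from this patch.

```cpp
// Illustrative: Clang/GCC bit-manipulation builtins with the semantics
// described in this section (clz/ctz are undefined for a zero input).
int leading_zeros(unsigned x)    { return __builtin_clz(x); }       // cf. cir.bit.clz
int trailing_zeros(unsigned x)   { return __builtin_ctz(x); }       // cf. cir.bit.ctz
int first_set_bit(int x)         { return __builtin_ffs(x); }       // cf. cir.bit.ffs
int parity(unsigned x)           { return __builtin_parity(x); }    // cf. cir.bit.parity
int population_count(unsigned x) { return __builtin_popcount(x); }  // cf. cir.bit.popcount
```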
+ + Example: + + ```mlir + !s32i = !cir.int + !u32i = !cir.int + + // %0 = 0b1000 + %0 = cir.const #cir.int<8> : !u32i + // %1 will be 3 + %1 = cir.bit.ctz(%0 : !u32i) : !s32i + ``` + }]; +} + +def BitFfsOp : CIR_BitOp<"bit.ffs", AnyTypeOf<[SInt32, SInt64]>> { + let summary = "Get the position of the least significant 1-bit of input"; + let description = [{ + Compute the position of the least significant 1-bit of the input. + + The input integer must be a signed integer. The `cir.bit.ffs` operation + returns one plus the index of the least significant 1-bit of the input + signed integer. As a special case, if the input integer is 0, `cir.bit.ffs` + returns 0. + + Example: + + ```mlir + !s32i = !cir.int + + // %0 = 0x0010_1000 + %0 = cir.const #cir.int<40> : !s32i + // #1 will be 4 since the 4th least significant bit is 1. + %1 = cir.bit.ffs(%0 : !s32i) : !s32i + ``` + }]; +} + +def BitParityOp : CIR_BitOp<"bit.parity", AnyTypeOf<[UInt32, UInt64]>> { + let summary = "Get the parity of input"; + let description = [{ + Compute the parity of the input. The parity of an integer is the number of + 1-bits in it modulo 2. + + The input must be an unsigned integer. + + Example: + + ```mlir + !s32i = !cir.int + !u32i = !cir.int + + // %0 = 0x0110_1000 + %0 = cir.const #cir.int<104> : !u32i + // %1 will be 1 since there are 3 1-bits in %0 + %1 = cir.bit.parity(%0 : !u32i) : !u32i + ``` + }]; +} + +def BitPopcountOp + : CIR_BitOp<"bit.popcount", AnyTypeOf<[UInt16, UInt32, UInt64]>> { + let summary = "Get the number of 1-bits in input"; + let description = [{ + Compute the number of 1-bits in the input. + + The input must be an unsigned integer. + + Example: + + ```mlir + !u32i = !cir.int + + // %0 = 0x0110_1000 + %0 = cir.const #cir.int<104> : !u32i + // %1 will be 3 since there are 3 1-bits in %0 + %1 = cir.bit.popcount(%0 : !u32i) : !u32i + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// ByteswapOp +//===----------------------------------------------------------------------===// + +def ByteswapOp : CIR_Op<"bswap", [Pure, SameOperandsAndResultType]> { + let summary = "Reverse the bytes that constitute the operand integer"; + let description = [{ + The `cir.bswap` operation takes an integer as operand, and returns it with + the order of bytes that constitute the operand reversed. + + The operand integer must be an unsigned integer. Its widths must be either + 16, 32, or 64. + + Example: + + ```mlir + !u32i = !cir.int + + // %0 = 0x12345678 + %0 = cir.const #cir.int<305419896> : !u32i + + // %1 should be 0x78563412 + %1 = cir.bswap(%0 : !u32i) : !u32i + ``` + }]; + + let results = (outs CIR_IntType:$result); + let arguments = (ins AnyTypeOf<[UInt16, UInt32, UInt64]>:$input); + + let assemblyFormat = [{ + `(` $input `:` type($input) `)` `:` type($result) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// RotateOp +//===----------------------------------------------------------------------===// + +def RotateOp : CIR_Op<"rotate", [Pure, SameOperandsAndResultType]> { + let summary = "Reverse the bytes that constitute the operand integer"; + let description = [{ + The `cir.rotate` rotates operand in `src` by the given bit amount `amt`. + Its widths must be either 8, 16, 32, or 64 and both `src`, `amt` and + `result` be of the same type. The rotate direction is specified by a + `left`/`right` keyword. 
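At the source level this family comes from byte-swap and rotate builtins such as those enumerated just below; the signatures shown here follow the standard Clang forms, and the mapping comments are illustrative only.

```cpp
// Illustrative: byte reversal and bit rotation at the source level.
unsigned bswap32(unsigned x) { return __builtin_bswap32(x); }  // cf. cir.bswap
unsigned char rotl8(unsigned char v, unsigned char amt) {
  return __builtin_rotateleft8(v, amt);                        // cf. cir.rotate left
}
unsigned rotr32(unsigned v, unsigned amt) {
  return __builtin_rotateright32(v, amt);                      // cf. cir.rotate right
}
```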
+ + This operation covers different C/C++ + builtins, some examples: `__builtin_rotateleft8`, `__builtin_rotateleft16`, + `__builtin_rotateleft32`, `__builtin_rotateleft64`, `_rotl8`, `_rotl16`, + `_rotl`, `_lrotl`, `_rotl64`, etc and their "right" variants. + + Example: + + ```mlir + %r = cir.rotate left %0, %1 -> !u32i + ``` + }]; + + let results = (outs CIR_IntType:$result); + let arguments = (ins PrimitiveInt:$src, PrimitiveInt:$amt, + UnitAttr:$left); + + let assemblyFormat = [{ + (`left` $left^) : (`right`)? + $src `,` $amt `->` type($result) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// CmpThreeWayOp +//===----------------------------------------------------------------------===// + +def CmpThreeWayOp : CIR_Op<"cmp3way", [Pure, SameTypeOperands]> { + let summary = "Compare two values with C++ three-way comparison semantics"; + let description = [{ + The `cir.cmp3way` operation models the `<=>` operator in C++20. It takes two + operands with the same type and produces a result indicating the ordering + between the two input operands. + + The result of the operation is a signed integer that indicates the ordering + between the two input operands. + + There are two kinds of ordering: strong ordering and partial ordering. + Comparing different types of values yields different kinds of orderings. + The `info` parameter gives the ordering kind and other necessary information + about the comparison. + + Example: + + ```mlir + !s32i = !cir.int + + #cmp3way_strong = #cmp3way_info + #cmp3way_partial = #cmp3way_info + + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.const #cir.int<1> : !s32i + %2 = cir.cmp3way(%0 : !s32i, %1, #cmp3way_strong) : !s8i + + %3 = cir.const #cir.fp<0.0> : !cir.float + %4 = cir.const #cir.fp<1.0> : !cir.float + %5 = cir.cmp3way(%3 : !cir.float, %4, #cmp3way_partial) : !s8i + ``` + }]; + + let results = (outs PrimitiveSInt:$result); + let arguments = (ins CIR_AnyType:$lhs, CIR_AnyType:$rhs, + CmpThreeWayInfoAttr:$info); + + let assemblyFormat = [{ + `(` $lhs `:` type($lhs) `,` $rhs `,` qualified($info) `)` + `:` type($result) attr-dict + }]; + + let hasVerifier = 0; + + let extraClassDeclaration = [{ + /// Determine whether this three-way comparison produces a strong ordering. + bool isStrongOrdering() { + return getInfo().getOrdering() == mlir::cir::CmpOrdering::Strong; + } + + /// Determine whether this three-way comparison compares integral operands. + bool isIntegralComparison() { + return mlir::isa(getLhs().getType()); + } + }]; +} + +//===----------------------------------------------------------------------===// +// SwitchOp +//===----------------------------------------------------------------------===// + +def CaseOpKind_DT : I32EnumAttrCase<"Default", 1, "default">; +def CaseOpKind_EQ : I32EnumAttrCase<"Equal", 2, "equal">; +def CaseOpKind_AO : I32EnumAttrCase<"Anyof", 3, "anyof">; +def CaseOpKind_RG : I32EnumAttrCase<"Range", 4, "range">; + +def CaseOpKind : I32EnumAttr< + "CaseOpKind", + "case kind", + [CaseOpKind_DT, CaseOpKind_EQ, CaseOpKind_AO, CaseOpKind_RG]> { + let cppNamespace = "::mlir::cir"; +} + +def CaseEltValueListAttr : + TypedArrayAttrBase { + let constBuilderCall = ?; +} + +def CaseAttr : AttrDef { + // FIXME: value should probably be optional for more clear "default" + // representation. 
+ let parameters = (ins "ArrayAttr":$value, "CaseOpKindAttr":$kind); + let mnemonic = "case"; + let assemblyFormat = "`<` struct(params) `>`"; +} + +def CaseArrayAttr : + TypedArrayAttrBase { + let constBuilderCall = ?; +} + +def SwitchOp : CIR_Op<"switch", + [SameVariadicOperandSize, + DeclareOpInterfaceMethods, + RecursivelySpeculatable, AutomaticAllocationScope, NoRegionArguments]> { + let summary = "Switch operation"; + let description = [{ + The `cir.switch` operation represents C/C++ switch functionality for + conditionally executing multiple regions of code. The operand to an switch + is an integral condition value. + + A variadic list of "case" attribute operands and regions track the possible + control flow within `cir.switch`. A `case` must be in one of the following forms: + - `equal, `: equality of the second case operand against the + condition. + - `anyof, [constant-list]`: equals to any of the values in a subsequent + following list. + - `range, [lower-bound, upper-bound]`: the condition is within the closed interval. + - `default`: any other value. + + Each case region must be explicitly terminated. + + Examples: + + ```mlir + cir.switch (%b : i32) [ + case (equal, 20) { + ... + cir.yield break + }, + case (anyof, [1, 2, 3] : i32) { + ... + cir.return ... + } + case (range, [10, 15]) { + ... + cir.yield break + }, + case (default) { + ... + cir.yield fallthrough + } + ] + ``` + }]; + + let arguments = (ins CIR_IntType:$condition, + OptionalAttr:$cases); + + let regions = (region VariadicRegion:$regions); + + let hasVerifier = 1; + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "Value":$condition, + "function_ref":$switchBuilder)> + ]; + + let assemblyFormat = [{ + custom( + $regions, $cases, $condition, type($condition) + ) + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// BrOp +//===----------------------------------------------------------------------===// + +def BrOp : CIR_Op<"br", + [DeclareOpInterfaceMethods, + Pure, Terminator]> { + let summary = "Unconditional branch"; + let description = [{ + The `cir.br` branches unconditionally to a block. Used to represent C/C++ + goto's and general block branching. + + Example: + + ```mlir + ... + cir.br ^bb3 + ^bb3: + cir.return + ``` + }]; + + let builders = [ + OpBuilder<(ins "Block *":$dest, + CArg<"ValueRange", "{}">:$destOperands), [{ + $_state.addSuccessors(dest); + $_state.addOperands(destOperands); + }]> + ]; + + let arguments = (ins Variadic:$destOperands); + let successors = (successor AnySuccessor:$dest); + let assemblyFormat = [{ + $dest (`(` $destOperands^ `:` type($destOperands) `)`)? attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// BrCondOp +//===----------------------------------------------------------------------===// + +def BrCondOp : CIR_Op<"brcond", + [DeclareOpInterfaceMethods, + Pure, Terminator, AttrSizedOperandSegments]> { + let summary = "Conditional branch"; + let description = [{ + The `cir.brcond %cond, ^bb0, ^bb1` branches to 'bb0' block in case + %cond (which must be a !cir.bool type) evaluates to true, otherwise + it branches to 'bb1'. + + Example: + + ```mlir + ... 
+ cir.brcond %a, ^bb3, ^bb4 + ^bb3: + cir.return + ^bb4: + cir.yield + ``` + }]; + + let builders = [ + OpBuilder<(ins "Value":$cond, "Block *":$destTrue, "Block *":$destFalse, + CArg<"ValueRange", "{}">:$destOperandsTrue, + CArg<"ValueRange", "{}">:$destOperandsFalse), [{ + build($_builder, $_state, cond, destOperandsTrue, + destOperandsFalse, destTrue, destFalse); + }]> + ]; + + let arguments = (ins CIR_BoolType:$cond, + Variadic:$destOperandsTrue, + Variadic:$destOperandsFalse); + let successors = (successor AnySuccessor:$destTrue, AnySuccessor:$destFalse); + let assemblyFormat = [{ + $cond + $destTrue (`(` $destOperandsTrue^ `:` type($destOperandsTrue) `)`)? + `,` + $destFalse (`(` $destOperandsFalse^ `:` type($destOperandsFalse) `)`)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// While & DoWhileOp +//===----------------------------------------------------------------------===// + +class WhileOpBase : CIR_Op { + defvar isWhile = !eq(mnemonic, "while"); + let summary = "C/C++ " # !if(isWhile, "while", "do-while") # " loop"; + let builders = [ + OpBuilder<(ins "function_ref":$condBuilder, + "function_ref":$bodyBuilder), [{ + OpBuilder::InsertionGuard guard($_builder); + $_builder.createBlock($_state.addRegion()); + }] # !if(isWhile, [{ + condBuilder($_builder, $_state.location); + $_builder.createBlock($_state.addRegion()); + bodyBuilder($_builder, $_state.location); + }], [{ + bodyBuilder($_builder, $_state.location); + $_builder.createBlock($_state.addRegion()); + condBuilder($_builder, $_state.location); + }])> + ]; +} + +def WhileOp : WhileOpBase<"while"> { + let regions = (region SizedRegion<1>:$cond, MinSizedRegion<1>:$body); + let assemblyFormat = "$cond `do` $body attr-dict"; + + let description = [{ + Represents a C/C++ while loop. It consists of two regions: + + - `cond`: single block region with the loop's condition. Should be + terminated with a `cir.condition` operation. + - `body`: contains the loop body and an arbitrary number of blocks. + + Example: + + ```mlir + cir.while { + cir.break + ^bb2: + cir.yield + } do { + cir.condition %cond : cir.bool + } + ``` + }]; +} + +def DoWhileOp : WhileOpBase<"do"> { + let regions = (region MinSizedRegion<1>:$body, SizedRegion<1>:$cond); + let assemblyFormat = " $body `while` $cond attr-dict"; + + let extraClassDeclaration = [{ + Region &getEntry() { return getBody(); } + }]; + + let description = [{ + Represents a C/C++ do-while loop. Identical to `cir.while` but the + condition is evaluated after the body. + + Example: + + ```mlir + cir.do { + cir.break + ^bb2: + cir.yield + } while { + cir.condition %cond : cir.bool + } + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// ForOp +//===----------------------------------------------------------------------===// + +def ForOp : CIR_Op<"for", [LoopOpInterface, NoRegionArguments]> { + let summary = "C/C++ for loop counterpart"; + let description = [{ + Represents a C/C++ for loop. It consists of three regions: + + - `cond`: single block region with the loop's condition. Should be + terminated with a `cir.condition` operation. + - `body`: contains the loop body and an arbitrary number of blocks. + - `step`: single block region with the loop's step. 
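+
+    For orientation, the clauses of a plain C/C++ `for` statement populate
+    these regions roughly as follows (an illustrative sketch; the init
+    clause is presumably emitted before the operation itself, since it has
+    no dedicated region):
+
+    ```cpp
+    void count_up(int n) {
+      for (int i = 0; i < n; ++i) {  // `i < n` -> cond, `++i` -> step
+        // loop body -> body region
+      }
+    }
+    ```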
+ + Example: + + ```mlir + cir.for cond { + cir.condition(%val) + } body { + cir.break + ^bb2: + cir.yield + } step { + cir.yield + } + ``` + }]; + + let regions = (region SizedRegion<1>:$cond, + MinSizedRegion<1>:$body, + SizedRegion<1>:$step); + let assemblyFormat = [{ + `:` `cond` $cond + `body` $body + `step` $step + attr-dict + }]; + + let builders = [ + OpBuilder<(ins "function_ref":$condBuilder, + "function_ref":$bodyBuilder, + "function_ref":$stepBuilder), [{ + OpBuilder::InsertionGuard guard($_builder); + + // Build condition region. + $_builder.createBlock($_state.addRegion()); + condBuilder($_builder, $_state.location); + + // Build body region. + $_builder.createBlock($_state.addRegion()); + bodyBuilder($_builder, $_state.location); + + // Build step region. + $_builder.createBlock($_state.addRegion()); + stepBuilder($_builder, $_state.location); + }]> + ]; + + let extraClassDeclaration = [{ + Region *maybeGetStep() { return &getStep(); } + llvm::SmallVector getRegionsInExecutionOrder() { + return llvm::SmallVector{&getCond(), &getBody(), &getStep()}; + } + }]; +} + +//===----------------------------------------------------------------------===// +// GlobalOp +//===----------------------------------------------------------------------===// + +// Linkage types. This is currently a replay of llvm/IR/GlobalValue.h, this is +// currently handy as part of forwarding appropriate linkage types for LLVM +// lowering, specially useful for C++ support. + +// Externally visible function +def Global_ExternalLinkage : + I32EnumAttrCase<"ExternalLinkage", 0, "external">; +// Available for inspection, not emission. +def Global_AvailableExternallyLinkage : + I32EnumAttrCase<"AvailableExternallyLinkage", 1, "available_externally">; +// Keep one copy of function when linking (inline) +def Global_LinkOnceAnyLinkage : + I32EnumAttrCase<"LinkOnceAnyLinkage", 2, "linkonce">; +// Same, but only replaced by something equivalent. +def Global_LinkOnceODRLinkage : + I32EnumAttrCase<"LinkOnceODRLinkage", 3, "linkonce_odr">; +// Keep one copy of named function when linking (weak) +def Global_WeakAnyLinkage : + I32EnumAttrCase<"WeakAnyLinkage", 4, "weak">; +// Same, but only replaced by something equivalent. +def Global_WeakODRLinkage : + I32EnumAttrCase<"WeakODRLinkage", 5, "weak_odr">; +// TODO: should we add something like appending linkage too? +// Special purpose, only applies to global arrays +// def Global_AppendingLinkage : +// I32EnumAttrCase<"AppendingLinkage", 6, "appending">; +// Rename collisions when linking (static functions). +def Global_InternalLinkage : + I32EnumAttrCase<"InternalLinkage", 7, "internal">; +// Like Internal, but omit from symbol table, prefix it with +// "cir_" to prevent clash with MLIR's symbol "private". +def Global_PrivateLinkage : + I32EnumAttrCase<"PrivateLinkage", 8, "cir_private">; +// ExternalWeak linkage description. +def Global_ExternalWeakLinkage : + I32EnumAttrCase<"ExternalWeakLinkage", 9, "extern_weak">; +// Tentative definitions. +def Global_CommonLinkage : + I32EnumAttrCase<"CommonLinkage", 10, "common">; + +/// An enumeration for the kinds of linkage for global values. 
+def GlobalLinkageKind : I32EnumAttr< + "GlobalLinkageKind", + "Linkage type/kind", + [Global_ExternalLinkage, Global_AvailableExternallyLinkage, + Global_LinkOnceAnyLinkage, Global_LinkOnceODRLinkage, + Global_WeakAnyLinkage, Global_WeakODRLinkage, + Global_InternalLinkage, Global_PrivateLinkage, + Global_ExternalWeakLinkage, Global_CommonLinkage + ]> { + let cppNamespace = "::mlir::cir"; +} + +def SOB_Undefined : I32EnumAttrCase<"undefined", 1>; +def SOB_Defined : I32EnumAttrCase<"defined", 2>; +def SOB_Trapping : I32EnumAttrCase<"trapping", 3>; + +def SignedOverflowBehaviorEnum : I32EnumAttr< + "SignedOverflowBehavior", + "the behavior for signed overflow", + [SOB_Undefined, SOB_Defined, SOB_Trapping]> { + let cppNamespace = "::mlir::cir::sob"; +} + +/// Definition of TLS related kinds. +def TLS_GeneralDynamic : + I32EnumAttrCase<"GeneralDynamic", 0, "tls_dyn">; +def TLS_LocalDynamic : + I32EnumAttrCase<"LocalDynamic", 1, "tls_local_dyn">; +def TLS_InitialExec : + I32EnumAttrCase<"InitialExec", 2, "tls_init_exec">; +def TLS_LocalExec : + I32EnumAttrCase<"LocalExec", 3, "tls_local_exec">; + +def TLSModel : I32EnumAttr< + "TLS_Model", + "TLS model", + [TLS_GeneralDynamic, TLS_LocalDynamic, TLS_InitialExec, TLS_LocalExec]> { + let cppNamespace = "::mlir::cir"; +} + +def GlobalOp : CIR_Op<"global", + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, + NoRegionArguments]> { + let summary = "Declares or defines a global variable"; + let description = [{ + The `cir.global` operation declares or defines a named global variable. + + The backing memory for the variable is allocated statically and is + described by the type of the variable. + + The operation is a declaration if no `inital_value` is + specified, else it is a definition. + + The global variable can also be marked constant using the + `constant` unit attribute. Writing to such constant global variables is + undefined. + + The `linkage` tracks C/C++ linkage types, currently very similar to LLVM's. + Symbol visibility in `sym_visibility` is defined in terms of MLIR's visibility + and verified to be in accordance to `linkage`. + + `visibility_attr` is defined in terms of CIR's visibility. + + Example: + + ```mlir + // Public and constant variable with initial value. + cir.global public constant @c : i32 = 4; + ``` + }]; + + // Note that both sym_name and sym_visibility are tied to Symbol trait. + // TODO: sym_visibility can possibly be represented by implementing the + // necessary Symbol's interface in terms of linkage instead. + let arguments = (ins SymbolNameAttr:$sym_name, + VisibilityAttr:$global_visibility, + OptionalAttr:$sym_visibility, + TypeAttr:$sym_type, + Arg:$linkage, + OptionalAttr:$addr_space, + OptionalAttr:$tls_model, + // Note this can also be a FlatSymbolRefAttr + OptionalAttr:$initial_value, + UnitAttr:$comdat, + UnitAttr:$constant, + UnitAttr:$dsolocal, + OptionalAttr:$alignment, + OptionalAttr:$ast, + OptionalAttr:$section, + OptionalAttr:$annotations); + let regions = (region AnyRegion:$ctorRegion, AnyRegion:$dtorRegion); + let assemblyFormat = [{ + ($sym_visibility^)? + custom($global_visibility) + (`constant` $constant^)? + $linkage + (`comdat` $comdat^)? + ($tls_model^)? + (`dsolocal` $dsolocal^)? + (`addrspace` `(` custom($addr_space)^ `)`)? + $sym_name + custom($sym_type, $initial_value, $ctorRegion, $dtorRegion) + ($annotations^)? 
+ attr-dict + }]; + + let extraClassDeclaration = [{ + bool isDeclaration() { + return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty(); + } + bool hasInitializer() { return !isDeclaration(); } + bool hasAvailableExternallyLinkage() { + return mlir::cir::isAvailableExternallyLinkage(getLinkage()); + } + /// Whether the definition of this global may be replaced at link time. + bool isWeakForLinker() { return cir::isWeakForLinker(getLinkage()); } + }]; + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins + // MLIR's default visibility is public. + "StringRef":$sym_name, + "Type":$sym_type, + CArg<"bool", "false">:$isConstant, + // CIR defaults to external linkage. + CArg<"cir::GlobalLinkageKind", + "cir::GlobalLinkageKind::ExternalLinkage">:$linkage, + CArg<"cir::AddressSpaceAttr", "{}">:$addrSpace, + CArg<"function_ref", + "nullptr">:$ctorBuilder, + CArg<"function_ref", + "nullptr">:$dtorBuilder) + > + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// GetGlobalOp +//===----------------------------------------------------------------------===// + +def GetGlobalOp : CIR_Op<"get_global", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Get the address of a global variable"; + let description = [{ + The `cir.get_global` operation retrieves the address pointing to a + named global variable. If the global variable is marked constant, writing + to the resulting address (such as through a `cir.store` operation) is + undefined. Resulting type must always be a `!cir.ptr<...>` type with the + same address space as the global variable. + + Addresses of thread local globals can only be retrieved if this operation + is marked `thread_local`, which indicates the address isn't constant. + + Example: + ```mlir + %x = cir.get_global @foo : !cir.ptr + ... + %y = cir.get_global thread_local @batata : !cir.ptr + ... + cir.global external addrspace(offload_global) @gv = #cir.int<0> : !s32i + %z = cir.get_global @gv : !cir.ptr + ``` + }]; + + let arguments = (ins FlatSymbolRefAttr:$name, UnitAttr:$tls); + let results = (outs Res:$addr); + + let assemblyFormat = [{ + (`thread_local` $tls^)? + $name `:` qualified(type($addr)) attr-dict + }]; + + // `GetGlobalOp` is fully verified by its traits. + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// VTableAddrPointOp +//===----------------------------------------------------------------------===// + +def VTableAddrPointOp : CIR_Op<"vtable.address_point", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Get the vtable (global variable) address point"; + let description = [{ + The `vtable.address_point` operation retrieves the "effective" address + (address point) of a C++ virtual table. An object internal `__vptr` + gets initializated on top of the value returned by this operation. + + `vtable_index` provides the appropriate vtable within the vtable group + (as specified by Itanium ABI), and `addr_point_index` the actual address + point within that vtable. + + The return type is always a `!cir.ptr i32>>`. + + Example: + ```mlir + cir.global linkonce_odr @_ZTV1B = ... + ... + %3 = cir.vtable.address_point(@_ZTV1B, vtable_index = 0, address_point_index = 2) : !cir.ptr i32>> + ``` + }]; + + let arguments = (ins OptionalAttr:$name, + Optional:$sym_addr, + I32Attr:$vtable_index, + I32Attr:$address_point_index); + let results = (outs Res:$addr); + + let assemblyFormat = [{ + `(` + ($name^)? 
+ ($sym_addr^ `:` type($sym_addr))? + `,` + `vtable_index` `=` $vtable_index `,` + `address_point_index` `=` $address_point_index + `)` + `:` qualified(type($addr)) attr-dict + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// SetBitfieldOp +//===----------------------------------------------------------------------===// + +def SetBitfieldOp : CIR_Op<"set_bitfield"> { + let summary = "Set a bitfield"; + let description = [{ + The `cir.set_bitfield` operation provides a store-like access to + a bit field of a record. + + It expects an address of a storage where to store, a type of the storage, + a value being stored, a name of a bit field, a pointer to the storage in the + base record, a size of the storage, a size the bit field, an offset + of the bit field and a sign. Returns a value being stored. + + A unit attribute `volatile` can be used to indicate a volatile load of the + bitfield. + + Example. + Suppose we have a struct with multiple bitfields stored in + different storages. The `cir.set_bitfield` operation sets the value + of the bitfield. + ```C++ + typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + } S; + + void store_bitfield(S& s) { + s.d = 3; + } + ``` + + ```mlir + // 'd' is in the storage with the index 1 + !struct_type = !cir.struct, !cir.int, !cir.int} #cir.record.decl.ast> + #bfi_d = #cir.bitfield_info + + %1 = cir.const #cir.int<3> : !s32i + %2 = cir.load %0 : !cir.ptr>, !cir.ptr + %3 = cir.get_member %2[1] {name = "d"} : !cir.ptr -> !cir.ptr + %4 = cir.set_bitfield(#bfi_d, %3 : !cir.ptr, %1 : !s32i) -> !s32i + ``` + }]; + + let arguments = (ins + Arg:$addr, + CIR_AnyType:$src, + BitfieldInfoAttr:$bitfield_info, + UnitAttr:$is_volatile + ); + + let results = (outs CIR_IntType:$result); + + let assemblyFormat = [{ `(`$bitfield_info`,` $addr`:`qualified(type($addr))`,` + $src`:`type($src) `)` attr-dict `->` type($result) }]; + + let builders = [ + OpBuilder<(ins "Type":$type, + "Value":$addr, + "Type":$storage_type, + "Value":$src, + "StringRef":$name, + "unsigned":$size, + "unsigned":$offset, + "bool":$is_signed, + "bool":$is_volatile + ), + [{ + BitfieldInfoAttr info = + BitfieldInfoAttr::get($_builder.getContext(), + name, storage_type, + size, offset, is_signed); + build($_builder, $_state, type, addr, src, info, is_volatile); + }]> + ]; +} + +//===----------------------------------------------------------------------===// +// GetBitfieldOp +//===----------------------------------------------------------------------===// + +def GetBitfieldOp : CIR_Op<"get_bitfield"> { + let summary = "Get a bitfield"; + let description = [{ + The `cir.get_bitfield` operation provides a load-like access to + a bit field of a record. + + It expects a name if a bit field, a pointer to a storage in the + base record, a type of the storage, a name of the bitfield, + a size the bit field, an offset of the bit field and a sign. + + A unit attribute `volatile` can be used to indicate a volatile load of the + bitfield. + + Example: + Suppose we have a struct with multiple bitfields stored in + different storages. 
The `cir.get_bitfield` operation gets the value + of the bitfield + ```C++ + typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + } S; + + int load_bitfield(S& s) { + return s.d; + } + ``` + + ```mlir + // 'd' is in the storage with the index 1 + !struct_type = !cir.struct, !cir.int, !cir.int} #cir.record.decl.ast> + #bfi_d = #cir.bitfield_info + + %2 = cir.load %0 : !cir.ptr>, !cir.ptr + %3 = cir.get_member %2[1] {name = "d"} : !cir.ptr -> !cir.ptr + %4 = cir.get_bitfield(#bfi_d, %3 : !cir.ptr) -> !s32i + ``` + }]; + + let arguments = (ins + Arg:$addr, + BitfieldInfoAttr:$bitfield_info, + UnitAttr:$is_volatile + ); + + let results = (outs CIR_IntType:$result); + + let assemblyFormat = [{ `(`$bitfield_info `,` $addr attr-dict `:` + qualified(type($addr)) `)` `->` type($result) }]; + + let builders = [ + OpBuilder<(ins "Type":$type, + "Value":$addr, + "Type":$storage_type, + "StringRef":$name, + "unsigned":$size, + "unsigned":$offset, + "bool":$is_signed, + "bool":$is_volatile + ), + [{ + BitfieldInfoAttr info = + BitfieldInfoAttr::get($_builder.getContext(), + name, storage_type, + size, offset, is_signed); + build($_builder, $_state, type, addr, info, is_volatile); + }]> + ]; +} + +//===----------------------------------------------------------------------===// +// GetMemberOp +//===----------------------------------------------------------------------===// + +def GetMemberOp : CIR_Op<"get_member"> { + let summary = "Get the address of a member of a struct"; + let description = [{ + The `cir.get_member` operation gets the address of a particular named + member from the input record. + + It expects a pointer to the base record as well as the name of the member + and its field index. + + Example: + ```mlir + // Suppose we have a struct with multiple members. + !s32i = !cir.int + !s8i = !cir.int + !struct_ty = !cir.struct<"struct.Bar" {!s32i, !s8i}> + + // Get the address of the member at index 1. + %1 = cir.get_member %0[1] {name = "i"} : (!cir.ptr) -> !cir.ptr + ``` + }]; + + let arguments = (ins + Arg:$addr, + StrAttr:$name, + IndexAttr:$index_attr); + + let results = (outs Res:$result); + + let assemblyFormat = [{ + $addr `[` $index_attr `]` attr-dict + `:` qualified(type($addr)) `->` qualified(type($result)) + }]; + + let builders = [ + OpBuilder<(ins "Type":$type, + "Value":$value, + "llvm::StringRef":$name, + "unsigned":$index), + [{ + mlir::APInt fieldIdx(64, index); + build($_builder, $_state, type, value, name, fieldIdx); + }]> + ]; + + let extraClassDeclaration = [{ + /// Return the index of the struct member being accessed. + uint64_t getIndex() { return getIndexAttr().getZExtValue(); } + + /// Return the record type pointed by the base pointer. + mlir::cir::PointerType getAddrTy() { return getAddr().getType(); } + + /// Return the result type. + mlir::cir::PointerType getResultTy() { + return mlir::cast(getResult().getType()); + } + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// GetRuntimeMemberOp +//===----------------------------------------------------------------------===// + +def GetRuntimeMemberOp : CIR_Op<"get_runtime_member"> { + let summary = "Get the address of a member of a struct"; + let description = [{ + The `cir.get_runtime_member` operation gets the address of a member from + the input record. The target member is given by a value of type + `!cir.data_member` (i.e. a pointer-to-data-member value). 
+ + This operation differs from `cir.get_member` in when the target member can + be determined. For the `cir.get_member` operation, the target member is + specified as a constant index so the member it returns access to is known + when the operation is constructed. For the `cir.get_runtime_member` + operation, the target member is given through a pointer-to-data-member + value which is unknown until the program being compiled is executed. In + other words, `cir.get_member` represents a normal member access through the + `.` operator in C/C++: + + ```cpp + struct Foo { int x; }; + Foo f; + (void)f.x; // cir.get_member + ``` + + And `cir.get_runtime_member` represents a member access through the `.*` or + the `->*` operator in C++: + + ```cpp + struct Foo { int x; } + Foo f; + Foo *p; + int Foo::*member; + + (void)f.*member; // cir.get_runtime_member + (void)f->*member; // cir.get_runtime_member + ``` + + This operation expects a pointer to the base record as well as the pointer + to the target member. + }]; + + let arguments = (ins + Arg:$addr, + Arg:$member); + + let results = (outs Res:$result); + + let assemblyFormat = [{ + $addr `[` $member `:` qualified(type($member)) `]` attr-dict + `:` qualified(type($addr)) `->` qualified(type($result)) + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// GetMethodOp +//===----------------------------------------------------------------------===// + +def GetMethodOp : CIR_Op<"get_method"> { + let summary = "Resolve a method to a function pointer as callee"; + let description = [{ + The `cir.get_method` operation takes a method and an object as input, and + yields a function pointer that points to the actual function corresponding + to the input method. The operation also applies any necessary adjustments to + the input object pointer for calling the method and yields the adjusted + pointer. + + This operation is generated when calling a method through a pointer-to- + member-function in C++: + + ```cpp + // Foo *object; + // int arg; + // void (Foo::*method)(int); + + (object->*method)(arg); + ``` + + The code above will generate CIR similar as: + + ```mlir + // %object = ... + // %arg = ... + // %method = ... + %callee, %this = cir.get_method %method, %object + cir.call %callee(%this, %arg) + ``` + + The method type must match the callee type. That is: + - The return type of the method must match the return type of the callee. + - The first parameter of the callee must have type `!cir.ptr`. + - Types of other parameters of the callee must match the parameters of the + method. 
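+
+    For illustration (a sketch of the usual lowering, assumed here rather
+    than stated by this description), a method of type `void (Foo::*)(int)`
+    resolves to a callee that can be thought of as a plain function whose
+    leading parameter is the adjusted object pointer:
+
+    ```cpp
+    struct Foo {
+      void method(int);
+    };
+
+    // Hypothetical shape of the resolved callee: the implicit object
+    // parameter becomes an explicit leading void* parameter, matching the
+    // adjusted_this result of cir.get_method.
+    using ResolvedCallee = void (*)(void *adjusted_this, int arg);
+    ```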
+ }]; + + let arguments = (ins CIR_MethodType:$method, StructPtr:$object); + let results = (outs FuncPtr:$callee, VoidPtr:$adjusted_this); + + let assemblyFormat = [{ + $method `,` $object + `:` `(` qualified(type($method)) `,` qualified(type($object)) `)` + `->` `(` qualified(type($callee)) `,` qualified(type($adjusted_this)) `)` + attr-dict + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// VecInsertOp +//===----------------------------------------------------------------------===// + +def VecInsertOp : CIR_Op<"vec.insert", [Pure, + TypesMatchWith<"argument type matches vector element type", "vec", "value", + "cast($_self).getEltType()">, + AllTypesMatch<["result", "vec"]>]> { + + let summary = "Insert one element into a vector object"; + let description = [{ + The `cir.vec.insert` operation replaces the element of the given vector at + the given index with the given value. The new vector with the inserted + element is returned. + }]; + + let arguments = (ins CIR_VectorType:$vec, AnyType:$value, PrimitiveInt:$index); + let results = (outs CIR_VectorType:$result); + + let assemblyFormat = [{ + $value `,` $vec `[` $index `:` type($index) `]` attr-dict `:` + qualified(type($vec)) + }]; + + let hasVerifier = 0; + + let llvmOp = "InsertElementOp"; +} + +//===----------------------------------------------------------------------===// +// VecExtractOp +//===----------------------------------------------------------------------===// + +def VecExtractOp : CIR_Op<"vec.extract", [Pure, + TypesMatchWith<"type of 'result' matches element type of 'vec'", "vec", + "result", "cast($_self).getEltType()">]> { + + let summary = "Extract one element from a vector object"; + let description = [{ + The `cir.vec.extract` operation extracts the element at the given index + from a vector object. + }]; + + let arguments = (ins CIR_VectorType:$vec, PrimitiveInt:$index); + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + $vec `[` $index `:` type($index) `]` attr-dict `:` qualified(type($vec)) + }]; + + let hasVerifier = 0; + + let llvmOp = "ExtractElementOp"; +} + +//===----------------------------------------------------------------------===// +// VecCreate +//===----------------------------------------------------------------------===// + +def VecCreateOp : CIR_Op<"vec.create", [Pure]> { + + let summary = "Create a vector value"; + let description = [{ + The `cir.vec.create` operation creates a vector value with the given element + values. The number of element arguments must match the number of elements + in the vector type. + }]; + + let arguments = (ins Variadic:$elements); + let results = (outs CIR_VectorType:$result); + + let assemblyFormat = [{ + `(` ($elements^ `:` type($elements))? `)` `:` qualified(type($result)) + attr-dict + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// VecSplat +//===----------------------------------------------------------------------===// + +// cir.vec.splat is a separate operation from cir.vec.create because more +// efficient LLVM IR can be generated for it, and because some optimization and +// analysis passes can benefit from knowing that all elements of the vector +// have the same value. 
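+//
+// For illustration (an assumption about typical sources rather than a
+// guarantee), a scalar broadcast in Clang's vector extensions is the kind
+// of construct that yields a splat:
+//
+//   typedef int vi4 __attribute__((ext_vector_type(4)));
+//   vi4 add1(vi4 v) { return v + 1; }  // the scalar 1 is splat to a vector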
+ +def VecSplatOp : CIR_Op<"vec.splat", [Pure, + TypesMatchWith<"type of 'value' matches element type of 'result'", "result", + "value", "cast($_self).getEltType()">]> { + + let summary = "Convert a scalar into a vector"; + let description = [{ + The `cir.vec.splat` operation creates a vector value from a scalar value. + All elements of the vector have the same value, that of the given scalar. + }]; + + let arguments = (ins CIR_AnyType:$value); + let results = (outs CIR_VectorType:$result); + + let assemblyFormat = [{ + $value `:` type($value) `,` qualified(type($result)) attr-dict + }]; + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// VecCmp +//===----------------------------------------------------------------------===// + +def VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> { + + let summary = "Compare two vectors"; + let description = [{ + The `cir.vec.cmp` operation does an element-wise comparison of two vectors + of the same type. The result is a vector of the same size as the operands + whose element type is the signed integral type that is the same size as the + element type of the operands. The values in the result are 0 or -1. + }]; + + let arguments = (ins Arg:$kind, CIR_VectorType:$lhs, + CIR_VectorType:$rhs); + let results = (outs CIR_VectorType:$result); + + let assemblyFormat = [{ + `(` $kind `,` $lhs `,` $rhs `)` `:` qualified(type($lhs)) `,` + qualified(type($result)) attr-dict + }]; + + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// VecTernary +//===----------------------------------------------------------------------===// + +def VecTernaryOp : CIR_Op<"vec.ternary", + [Pure, AllTypesMatch<["result", "vec1", "vec2"]>]> { + let summary = "The `cond ? a : b` ternary operator for vector types"; + let description = [{ + The `cir.vec.ternary` operation represents the C/C++ ternary operator, + `?:`, for vector types, which does a `select` on individual elements of the + vectors. Unlike a regular `?:` operator, there is no short circuiting. All + three arguments are always evaluated. Because there is no short + circuiting, there are no regions in this operation, unlike cir.ternary. + + The first argument is a vector of integral type. The second and third + arguments are vectors of the same type and have the same number of elements + as the first argument. + + The result is a vector of the same type as the second and third arguments. + Each element of the result is `(bool)a[n] ? b[n] : c[n]`. + }]; + let arguments = (ins IntegerVector:$cond, CIR_VectorType:$vec1, + CIR_VectorType:$vec2); + let results = (outs CIR_VectorType:$result); + let assemblyFormat = [{ + `(` $cond `,` $vec1 `,` $vec2 `)` `:` qualified(type($cond)) `,` + qualified(type($vec1)) attr-dict + }]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// VecShuffle +//===----------------------------------------------------------------------===// + +// TODO: Create an interface that both VecShuffleOp and VecShuffleDynamicOp +// implement. This could be useful for passes that don't care how the vector +// shuffle was specified. 
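+//
+// Source-level illustration (assumed typical input, not normative): both
+// shuffle ops model Clang's __builtin_shufflevector. The constant-index
+// form below corresponds to VecShuffleOp; the variant taking a runtime
+// index vector corresponds to VecShuffleDynamicOp.
+//
+//   typedef int vi4 __attribute__((ext_vector_type(4)));
+//   vi4 rev(vi4 v) { return __builtin_shufflevector(v, v, 3, 2, 1, 0); }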
+ +def VecShuffleOp : CIR_Op<"vec.shuffle", + [Pure, AllTypesMatch<["vec1", "vec2"]>]> { + let summary = "Combine two vectors using indices passed as constant integers"; + let description = [{ + The `cir.vec.shuffle` operation implements the documented form of Clang's + __builtin_shufflevector, where the indices of the shuffled result are + integer constants. + + The two input vectors, which must have the same type, are concatenated. + Each of the integer constant arguments is interpreted as an index into that + concatenated vector, with a value of -1 meaning that the result value + doesn't matter. The result vector, which must have the same element type as + the input vectors and the same number of elements as the list of integer + constant indices, is constructed by taking the elements at the given + indices from the concatenated vector. The size of the result vector does + not have to match the size of the individual input vectors or of the + concatenated vector. + }]; + let arguments = (ins CIR_VectorType:$vec1, CIR_VectorType:$vec2, + ArrayAttr:$indices); + let results = (outs CIR_VectorType:$result); + let assemblyFormat = [{ + `(` $vec1 `,` $vec2 `:` qualified(type($vec1)) `)` $indices `:` + qualified(type($result)) attr-dict + }]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// VecShuffleDynamic +//===----------------------------------------------------------------------===// + +def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic", + [Pure, AllTypesMatch<["vec", "result"]>]> { + let summary = "Shuffle a vector using indices in another vector"; + let description = [{ + The `cir.vec.shuffle.dynamic` operation implements the undocumented form of + Clang's __builtin_shufflevector, where the indices of the shuffled result + can be runtime values. + + There are two input vectors, which must have the same number of elements. + The second input vector must have an integral element type. The elements of + the second vector are interpreted as indices into the first vector. The + result vector is constructed by taking the elements from the first input + vector from the indices indicated by the elements of the second vector. + }]; + let arguments = (ins CIR_VectorType:$vec, IntegerVector:$indices); + let results = (outs CIR_VectorType:$result); + let assemblyFormat = [{ + $vec `:` qualified(type($vec)) `,` $indices `:` qualified(type($indices)) + attr-dict + }]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// BaseClassAddr +//===----------------------------------------------------------------------===// + +def BaseClassAddrOp : CIR_Op<"base_class_addr"> { + let summary = "Get the base class address for a class/struct"; + let description = [{ + The `cir.base_class_addr` operaration gets the address of a particular + base class given a derived class pointer. + + Example: + ```mlir + TBD + ``` + }]; + + let arguments = (ins + Arg:$derived_addr); + + let results = (outs Res:$base_addr); + + let assemblyFormat = [{ + `(` + $derived_addr `:` qualified(type($derived_addr)) + `)` `->` qualified(type($base_addr)) attr-dict + }]; + + // FIXME: add verifier. + // Check whether both src/dst pointee's are compatible. 
+ let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// FuncOp +//===----------------------------------------------------------------------===// + +// The enumeration values are not necessarily in sync with `clang::CallingConv` +// or `llvm::CallingConv`. +def CC_C : I32EnumAttrCase<"C", 1, "c">; +def CC_SpirKernel : I32EnumAttrCase<"SpirKernel", 2, "spir_kernel">; +def CC_SpirFunction : I32EnumAttrCase<"SpirFunction", 3, "spir_function">; + +def CallingConv : I32EnumAttr< + "CallingConv", + "calling convention", + [CC_C, CC_SpirKernel, CC_SpirFunction]> { + let cppNamespace = "::mlir::cir"; +} + +def FuncOp : CIR_Op<"func", [ + AutomaticAllocationScope, CallableOpInterface, FunctionOpInterface, + DeclareOpInterfaceMethods, + IsolatedFromAbove +]> { + let summary = "Declare or define a function"; + let description = [{ + + Similar to `mlir::FuncOp` built-in: + > Operations within the function cannot implicitly capture values defined + > outside of the function, i.e. Functions are `IsolatedFromAbove`. All + > external references must use function arguments or attributes that establish + > a symbolic connection (e.g. symbols referenced by name via a string + > attribute like SymbolRefAttr). An external function declaration (used when + > referring to a function declared in some other module) has no body. While + > the MLIR textual form provides a nice inline syntax for function arguments, + > they are internally represented as “block arguments” to the first block in + > the region. + > + > Only dialect attribute names may be specified in the attribute dictionaries + > for function arguments, results, or the function itself. + + The function linkage information is specified by `linkage`, as defined by + `GlobalLinkageKind` attribute. + + The `calling_conv` attribute specifies the calling convention of the function. + The default calling convention is `CallingConv::C`. + + A compiler builtin function must be marked as `builtin` for further + processing when lowering from CIR. + + The `coroutine` keyword is used to mark coroutine function, which requires + at least one `cir.await` instruction to be used in its body. + + The `lambda` translates to a C++ `operator()` that implements a lambda, this + allow callsites to make certain assumptions about the real function nature + when writing analysis. The verifier should, but do act on this keyword yet. + + The `no_proto` keyword is used to identify functions that were declared + without a prototype and, consequently, may contain calls with invalid + arguments and undefined behavior. + + The `extra_attrs`, which is an aggregate of function-specific attributes is + required and mandatory to describle additional attributes that are not listed + above. Though mandatory, the prining of the attribute can be omitted if it is + empty. + + The `global_ctor` indicates whether a function should execute before `main()` + function, as specified by `__attribute__((constructor))`. A execution priority + can also be specified `global_ctor()`. Similarly, for global destructors + both `global_dtor` and `global_dtor()` are available. + + Example: + + ```mlir + // External function definitions. + cir.func @abort() + + // A function with internal linkage. + cir.func internal @count(%x: i64) -> (i64) + return %x : i64 + } + + // Linkage information + cir.func linkonce_odr @some_method(...) + + // Calling convention information + cir.func @another_func(...) 
cc(spir_kernel) extra(#fn_attr) + + // Builtin function + cir.func builtin @__builtin_coro_end(!cir.ptr, !cir.bool) -> !cir.bool + + // Coroutine + cir.func coroutine @_Z10silly_taskv() -> !CoroTask { + ... + cir.await(...) + ... + } + ``` + }]; + + let arguments = (ins SymbolNameAttr:$sym_name, + VisibilityAttr:$global_visibility, + TypeAttrOf:$function_type, + UnitAttr:$builtin, + UnitAttr:$coroutine, + UnitAttr:$lambda, + UnitAttr:$no_proto, + UnitAttr:$dsolocal, + DefaultValuedAttr:$linkage, + DefaultValuedAttr:$calling_conv, + ExtraFuncAttr:$extra_attrs, + OptionalAttr:$sym_visibility, + UnitAttr:$comdat, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs, + OptionalAttr:$aliasee, + OptionalAttr:$global_ctor, + OptionalAttr:$global_dtor, + OptionalAttr:$annotations, + OptionalAttr:$ast); + let regions = (region AnyRegion:$body); + let skipDefaultBuilders = 1; + + let builders = [OpBuilder<(ins + "StringRef":$name, "FuncType":$type, + CArg<"GlobalLinkageKind", "GlobalLinkageKind::ExternalLinkage">:$linkage, + CArg<"CallingConv", "CallingConv::C">:$callingConv, + CArg<"ArrayRef", "{}">:$attrs, + CArg<"ArrayRef", "{}">:$argAttrs) + >]; + + let extraClassDeclaration = [{ + /// Returns the region on the current operation that is callable. This may + /// return null in the case of an external callable object, e.g. an external + /// function. + ::mlir::Region *getCallableRegion(); + + /// Returns the results types that the callable region produces when + /// executed. + ArrayRef getCallableResults() { + if (::llvm::isa(getFunctionType().getReturnType())) + return {}; + return getFunctionType().getReturnTypes(); + } + + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or null if + /// there are none. + ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } + + /// Returns the argument types of this function. + ArrayRef getArgumentTypes() { return getFunctionType().getInputs(); } + + /// Returns the result types of this function. + ArrayRef getResultTypes() { return getFunctionType().getReturnTypes(); } + + /// Hook for OpTrait::FunctionOpInterfaceTrait, called after verifying that + /// the 'type' attribute is present and checks if it holds a function type. + /// Ensures getType, getNumFuncArguments, and getNumFuncResults can be + /// called safely. + LogicalResult verifyType(); + + //===------------------------------------------------------------------===// + // SymbolOpInterface Methods + //===------------------------------------------------------------------===// + + bool isDeclaration(); + + bool hasAvailableExternallyLinkage() { + return mlir::cir::isAvailableExternallyLinkage(getLinkage()); + } + }]; + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// IntrinsicCallOp +//===----------------------------------------------------------------------===// + +def IntrinsicCallOp : CIR_Op<"llvm.intrinsic"> { + let summary = "Call to intrinsic functions that is not defined in CIR"; + let description = [{ + `cir.llvm.intrinsic` operation represents a call-like expression which has + return type and arguments that maps directly to a llvm intrinsic. + It only records intrinsic `intrinsic_name`. 
+ }]; + + let results = (outs Optional:$result); + let arguments = (ins + StrAttr:$intrinsic_name, Variadic:$arg_ops); + + let skipDefaultBuilders = 1; + + let assemblyFormat = [{ + $intrinsic_name $arg_ops `:` functional-type($arg_ops, $result) attr-dict + }]; + + let builders = [ + OpBuilder<(ins "mlir::StringAttr":$intrinsic_name, "mlir::Type":$resType, + CArg<"ValueRange", "{}">:$operands), [{ + $_state.addAttribute("intrinsic_name", intrinsic_name); + $_state.addOperands(operands); + if (resType) + $_state.addTypes(resType); + }]>, + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// CallOp and TryCallOp +//===----------------------------------------------------------------------===// + +class CIR_CallOp extra_traits = []> : + Op, + DeclareOpInterfaceMethods])> { + let extraClassDeclaration = [{ + /// Get the argument operands to the called function. + OperandRange getArgOperands() { + return {arg_operand_begin(), arg_operand_end()}; + } + + MutableOperandRange getArgOperandsMutable() { + llvm_unreachable("NYI"); + } + + /// Return the callee of this operation + CallInterfaceCallable getCallableForCallee() { + return (*this)->getAttrOfType("callee"); + } + + /// Set the callee for this operation. + void setCalleeFromCallable(::mlir::CallInterfaceCallable callee) { + if (auto calling = + (*this)->getAttrOfType(getCalleeAttrName())) + (*this)->setAttr(getCalleeAttrName(), callee.get()); + setOperand(0, callee.get()); + } + + bool isIndirect() { return !getCallee(); } + mlir::Value getIndirectCall(); + }]; + + let hasCustomAssemblyFormat = 1; + let skipDefaultBuilders = 1; + let hasVerifier = 0; + + dag commonArgs = (ins + OptionalAttr:$callee, + Variadic:$arg_ops, + DefaultValuedAttr:$calling_conv, + ExtraFuncAttr:$extra_attrs, + OptionalAttr:$ast + ); +} + +def CallOp : CIR_CallOp<"call", [NoRegionArguments]> { + let summary = "call operation"; + let description = [{ + Direct and indirect calls. + + For direct calls, the `call` operation represents a direct call to a + function that is within the same symbol scope as the call. The operands + and result types of the call must match the specified function type. + The callee is encoded as a aymbol reference attribute named "callee". + + For indirect calls, the first `mlir::Operation` operand is the call target. + + Given the way indirect calls are encoded, avoid using `mlir::Operation` + methods to walk the operands for this operation, instead use the methods + provided by `CIRCallOpInterface`. + + If the `cir.call` has the `exception` keyword, the call can throw. In this + case, cleanups can be added in the `cleanup` region. + + Example: + + ```mlir + // Direct call + %2 = cir.call @my_add(%0, %1) : (f32, f32) -> f32 + ... + // Indirect call + %20 = cir.call %18(%17) + ... + // Call that might throw + cir.call exception @my_div() -> () cleanup { + // call dtor... 
+ } + ``` + }]; + + let results = (outs Optional:$result); + let arguments = !con((ins + UnitAttr:$exception + ), commonArgs); + let regions = (region AnyRegion:$cleanup); + + let skipDefaultBuilders = 1; + + let builders = [ + OpBuilder<(ins "SymbolRefAttr":$callee, "mlir::Type":$resType, + CArg<"ValueRange", "{}">:$operands, + CArg<"CallingConv", "CallingConv::C">:$callingConv, + CArg<"UnitAttr", "{}">:$exception), [{ + $_state.addOperands(operands); + if (callee) + $_state.addAttribute("callee", callee); + $_state.addAttribute("calling_conv", + CallingConvAttr::get($_builder.getContext(), callingConv)); + if (exception) + $_state.addAttribute("exception", exception); + if (resType && !isa(resType)) + $_state.addTypes(resType); + // Create region placeholder for potential cleanups. + $_state.addRegion(); + }]>, + OpBuilder<(ins "Value":$ind_target, + "FuncType":$fn_type, + CArg<"ValueRange", "{}">:$operands, + CArg<"CallingConv", "CallingConv::C">:$callingConv, + CArg<"UnitAttr", "{}">:$exception), [{ + $_state.addOperands(ValueRange{ind_target}); + $_state.addOperands(operands); + if (!fn_type.isVoid()) + $_state.addTypes(fn_type.getReturnType()); + $_state.addAttribute("calling_conv", + CallingConvAttr::get($_builder.getContext(), callingConv)); + if (exception) + $_state.addAttribute("exception", exception); + // Create region placeholder for potential cleanups. + $_state.addRegion(); + }]> + ]; +} + +def TryCallOp : CIR_CallOp<"try_call", + [DeclareOpInterfaceMethods, Terminator, + AttrSizedOperandSegments]> { + let summary = "try_call operation"; + let description = [{ + Mostly similar to cir.call but requires two destination + branches, one for handling exceptions in case its thrown and + the other one to follow on regular control-flow. + + Example: + + ```mlir + // Direct call + %2 = cir.try_call @my_add(%0, %1) ^continue, ^landing_pad : (f32, f32) -> f32 + ``` + }]; + + let arguments = !con((ins + Variadic:$contOperands, + Variadic:$landingPadOperands + ), commonArgs); + + let results = (outs Optional:$result); + let successors = (successor AnySuccessor:$cont, + AnySuccessor:$landing_pad); + + let skipDefaultBuilders = 1; + + let builders = [ + OpBuilder<(ins "SymbolRefAttr":$callee, "mlir::Type":$resType, + "Block *":$cont, "Block *":$landing_pad, + CArg<"ValueRange", "{}">:$operands, + CArg<"ValueRange", "{}">:$contOperands, + CArg<"ValueRange", "{}">:$landingPadOperands, + CArg<"CallingConv", "CallingConv::C">:$callingConv), [{ + $_state.addOperands(operands); + if (callee) + $_state.addAttribute("callee", callee); + if (resType && !isa(resType)) + $_state.addTypes(resType); + + $_state.addAttribute("calling_conv", + CallingConvAttr::get($_builder.getContext(), callingConv)); + + // Handle branches + $_state.addOperands(contOperands); + $_state.addOperands(landingPadOperands); + // The TryCall ODS layout is: cont, landing_pad, operands. 
+ llvm::copy(::llvm::ArrayRef({ + static_cast(contOperands.size()), + static_cast(landingPadOperands.size()), + static_cast(operands.size()) + }), + odsState.getOrAddProperties().operandSegmentSizes.begin()); + $_state.addSuccessors(cont); + $_state.addSuccessors(landing_pad); + }]>, + OpBuilder<(ins "Value":$ind_target, + "FuncType":$fn_type, + "Block *":$cont, "Block *":$landing_pad, + CArg<"ValueRange", "{}">:$operands, + CArg<"ValueRange", "{}">:$contOperands, + CArg<"ValueRange", "{}">:$landingPadOperands, + CArg<"CallingConv", "CallingConv::C">:$callingConv), [{ + ::llvm::SmallVector finalCallOperands({ind_target}); + finalCallOperands.append(operands.begin(), operands.end()); + $_state.addOperands(finalCallOperands); + + if (!fn_type.isVoid()) + $_state.addTypes(fn_type.getReturnType()); + + $_state.addAttribute("calling_conv", + CallingConvAttr::get($_builder.getContext(), callingConv)); + + // Handle branches + $_state.addOperands(contOperands); + $_state.addOperands(landingPadOperands); + // The TryCall ODS layout is: cont, landing_pad, operands. + llvm::copy(::llvm::ArrayRef({ + static_cast(contOperands.size()), + static_cast(landingPadOperands.size()), + static_cast(finalCallOperands.size()) + }), + odsState.getOrAddProperties().operandSegmentSizes.begin()); + $_state.addSuccessors(cont); + $_state.addSuccessors(landing_pad); + }]> + ]; +} + +//===----------------------------------------------------------------------===// +// AwaitOp +//===----------------------------------------------------------------------===// + +def AK_Initial : I32EnumAttrCase<"init", 1>; +def AK_User : I32EnumAttrCase<"user", 2>; +def AK_Yield : I32EnumAttrCase<"yield", 3>; +def AK_Final : I32EnumAttrCase<"final", 4>; + +def AwaitKind : I32EnumAttr< + "AwaitKind", + "await kind", + [AK_Initial, AK_User, AK_Yield, AK_Final]> { + let cppNamespace = "::mlir::cir"; +} + +def AwaitOp : CIR_Op<"await", + [DeclareOpInterfaceMethods, + RecursivelySpeculatable, NoRegionArguments]> { + let summary = "Wraps C++ co_await implicit logic"; + let description = [{ + The under the hood effect of using C++ `co_await expr` roughly + translates to: + + ```c++ + // co_await expr; + + auto &&x = CommonExpr(); + if (!x.await_ready()) { + ... + x.await_suspend(...); + ... + } + x.await_resume(); + ``` + + `cir.await` represents this logic by using 3 regions: + - ready: covers veto power from x.await_ready() + - suspend: wraps actual x.await_suspend() logic + - resume: handles x.await_resume() + + Breaking this up in regions allow individual scrutiny of conditions + which might lead to folding some of them out. Lowerings coming out + of CIR, e.g. LLVM, should use the `suspend` region to track more + lower level codegen (e.g. intrinsic emission for coro.save/coro.suspend). + + There are also 4 flavors of `cir.await` available: + - `init`: compiler generated initial suspend via implicit `co_await`. + - `user`: also known as normal, representing user written co_await's. + - `yield`: user written `co_yield` expressions. + - `final`: compiler generated final suspend via implicit `co_await`. + + From the C++ snippet we get: + + ```mlir + cir.scope { + ... // auto &&x = CommonExpr(); + cir.await(user, ready : { + ... // x.await_ready() + }, suspend : { + ... // x.await_suspend() + }, resume : { + ... // x.await_resume() + }) + } + ``` + + Note that resulution of the common expression is assumed to happen + as part of the enclosing await scope. 
+ }]; + + let arguments = (ins AwaitKind:$kind); + let regions = (region SizedRegion<1>:$ready, + SizedRegion<1>:$suspend, + SizedRegion<1>:$resume); + let assemblyFormat = [{ + `(` $kind `,` + `ready` `:` $ready `,` + `suspend` `:` $suspend `,` + `resume` `:` $resume `,` + `)` + attr-dict + }]; + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins + "mlir::cir::AwaitKind":$kind, + CArg<"function_ref", + "nullptr">:$readyBuilder, + CArg<"function_ref", + "nullptr">:$suspendBuilder, + CArg<"function_ref", + "nullptr">:$resumeBuilder + )> + ]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// TryOp +//===----------------------------------------------------------------------===// + +// Represents the unwind region where unwind continues or +// the program std::terminate's. +def CatchUnwind : CIRUnitAttr<"CatchUnwind", "unwind"> { + let storageType = [{ CatchUnwind }]; +} + +// Represents the catch_all region. +def CatchAllAttr : CIRUnitAttr<"CatchAll", "all"> { + let storageType = [{ CatchAllAttr }]; +} + +def TryOp : CIR_Op<"try", + [DeclareOpInterfaceMethods, + RecursivelySpeculatable, AutomaticAllocationScope, + NoRegionArguments]> { + let summary = "C++ try block"; + let description = [{ + ```mlir + + Holds the lexical scope of `try {}`. Note that resources used on catch + clauses are usually allocated in the same parent as `cir.try`. + + `synthetic`: use `cir.try` to represent try/catches not originally + present in the source code (e.g. `g = new Class` under `-fexceptions`). + + `cleanup`: signal to targets (LLVM for now) that this try/catch, needs + to specially tag their landing pads as needing "cleanup". + + Example: TBD + ``` + }]; + + let arguments = (ins UnitAttr:$synthetic, UnitAttr:$cleanup, + OptionalAttr:$catch_types); + let regions = (region AnyRegion:$try_region, + VariadicRegion:$catch_regions); + + let assemblyFormat = [{ + (`synthetic` $synthetic^)? + (`cleanup` $cleanup^)? + $try_region + custom($catch_regions, $catch_types) + attr-dict + }]; + + // Everything already covered elsewhere. + let hasVerifier = 0; + let builders = [ + OpBuilder<(ins + "function_ref":$tryBuilder, + "function_ref" + :$catchBuilder)>, + ]; +} + +//===----------------------------------------------------------------------===// +// CatchParamOp +//===----------------------------------------------------------------------===// + +def CatchParamBegin : I32EnumAttrCase<"begin", 0>; +def CatchParamEnd : I32EnumAttrCase<"end", 1>; +def CatchParamKind : I32EnumAttr< + "CatchParamKind", + "Designate limits for begin/end of catch param handling", + [CatchParamBegin, CatchParamEnd]> { + let cppNamespace = "::mlir::cir"; +} + +def CatchParamOp : CIR_Op<"catch_param"> { + let summary = "Represents catch clause formal parameter"; + let description = [{ + The `cir.catch_param` can operate in two modes: within catch regions of + `cir.try` or anywhere else with the `begin` or `end` markers. The `begin` + version requires an exception pointer of `cir.ptr`. + + Example: + ```mlir + // TBD + ``` + }]; + + let arguments = (ins Optional:$exception_ptr, + OptionalAttr:$kind); + let results = (outs Optional:$param); + let assemblyFormat = [{ + ($kind^)? + ($exception_ptr^)? + (`->` qualified(type($param))^)? 
+ attr-dict + }]; + + let extraClassDeclaration = [{ + bool isBegin() { return getKind() == mlir::cir::CatchParamKind::begin; } + bool isEnd() { return getKind() == mlir::cir::CatchParamKind::end; } + }]; + + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// Exception related: EhInflightOp, EhTypeIdOp +//===----------------------------------------------------------------------===// + +def EhInflightOp : CIR_Op<"eh.inflight_exception"> { + let summary = "Materialize the catch clause formal parameter"; + let description = [{ + `cir.eh.inflight_exception` returns two values: + - `exception_ptr`: The exception pointer for the inflight exception + - `type_id`: pointer to the exception object + This operation is expected to be the first one basic blocks on the + exception path out of `cir.try_call` operations. + + The `cleanup` attribute indicates that clean up code might run before the + values produced by this operation are used to gather exception information. + This helps CIR to pass down more accurate information for LLVM lowering + to landingpads. + }]; + + let arguments = (ins UnitAttr:$cleanup, + OptionalAttr:$sym_type_list); + let results = (outs VoidPtr:$exception_ptr, UInt32:$type_id); + let assemblyFormat = [{ + (`cleanup` $cleanup^)? + ($sym_type_list^)? + attr-dict + }]; + + let hasVerifier = 0; +} + +def EhTypeIdOp : CIR_Op<"eh.typeid", + [Pure, DeclareOpInterfaceMethods]> { + let summary = "Compute exception type id from it's global type symbol"; + let description = [{ + Returns the exception type id for a given global symbol representing + a type. + }]; + + let arguments = (ins FlatSymbolRefAttr:$type_sym); + let results = (outs UInt32:$type_id); + let assemblyFormat = [{ + $type_sym attr-dict + }]; + + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// CopyOp +//===----------------------------------------------------------------------===// + +def CopyOp : CIR_Op<"copy", + [SameTypeOperands, + DeclareOpInterfaceMethods]> { + let arguments = (ins Arg:$dst, + Arg:$src, + UnitAttr:$is_volatile); + let summary = "Copies contents from a CIR pointer to another"; + let description = [{ + Given two CIR pointers, `src` and `dst`, `cir.copy` will copy the memory + pointed by `src` to the memory pointed by `dst`. + + The amount of bytes copied is inferred from the pointee type. Naturally, + the pointee type of both `src` and `dst` must match and must implement + the `DataLayoutTypeInterface`. + + Examples: + + ```mlir + // Copying contents from one struct to another: + cir.copy %0 to %1 : !cir.ptr + ``` + }]; + + let assemblyFormat = [{$src `to` $dst (`volatile` $is_volatile^)? + attr-dict `:` qualified(type($dst)) }]; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + /// Returns the pointer type being copied. + mlir::cir::PointerType getType() { return getSrc().getType(); } + + /// Returns the number of bytes to be copied. 
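+ /// (Sketch of the intent, assuming the MLIR DataLayout helpers behave as
+ /// used below: the length is the data-layout size in bytes of the pointee
+ /// type, e.g. a pointer to a struct of two 32-bit ints reports 8.)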
+ unsigned getLength() { + return DataLayout::closest(*this).getTypeSize(getType().getPointee()); + } + }]; +} + +//===----------------------------------------------------------------------===// +// MemCpyOp +//===----------------------------------------------------------------------===// + +def MemCpyOp : CIR_Op<"libc.memcpy"> { + let arguments = (ins Arg:$dst, + Arg:$src, + PrimitiveInt:$len); + let summary = "Equivalent to libc's `memcpy`"; + let description = [{ + Given two CIR pointers, `src` and `dst`, `cir.libc.memcpy` will copy `len` + bytes from the memory pointed by `src` to the memory pointed by `dst`. + + While `cir.copy` is meant to be used for implicit copies in the code where + the length of the copy is known, `cir.memcpy` copies only from and to void + pointers, requiring the copy length to be passed as an argument. + + Examples: + + ```mlir + // Copying 2 bytes from one array to a struct: + %2 = cir.const #cir.int<2> : !u32i + cir.libc.memcpy %2 bytes from %arr to %struct : !cir.ptr -> !cir.ptr + ``` + }]; + + let assemblyFormat = [{ + $len `bytes` `from` $src `to` $dst attr-dict + `:` type($len) `` `,` qualified(type($src)) `->` qualified(type($dst)) + }]; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + /// Returns the data source pointer type. + mlir::cir::PointerType getSrcTy() { return getSrc().getType(); } + + /// Returns the data destination pointer type. + mlir::cir::PointerType getDstTy() { return getDst().getType(); } + + /// Returns the byte length type. + mlir::cir::IntType getLenTy() { return getLen().getType(); } + }]; +} + +//===----------------------------------------------------------------------===// +// MemChrOp +//===----------------------------------------------------------------------===// + +def MemChrOp : CIR_Op<"libc.memchr"> { + // TODO: instead of using UInt64 for len, we could make it constrained on + // size_t (64 or 32) and have a builder that does the right job. + let arguments = (ins Arg:$src, + SInt32:$pattern, + UInt64:$len); + let summary = "libc's `memchr`"; + let results = (outs Res:$result); + + let description = [{ + Search for `pattern` in data range from `src` to `src` + `len`. + provides a bound to the search in `src`. `result` is a pointer to found + `pattern` or a null pointer. + + Examples: + + ```mlir + %p = cir.libc.memchr(%src, %pattern, %len) -> !cir.ptr + ``` + }]; + + let assemblyFormat = [{ + `(` + $src `,` $pattern `,` $len `)` attr-dict + }]; + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// StdFindOp +//===----------------------------------------------------------------------===// + +def StdFindOp : CIR_Op<"std.find", [SameFirstSecondOperandAndResultType]> { + let arguments = (ins FlatSymbolRefAttr:$original_fn, + CIR_AnyType:$first, + CIR_AnyType:$last, + CIR_AnyType:$pattern); + let summary = "std:find()"; + let results = (outs CIR_AnyType:$result); + + let description = [{ + Search for `pattern` in data range from `first` to `last`. This currently + maps to only one form of `std::find`. The `original_fn` operand tracks the + mangled named that can be used when lowering to a `cir.call`. + + Example: + + ```mlir + ... 
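+ // (illustrative) %first and %last share the iterator type !T, while
+ // %pattern carries the type !P of the value being searched for.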
+ %result = cir.std.find(@original_fn, + %first : !T, %last : !T, %pattern : !P) -> !T + ``` + }]; + + let assemblyFormat = [{ + `(` + $original_fn + `,` $first `:` type($first) + `,` $last `:` type($last) + `,` $pattern `:` type($pattern) + `)` `->` type($result) attr-dict + }]; + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// IterBegin/End +//===----------------------------------------------------------------------===// + +def IterBeginOp : CIR_Op<"iterator_begin"> { + let arguments = (ins FlatSymbolRefAttr:$original_fn, CIR_AnyType:$container); + let summary = "Returns an iterator to the first element of a container"; + let results = (outs CIR_AnyType:$result); + let assemblyFormat = [{ + `(` + $original_fn `,` $container `:` type($container) + `)` `->` type($result) attr-dict + }]; + let hasVerifier = 0; +} + +def IterEndOp : CIR_Op<"iterator_end"> { + let arguments = (ins FlatSymbolRefAttr:$original_fn, CIR_AnyType:$container); + let summary = "Returns an iterator to the element following the last element" + " of a container"; + let results = (outs CIR_AnyType:$result); + let assemblyFormat = [{ + `(` + $original_fn `,` $container `:` type($container) + `)` `->` type($result) attr-dict + }]; + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// Floating Point Ops +//===----------------------------------------------------------------------===// + +class UnaryFPToIntBuiltinOp + : CIR_Op { + let arguments = (ins CIR_AnyFloat:$src); + let results = (outs CIR_IntType:$result); + + let summary = [{ + Builtin function that takes a floating-point value as input and produces an + integral value as output. + }]; + + let assemblyFormat = [{ + $src `:` type($src) `->` type($result) attr-dict + }]; + + let llvmOp = llvmOpName; +} + +def LroundOp : UnaryFPToIntBuiltinOp<"lround", "LroundOp">; +def LLroundOp : UnaryFPToIntBuiltinOp<"llround", "LlroundOp">; +def LrintOp : UnaryFPToIntBuiltinOp<"lrint", "LrintOp">; +def LLrintOp : UnaryFPToIntBuiltinOp<"llrint", "LlrintOp">; + +class UnaryFPToFPBuiltinOp + : CIR_Op { + let arguments = (ins CIR_AnyFloat:$src); + let results = (outs CIR_AnyFloat:$result); + let summary = "libc builtin equivalent ignoring " + "floating point exceptions and errno"; + let assemblyFormat = "$src `:` type($src) attr-dict"; + + let llvmOp = llvmOpName; +} + +def CeilOp : UnaryFPToFPBuiltinOp<"ceil", "FCeilOp">; +def CosOp : UnaryFPToFPBuiltinOp<"cos", "CosOp">; +def ExpOp : UnaryFPToFPBuiltinOp<"exp", "ExpOp">; +def Exp2Op : UnaryFPToFPBuiltinOp<"exp2", "Exp2Op">; +def FloorOp : UnaryFPToFPBuiltinOp<"floor", "FFloorOp">; +def FAbsOp : UnaryFPToFPBuiltinOp<"fabs", "FAbsOp">; +def LogOp : UnaryFPToFPBuiltinOp<"log", "LogOp">; +def Log10Op : UnaryFPToFPBuiltinOp<"log10", "Log10Op">; +def Log2Op : UnaryFPToFPBuiltinOp<"log2", "Log2Op">; +def NearbyintOp : UnaryFPToFPBuiltinOp<"nearbyint", "NearbyintOp">; +def RintOp : UnaryFPToFPBuiltinOp<"rint", "RintOp">; +def RoundOp : UnaryFPToFPBuiltinOp<"round", "RoundOp">; +def SinOp : UnaryFPToFPBuiltinOp<"sin", "SinOp">; +def SqrtOp : UnaryFPToFPBuiltinOp<"sqrt", "SqrtOp">; +def TruncOp : UnaryFPToFPBuiltinOp<"trunc", "FTruncOp">; + +class BinaryFPToFPBuiltinOp + : CIR_Op { + let summary = [{ + libc builtin equivalent ignoring floating-point exceptions and errno. 
+ }]; + + let arguments = (ins CIR_AnyFloat:$lhs, CIR_AnyFloat:$rhs); + let results = (outs CIR_AnyFloat:$result); + + let assemblyFormat = [{ + $lhs `,` $rhs `:` qualified(type($lhs)) attr-dict + }]; + + let llvmOp = llvmOpName; +} + +def CopysignOp : BinaryFPToFPBuiltinOp<"copysign", "CopySignOp">; +def FMaxOp : BinaryFPToFPBuiltinOp<"fmax", "MaxNumOp">; +def FMinOp : BinaryFPToFPBuiltinOp<"fmin", "MinNumOp">; +def FModOp : BinaryFPToFPBuiltinOp<"fmod", "FRemOp">; +def PowOp : BinaryFPToFPBuiltinOp<"pow", "PowOp">; + +//===----------------------------------------------------------------------===// +// Assume Operations +//===----------------------------------------------------------------------===// + +def AssumeOp : CIR_Op<"assume"> { + let summary = "Tell the optimizer that a boolean value is true"; + let description = [{ + The `cir.assume` operation takes a single boolean prediate as its only + argument and does not have any results. The operation tells the optimizer + that the predicate's value is true. + + This operation corresponds to the `__assume` and the `__builtin_assume` + builtin function. + }]; + + let arguments = (ins CIR_BoolType:$predicate); + let results = (outs); + + let assemblyFormat = [{ + $predicate `:` type($predicate) attr-dict + }]; +} + +def AssumeAlignedOp + : CIR_Op<"assume.aligned", [Pure, AllTypesMatch<["pointer", "result"]>]> { + let summary = "Tell the optimizer that a pointer is aligned"; + let description = [{ + The `cir.assume.aligned` operation takes two or three arguments. + + When the 3rd argument `offset` is absent, this operation tells the optimizer + that the pointer given by the `pointer` argument is aligned to the alignment + given by the `align` argument. + + When the `offset` argument is given, it represents an offset from the + alignment. This operation then tells the optimizer that the pointer given by + the `pointer` argument is always misaligned by the alignment given by the + `align` argument by `offset` bytes, a.k.a. the pointer yielded by + `(char *)pointer - offset` is aligned to the specified alignment. + + The `align` argument is a constant integer represented as an integer + attribute instead of an SSA value. It must be a positive integer. + + The result of this operation has the same value as the `pointer` argument, + but the optimizer has additional knowledge about its alignment. + + This operation corresponds to the `__builtin_assume_aligned` builtin + function. + }]; + + let arguments = (ins CIR_PointerType:$pointer, + I64Attr:$alignment, + Optional:$offset); + let results = (outs CIR_PointerType:$result); + + let assemblyFormat = [{ + $pointer `:` qualified(type($pointer)) + `[` `alignment` $alignment (`,` `offset` $offset^ `:` type($offset))? `]` + attr-dict + }]; +} + +def AssumeSepStorageOp : CIR_Op<"assume.separate_storage", [SameTypeOperands]> { + let summary = + "Tell the optimizer that two pointers point to different allocations"; + let description = [{ + The `cir.assume.separate_storage` operation takes two pointers as arguments, + and the operation tells the optimizer that these two pointers point to + different allocations. + + This operation corresponds to the `__builtin_assume_separate_storage` + builtin function. 
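+
+    As a sketch of a source-level use that could produce this operation (the
+    function and variable names here are made up for illustration):
+
+    ```c++
+    void scale(float *dst, const float *src, int n) {
+      // Promise the optimizer that dst and src never overlap.
+      __builtin_assume_separate_storage(dst, src);
+      for (int i = 0; i < n; ++i)
+        dst[i] = src[i] * 2.0f;
+    }
+    ```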
+ }]; + + let arguments = (ins VoidPtr:$ptr1, VoidPtr:$ptr2); + + let assemblyFormat = [{ + $ptr1 `,` $ptr2 `:` qualified(type($ptr1)) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// Branch Probability Operations +//===----------------------------------------------------------------------===// + +def ExpectOp : CIR_Op<"expect", + [Pure, AllTypesMatch<["result", "val", "expected"]>]> { + let summary = + "Compute whether expression is likely to evaluate to a specified value"; + let description = [{ + Provides __builtin_expect functionality in Clang IR. + + If $prob is not specified, then behaviour is same as __builtin_expect. + If specified, then behaviour is same as __builtin_expect_with_probability, + where probability = $prob. + }]; + + let arguments = (ins PrimitiveInt:$val, + PrimitiveInt:$expected, + OptionalAttr:$prob); + let results = (outs PrimitiveInt:$result); + let assemblyFormat = [{ + `(` $val`,` $expected (`,` $prob^)? `)` `:` type($val) attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// Variadic Operations +//===----------------------------------------------------------------------===// + +def VAStartOp : CIR_Op<"va.start">, Arguments<(ins CIR_PointerType:$arg_list)> { + let summary = "Starts a variable argument list"; + let assemblyFormat = "$arg_list attr-dict `:` type(operands)"; + let hasVerifier = 0; +} + +def VAEndOp : CIR_Op<"va.end">, Arguments<(ins CIR_PointerType:$arg_list)> { + let summary = "Ends a variable argument list"; + let assemblyFormat = "$arg_list attr-dict `:` type(operands)"; + let hasVerifier = 0; +} + +def VACopyOp : CIR_Op<"va.copy">, + Arguments<(ins CIR_PointerType:$dst_list, + CIR_PointerType:$src_list)> { + let summary = "Copies a variable argument list"; + let assemblyFormat = "$src_list `to` $dst_list attr-dict `:` type(operands)"; + let hasVerifier = 0; +} + +def VAArgOp : CIR_Op<"va.arg">, + Results<(outs CIR_AnyType:$result)>, + Arguments<(ins CIR_PointerType:$arg_list)> { + let summary = "Fetches next variadic element as a given type"; + let assemblyFormat = "$arg_list attr-dict `:` functional-type(operands, $result)"; + let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// AllocExceptionOp +//===----------------------------------------------------------------------===// + +def AllocExceptionOp : CIR_Op<"alloc.exception"> { + let summary = "Allocates an exception according to Itanium ABI"; + let description = [{ + Implements a slightly higher level __cxa_allocate_exception: + + `void *__cxa_allocate_exception(size_t thrown_size);` + + If operation fails, program terminates, not throw. + + Example: + + ```mlir + // if (b == 0) { + // ... + // throw "..."; + cir.if %10 { + %11 = cir.alloc_exception 8 -> !cir.ptr + ... // store exception content into %11 + cir.throw %11 : !cir.ptr>, ... + ``` + }]; + + let arguments = (ins I64Attr:$size); + let results = (outs Res]>:$addr); + + let assemblyFormat = [{ + $size `->` qualified(type($addr)) attr-dict + }]; + + // Constraints verified elsewhere. 
+ let hasVerifier = 0; +} + +//===----------------------------------------------------------------------===// +// ThrowOp +//===----------------------------------------------------------------------===// + +def ThrowOp : CIR_Op<"throw"> { + let summary = "(Re)Throws an exception"; + let description = [{ + Very similar to __cxa_throw: + + ``` + void __cxa_throw(void *thrown_exception, std::type_info *tinfo, + void (*dest) (void *)); + ``` + + The absense of arguments for `cir.throw` means it rethrows. + + For the no-rethrow version, it must have at least two operands, the RTTI + information, a pointer to the exception object (likely allocated via + `cir.cxa.allocate_exception`) and finally an optional dtor, which might + run as part of this operation. + + ```mlir + // if (b == 0) + // throw "Division by zero condition!"; + cir.if %10 { + %11 = cir.alloc_exception 8 -> !cir.ptr + ... + cir.store %13, %11 : // Store string addr for "Division by zero condition!" + cir.throw %11 : !cir.ptr>, @"typeinfo for char const*" + ``` + }]; + + let arguments = (ins Optional:$exception_ptr, + OptionalAttr:$type_info, + OptionalAttr:$dtor); + + let assemblyFormat = [{ + ($exception_ptr^ `:` type($exception_ptr))? + (`,` $type_info^)? + (`,` $dtor^)? + attr-dict + }]; + + let extraClassDeclaration = [{ + bool rethrows() { return getNumOperands() == 0; } + }]; + + let hasVerifier = 1; +} + +def StackSaveOp : CIR_Op<"stack_save"> { + let summary = "remembers the current state of the function stack"; + let description = [{ + Remembers the current state of the function stack. Returns a pointer + that later can be passed into cir.stack_restore. + Useful for implementing language features like variable length arrays. + + ```mlir + %0 = cir.stack_save : + ``` + + }]; + + let results = (outs CIR_PointerType:$result); + let assemblyFormat = "attr-dict `:` qualified(type($result))"; +} + +def StackRestoreOp : CIR_Op<"stack_restore"> { + let summary = "restores the state of the function stack"; + let description = [{ + Restore the state of the function stack to the state it was + in when the corresponding cir.stack_save executed. + Useful for implementing language features like variable length arrays. + + ```mlir + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["saved_stack"] {alignment = 8 : i64} + %1 = cir.stack_save : + cir.store %1, %0 : !cir.ptr, !cir.ptr> + %2 = cir.load %0 : !cir.ptr>, !cir.ptr + cir.stack_restore %2 : !cir.ptr + ``` + }]; + + let arguments = (ins CIR_PointerType:$ptr); + let assemblyFormat = "$ptr attr-dict `:` qualified(type($ptr))"; + + let llvmOp = "StackRestoreOp"; +} + +def AsmATT : I32EnumAttrCase<"x86_att", 0>; +def AsmIntel : I32EnumAttrCase<"x86_intel", 1>; + +def AsmFlavor : I32EnumAttr< + "AsmFlavor", + "ATT or Intel", + [AsmATT, AsmIntel]> { + let cppNamespace = "::mlir::cir"; +} + +def CIR_InlineAsmOp : CIR_Op<"asm", [RecursiveMemoryEffects]> { + let description = [{ + The `cir.asm` operation represents C/C++ asm inline. + + CIR constraints strings follow barelly the same rules that are established + for the C level assembler constraints with several differences caused by + clang::AsmStmt processing. + + Thus, numbers that appears in the constraint string may also refer to: + - the output variable index referenced by the input operands. + - the index of early-clobber operand + + Operand attributes is a storage, where each element corresponds to the operand with + the same index. The first index relates to the operation result (if any). 
+ Note, the operands themselves are stored as VariadicOfVariadic in the next order: + output, input and then in/out operands. + + Note, when several output operands are present, the result type may be represented as + an anon struct type. + + Example: + ```C++ + __asm__("foo" : : : ); + __asm__("bar $42 %[val]" : [val] "=r" (x), "+&r"(x)); + __asm__("baz $42 %[val]" : [val] "=r" (x), "+&r"(x) : "[val]"(y)); + ``` + + ```mlir + !ty_22anon2E022 = !cir.struct, !cir.int}> + !ty_22anon2E122 = !cir.struct, !cir.int}> + ... + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] + %1 = cir.alloca !s32i, !cir.ptr, ["y", init] + ... + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.load %1 : !cir.ptr, !s32i + + cir.asm(x86_att, + out = [], + in = [], + in_out = [], + {"foo" "~{dirflag},~{fpsr},~{flags}"}) side_effects + + cir.asm(x86_att, + out = [], + in = [], + in_out = [%2 : !s32i], + {"bar $$42 $0" "=r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) -> !ty_22anon2E022 + + cir.asm(x86_att, + out = [], + in = [%3 : !s32i], + in_out = [%2 : !s32i], + {"baz $$42 $0" "=r,=&r,0,1,~{dirflag},~{fpsr},~{flags}"}) -> !ty_22anon2E122 + ``` + }]; + + let results = (outs Optional:$res); + + let arguments = ( + ins VariadicOfVariadic:$operands, + StrAttr:$asm_string, + StrAttr:$constraints, + UnitAttr:$side_effects, + AsmFlavor:$asm_flavor, + ArrayAttr:$operand_attrs, + DenseI32ArrayAttr:$operands_segments + ); + + let builders = [OpBuilder<(ins + "ArrayRef":$operands, + "StringRef":$asm_string, + "StringRef":$constraints, + "bool":$side_effects, + "AsmFlavor":$asm_flavor, + "ArrayRef":$operand_attrs + )> + ]; + + let hasCustomAssemblyFormat = 1; +} + +//===----------------------------------------------------------------------===// +// UnreachableOp +//===----------------------------------------------------------------------===// + +def UnreachableOp : CIR_Op<"unreachable", [Terminator]> { + let summary = "invoke immediate undefined behavior"; + let description = [{ + If the program control flow reaches a `cir.unreachable` operation, the + program exhibits undefined behavior immediately. This operation is useful + in cases where the unreachability of a program point needs to be explicitly + marked. + }]; + + let assemblyFormat = "attr-dict"; +} + +//===----------------------------------------------------------------------===// +// TrapOp +//===----------------------------------------------------------------------===// + +def TrapOp : CIR_Op<"trap", [Terminator]> { + let summary = "Exit the program abnormally"; + let description = [{ + The cir.trap operation causes the program to exit abnormally. The + implementations may implement this operation with different mechanisms. For + example, an implementation may implement this operation by calling abort, + while another implementation may implement this operation by executing an + illegal instruction. + }]; + + let assemblyFormat = "attr-dict"; +} + +//===----------------------------------------------------------------------===// +// PrefetchOp +//===----------------------------------------------------------------------===// + +def PrefetchOp : CIR_Op<"prefetch"> { + let summary = "prefetch operation"; + let description = [{ + The `cir.prefetch` op prefetches data from the memmory address. + + ```mlir + cir.prefetch(%0 : !cir.ptr) locality(1) write + ``` + + This opcode has the three attributes: + 1. The $locality is a temporal locality specifier + ranging from (0) - no locality, to (3) - extremely local keep in cache. + 2. 
The $isWrite is the specifier determining if the prefetch is prepaired + for a 'read' or 'write'. + If $isWrite doesn't specified it means that prefetch is prepared for 'read'. + }]; + + let arguments = ( + ins VoidPtr:$addr, + ConfinedAttr, + IntMaxValue<3>]>:$locality, + UnitAttr:$isWrite); + + let assemblyFormat = [{ + `(` $addr `:` qualified(type($addr)) `)` + `locality``(` $locality `)` + (`write` $isWrite^) : (`read`)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// ClearCacheOp +//===----------------------------------------------------------------------===// + +def ClearCacheOp : CIR_Op<"clear_cache", [AllTypesMatch<["begin", "end"]>]> { + let summary = "clear cache operation"; + let description = [{ + CIR representation for `__builtin___clear_cache`. + }]; + + let arguments = (ins VoidPtr:$begin, VoidPtr:$end); + let assemblyFormat = [{ + $begin `:` qualified(type($begin)) `,` + $end `,` + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// ArrayCtor & ArrayDtor +//===----------------------------------------------------------------------===// + +class CIR_ArrayInitDestroy : CIR_Op { + let arguments = (ins Arg:$addr); + let regions = (region SizedRegion<1>:$body); + let assemblyFormat = [{ + `(` $addr `:` qualified(type($addr)) `)` $body attr-dict + }]; + + let builders = [ + OpBuilder<(ins "mlir::Value":$addr, + "function_ref":$regionBuilder), [{ + assert(regionBuilder && "builder callback expected"); + OpBuilder::InsertionGuard guard($_builder); + Region *r = $_state.addRegion(); + $_state.addOperands(ValueRange{addr}); + $_builder.createBlock(r); + regionBuilder($_builder, $_state.location); + }]> + ]; +} + +def ArrayCtor : CIR_ArrayInitDestroy<"array.ctor"> { + let summary = "Initialize array elements with C++ constructors"; + let description = [{ + Initialize each array element using the same C++ constructor. This + operation has one region, with one single block. The block has an + incoming argument for the current array index to initialize. + }]; +} + +def ArrayDtor : CIR_ArrayInitDestroy<"array.dtor"> { + let summary = "Destroy array elements with C++ dtors"; + let description = [{ + Destroy each array element using the same C++ destructor. This + operation has one region, with one single block. The block has an + incoming argument for the current array index to initialize. + }]; +} + +//===----------------------------------------------------------------------===// +// IsConstantOp +//===----------------------------------------------------------------------===// + +def IsConstantOp : CIR_Op<"is_constant", [Pure]> { + let description = [{ + Returns `true` if the argument is known to be a compile-time constant + otherwise returns 'false'. + }]; + let arguments = (ins CIR_AnyType:$val); + let results = (outs CIR_BoolType:$result); + + let assemblyFormat = [{ + `(` $val `:` type($val) `)` `:` type($result) attr-dict + }]; +} + + +def SwitchFlatOp : CIR_Op<"switch.flat", [AttrSizedOperandSegments, Terminator]> { + + let description = [{ + The `cir.switch.flat` operation is a region-less and simplified version of the `cir.switch`. + It's representation is closer to LLVM IR dialect than the C/C++ language feature. 
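+
+    For reference, a plain source-level switch such as the one below (names
+    are illustrative) is the kind of construct that ends up in this flattened
+    form, with case values and successor blocks attached directly to the
+    operation:
+
+    ```c++
+    int classify(int c) {
+      switch (c) {
+      case 0:  return 10;
+      case 1:  return 20;
+      default: return -1;
+      }
+    }
+    ```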
+ }]; + + let arguments = (ins + CIR_IntType:$condition, + Variadic:$defaultOperands, + VariadicOfVariadic:$caseOperands, + ArrayAttr:$case_values, + DenseI32ArrayAttr:$case_operand_segments + ); + + let successors = (successor + AnySuccessor:$defaultDestination, + VariadicSuccessor:$caseDestinations + ); + + let assemblyFormat = [{ + $condition `:` type($condition) `,` + $defaultDestination (`(` $defaultOperands^ `:` type($defaultOperands) `)`)? + custom(ref(type($condition)), $case_values, $caseDestinations, + $caseOperands, type($caseOperands)) + attr-dict + }]; + + let builders = [ + OpBuilder<(ins "Value":$condition, + "Block *":$defaultDestination, + "ValueRange":$defaultOperands, + CArg<"ArrayRef", "{}">:$caseValues, + CArg<"BlockRange", "{}">:$caseDestinations, + CArg<"ArrayRef", "{}">:$caseOperands)> + ]; +} + +//===----------------------------------------------------------------------===// +// GotoOp +//===----------------------------------------------------------------------===// + +def GotoOp : CIR_Op<"goto", [Terminator]> { + let description = [{ Transfers control to the specified label. + + Example: + ```C++ + void foo() { + goto exit; + + exit: + return; + } + ``` + + ```mlir + cir.func @foo() { + cir.goto "exit" + ^bb1: + cir.label "exit" + cir.return + } + ``` + }]; + let arguments = (ins StrAttr:$label); + let assemblyFormat = [{ $label attr-dict }]; +} + +//===----------------------------------------------------------------------===// +// LabelOp +//===----------------------------------------------------------------------===// + +// The LabelOp has AlwaysSpeculatable trait in order to not to be swept by canonicalizer +def LabelOp : CIR_Op<"label", [AlwaysSpeculatable]> { + let description = [{ An identifier which may be referred by cir.goto operation }]; + let arguments = (ins StrAttr:$label); + let assemblyFormat = [{ $label attr-dict }]; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +// Binary opcodes for atomic fetch. +def Atomic_Add : I32EnumAttrCase<"Add", 0, "add">; +def Atomic_Sub : I32EnumAttrCase<"Sub", 1, "sub">; +def Atomic_And : I32EnumAttrCase<"And", 2, "and">; +def Atomic_Xor : I32EnumAttrCase<"Xor", 3, "xor">; +def Atomic_Or : I32EnumAttrCase<"Or", 4, "or">; +def Atomic_Nand : I32EnumAttrCase<"Nand", 5, "nand">; +def Atomic_Max : I32EnumAttrCase<"Max", 6, "max">; +def Atomic_Min : I32EnumAttrCase<"Min", 7, "min">; + +def AtomicFetchKind : I32EnumAttr< + "AtomicFetchKind", + "Binary opcode for atomic fetch operations", + [Atomic_Add, Atomic_Sub, Atomic_And, Atomic_Xor, Atomic_Or, Atomic_Nand, + Atomic_Max, Atomic_Min]> { + let cppNamespace = "::mlir::cir"; +} + +def AtomicFetch : CIR_Op<"atomic.fetch", + [AllTypesMatch<["result", "val"]>]> { + let summary = "Atomic fetch with unary and binary operations"; + let description = [{ + Represents `__atomic__fetch` and `__atomic_fetch_` builtins, + where `binop` is on of the binary opcodes : `add`, `sub`, `and`, `xor`, + `or`, `nand`, `max` and `min`. + + `ptr` is an integer or fp pointer, followed by `val`, which must be + an integer or fp (only supported for `add` and `sub`). The operation + can also be marked `volatile`. + + If `fetch_first` is present, the operation works like + `__atomic_fetch_binop` and returns the value that had + previously been in *ptr, otherwise it returns the final result + of the computation (`__atomic_binop_fetch`). 
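+
+    In C/C++ terms (an illustrative fragment; `counter` is an ordinary int
+    visible in the enclosing scope):
+
+    ```c++
+    // fetch_first form: returns the value counter held before the addition.
+    int prev = __atomic_fetch_add(&counter, 1, __ATOMIC_SEQ_CST);
+    // non-fetch_first form: returns the value after the addition.
+    int next = __atomic_add_fetch(&counter, 1, __ATOMIC_SEQ_CST);
+    ```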
+ + Example: + %res = cir.atomic.fetch(add, %ptr : !cir.ptr, + %val : !s32i, seq_cst) : !s32i + }]; + let results = (outs CIR_AnyIntOrFloat:$result); + let arguments = (ins Arg:$ptr, + CIR_AnyIntOrFloat:$val, + AtomicFetchKind:$binop, + Arg:$mem_order, + UnitAttr:$is_volatile, + UnitAttr:$fetch_first); + + let assemblyFormat = [{ + `(` + $binop `,` + $ptr `:` type($ptr) `,` + $val `:` type($val) `,` + $mem_order `)` + (`volatile` $is_volatile^)? + (`fetch_first` $fetch_first^)? + `:` type($result) attr-dict + }]; + + let hasVerifier = 1; +} + +def AtomicXchg : CIR_Op<"atomic.xchg", [AllTypesMatch<["result", "val"]>]> { + let summary = "Atomic exchange"; + let description = [{ + Atomic exchange operations. Implements C/C++ builtins such as + `__atomic_exchange`and `__atomic_exchange_n`. + + Example: + %res = cir.atomic.xchg(%ptr : !cir.ptr, + %val : !u64i, seq_cst) : !u64i + }]; + let results = (outs CIR_AnyType:$result); + let arguments = (ins Arg:$ptr, + CIR_AnyType:$val, + Arg:$mem_order, + UnitAttr:$is_volatile); + + let assemblyFormat = [{ + `(` + $ptr `:` qualified(type($ptr)) `,` + $val `:` type($val) `,` + $mem_order `)` + (`volatile` $is_volatile^)? + `:` type($result) attr-dict + }]; + + let hasVerifier = 0; +} + +def AtomicCmpXchg : CIR_Op<"atomic.cmp_xchg", + [AllTypesMatch<["old", "expected", "desired"]>]> { + let summary = "Atomic compare exchange"; + let description = [{ + C/C++ Atomic compare and exchange operation. Implements builtins like + `__atomic_compare_exchange_n` and `__atomic_compare_exchange`. + + Example: + %old, %cmp = cir.atomic.cmp_xchg(%ptr : !cir.ptr, + %expected : !u64i, + %desired : !u64i, + success = seq_cst, + failure = seq_cst) weak + : (!u64i, !cir.bool) + + }]; + let results = (outs CIR_AnyType:$old, CIR_BoolType:$cmp); + let arguments = (ins Arg:$ptr, + CIR_AnyType:$expected, + CIR_AnyType:$desired, + Arg:$succ_order, + Arg:$fail_order, + UnitAttr:$weak, + UnitAttr:$is_volatile); + + let assemblyFormat = [{ + `(` + $ptr `:` qualified(type($ptr)) `,` + $expected `:` type($expected) `,` + $desired `:` type($desired) `,` + `success` `=` $succ_order `,` + `failure` `=` $fail_order + `)` + (`weak` $weak^)? + (`volatile` $is_volatile^)? + `:` `(` type($old) `,` type($cmp) `)` attr-dict + }]; + + let hasVerifier = 0; +} + +def UndefOp : CIR_Op<"undef", [Pure]> { + let summary = "Creates an undefined value of CIR dialect type."; + let description = [{ `cir.undef` is similar to the one in the LLVM IR dialect }]; + let results = (outs AnyType:$res); + let assemblyFormat = "attr-dict `:` type($res)"; +} #endif // LLVM_CLANG_CIR_DIALECT_IR_CIROPS diff --git a/clang/include/clang/CIR/Dialect/IR/CIROpsEnums.h b/clang/include/clang/CIR/Dialect/IR/CIROpsEnums.h new file mode 100644 index 000000000000..06851947f24c --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIROpsEnums.h @@ -0,0 +1,133 @@ +//===- CIROpsEnumsDialect.h - MLIR Dialect for CIR ----------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the Target dialect for CIR in MLIR. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_CIR_CIROPSENUMS_H_ +#define MLIR_DIALECT_CIR_CIROPSENUMS_H_ + +#include "mlir/IR/BuiltinAttributes.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h.inc" + +namespace mlir { +namespace cir { + +static bool isExternalLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::ExternalLinkage; +} +static bool isAvailableExternallyLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::AvailableExternallyLinkage; +} +static bool isLinkOnceAnyLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::LinkOnceAnyLinkage; +} +static bool isLinkOnceODRLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::LinkOnceODRLinkage; +} +static bool isLinkOnceLinkage(GlobalLinkageKind Linkage) { + return isLinkOnceAnyLinkage(Linkage) || isLinkOnceODRLinkage(Linkage); +} +static bool isWeakAnyLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::WeakAnyLinkage; +} +static bool isWeakODRLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::WeakODRLinkage; +} +static bool isWeakLinkage(GlobalLinkageKind Linkage) { + return isWeakAnyLinkage(Linkage) || isWeakODRLinkage(Linkage); +} +static bool isInternalLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::InternalLinkage; +} +static bool isPrivateLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::PrivateLinkage; +} +static bool isLocalLinkage(GlobalLinkageKind Linkage) { + return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage); +} +static bool isExternalWeakLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::ExternalWeakLinkage; +} +LLVM_ATTRIBUTE_UNUSED static bool isCommonLinkage(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::CommonLinkage; +} +LLVM_ATTRIBUTE_UNUSED static bool +isValidDeclarationLinkage(GlobalLinkageKind Linkage) { + return isExternalWeakLinkage(Linkage) || isExternalLinkage(Linkage); +} + +/// Whether the definition of this global may be replaced by something +/// non-equivalent at link time. For example, if a function has weak linkage +/// then the code defining it may be replaced by different code. +LLVM_ATTRIBUTE_UNUSED static bool +isInterposableLinkage(GlobalLinkageKind Linkage) { + switch (Linkage) { + case GlobalLinkageKind::WeakAnyLinkage: + case GlobalLinkageKind::LinkOnceAnyLinkage: + case GlobalLinkageKind::CommonLinkage: + case GlobalLinkageKind::ExternalWeakLinkage: + return true; + + case GlobalLinkageKind::AvailableExternallyLinkage: + case GlobalLinkageKind::LinkOnceODRLinkage: + case GlobalLinkageKind::WeakODRLinkage: + // The above three cannot be overridden but can be de-refined. + + case GlobalLinkageKind::ExternalLinkage: + case GlobalLinkageKind::InternalLinkage: + case GlobalLinkageKind::PrivateLinkage: + return false; + } + llvm_unreachable("Fully covered switch above!"); +} + +/// Whether the definition of this global may be discarded if it is not used +/// in its compilation unit. +LLVM_ATTRIBUTE_UNUSED static bool +isDiscardableIfUnused(GlobalLinkageKind Linkage) { + return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage) || + isAvailableExternallyLinkage(Linkage); +} + +/// Whether the definition of this global may be replaced at link time. 
NB: +/// Using this method outside of the code generators is almost always a +/// mistake: when working at the IR level use isInterposable instead as it +/// knows about ODR semantics. +LLVM_ATTRIBUTE_UNUSED static bool isWeakForLinker(GlobalLinkageKind Linkage) { + return Linkage == GlobalLinkageKind::WeakAnyLinkage || + Linkage == GlobalLinkageKind::WeakODRLinkage || + Linkage == GlobalLinkageKind::LinkOnceAnyLinkage || + Linkage == GlobalLinkageKind::LinkOnceODRLinkage || + Linkage == GlobalLinkageKind::CommonLinkage || + Linkage == GlobalLinkageKind::ExternalWeakLinkage; +} + +LLVM_ATTRIBUTE_UNUSED static bool isValidLinkage(GlobalLinkageKind L) { + return isExternalLinkage(L) || isLocalLinkage(L) || isWeakLinkage(L) || + isLinkOnceLinkage(L); +} + +bool operator<(mlir::cir::MemOrder, mlir::cir::MemOrder) = delete; +bool operator>(mlir::cir::MemOrder, mlir::cir::MemOrder) = delete; +bool operator<=(mlir::cir::MemOrder, mlir::cir::MemOrder) = delete; +bool operator>=(mlir::cir::MemOrder, mlir::cir::MemOrder) = delete; + +// Validate an integral value which isn't known to fit within the enum's range +// is a valid AtomicOrderingCABI. +template inline bool isValidCIRAtomicOrderingCABI(Int I) { + return (Int)mlir::cir::MemOrder::Relaxed <= I && + I <= (Int)mlir::cir::MemOrder::SequentiallyConsistent; +} + +} // namespace cir +} // namespace mlir + +#endif // MLIR_DIALECT_CIR_CIROPSENUMS_H_ diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.h b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h new file mode 100644 index 000000000000..79b4e77f64be --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h @@ -0,0 +1,198 @@ +//===- CIRTypes.h - MLIR CIR Types ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the types in the CIR dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_CIR_IR_CIRTYPES_H_ +#define MLIR_DIALECT_CIR_IR_CIRTYPES_H_ + +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Types.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "clang/CIR/Interfaces/CIRFPTypeInterface.h" + +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" + +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" + +//===----------------------------------------------------------------------===// +// CIR StructType +// +// The base type for all RecordDecls. +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace cir { + +namespace detail { +struct StructTypeStorage; +} // namespace detail + +/// Each unique clang::RecordDecl is mapped to a `cir.struct` and any object in +/// C/C++ that has a struct type will have a `cir.struct` in CIR. +/// +/// There are three possible formats for this type: +/// +/// - Identified and complete structs: unique name and a known body. +/// - Identified and incomplete structs: unique name and unkonwn body. +/// - Anonymous structs: no name and a known body. +/// +/// Identified structs are uniqued by their name, and anonymous structs are +/// uniqued by their body. This means that two anonymous structs with the same +/// body will be the same type, and two identified structs with the same name +/// will be the same type. 
Attempting to build a struct with a existing name, +/// but a different body will result in an error. +/// +/// A few examples: +/// +/// ```mlir +/// !complete = !cir.struct}> +/// !incomplete = !cir.struct +/// !anonymous = !cir.struct}> +/// ``` +/// +/// Incomplete structs are mutable, meaning the can be later completed with a +/// body automatically updating in place every type in the code that uses the +/// incomplete struct. Mutability allows for recursive types to be represented, +/// meaning the struct can have members that refer to itself. This is useful for +/// representing recursive records and is implemented through a special syntax. +/// In the example below, the `Node` struct has a member that is a pointer to a +/// `Node` struct: +/// +/// ```mlir +/// !struct = !cir.struct>}> +/// ``` +class StructType + : public Type::TypeBase { + // FIXME(cir): migrate this type to Tablegen once mutable types are supported. +public: + using Base::Base; + using Base::getChecked; + using Base::verify; + + static constexpr StringLiteral name = "cir.struct"; + + enum RecordKind : uint32_t { Class, Union, Struct }; + + /// Create a identified and complete struct type. + static StructType get(MLIRContext *context, ArrayRef members, + StringAttr name, bool packed, RecordKind kind, + ASTRecordDeclInterface ast = {}); + static StructType getChecked(function_ref emitError, + MLIRContext *context, ArrayRef members, + StringAttr name, bool packed, RecordKind kind, + ASTRecordDeclInterface ast = {}); + + /// Create a identified and incomplete struct type. + static StructType get(MLIRContext *context, StringAttr name, RecordKind kind); + static StructType getChecked(function_ref emitError, + MLIRContext *context, StringAttr name, + RecordKind kind); + + /// Create a anonymous struct type (always complete). + static StructType get(MLIRContext *context, ArrayRef members, + bool packed, RecordKind kind, + ASTRecordDeclInterface ast = {}); + static StructType getChecked(function_ref emitError, + MLIRContext *context, ArrayRef members, + bool packed, RecordKind kind, + ASTRecordDeclInterface ast = {}); + + /// Validate the struct about to be constructed. + static LogicalResult verify(function_ref emitError, + ArrayRef members, StringAttr name, + bool incomplete, bool packed, + StructType::RecordKind kind, + ASTRecordDeclInterface ast); + + // Parse/print methods. + static constexpr StringLiteral getMnemonic() { return {"struct"}; } + static Type parse(AsmParser &odsParser); + void print(AsmPrinter &odsPrinter) const; + + // Accessors + ASTRecordDeclInterface getAst() const; + ArrayRef getMembers() const; + StringAttr getName() const; + StructType::RecordKind getKind() const; + bool getIncomplete() const; + bool getPacked() const; + void dropAst(); + + // Predicates + bool isClass() const { return getKind() == RecordKind::Class; }; + bool isStruct() const { return getKind() == RecordKind::Struct; }; + bool isUnion() const { return getKind() == RecordKind::Union; }; + bool isComplete() const { return !isIncomplete(); }; + bool isIncomplete() const; + + // Utilities + Type getLargestMember(const DataLayout &dataLayout) const; + size_t getNumElements() const { return getMembers().size(); }; + std::string getKindAsStr() { + switch (getKind()) { + case RecordKind::Class: + return "class"; + case RecordKind::Union: + return "union"; + case RecordKind::Struct: + return "struct"; + } + } + std::string getPrefixedName() { + return getKindAsStr() + "." 
+ getName().getValue().str(); + } + + /// Complete the struct type by mutating its members and attributes. + void complete(ArrayRef members, bool packed, + ASTRecordDeclInterface ast = {}); + + /// DataLayoutTypeInterface methods. + llvm::TypeSize getTypeSizeInBits(const DataLayout &dataLayout, + DataLayoutEntryListRef params) const; + uint64_t getABIAlignment(const DataLayout &dataLayout, + DataLayoutEntryListRef params) const; + uint64_t getPreferredAlignment(const DataLayout &dataLayout, + DataLayoutEntryListRef params) const; + uint64_t getElementOffset(const DataLayout &dataLayout, unsigned idx) const; + + bool isLayoutIdentical(const StructType &other); + + // Utilities for lazily computing and cacheing data layout info. +private: + // FIXME: currently opaque because there's a cycle if CIRTypes.types include + // from CIRAttrs.h. The implementation operates in terms of StructLayoutAttr + // instead. + mutable mlir::Attribute layoutInfo; + bool isPadded(const DataLayout &dataLayout) const; + void computeSizeAndAlignment(const DataLayout &dataLayout) const; +}; + +bool isAnyFloatingPointType(mlir::Type t); +bool isFPOrFPVectorTy(mlir::Type); +} // namespace cir +} // namespace mlir + +mlir::ParseResult parseAddrSpaceAttribute(mlir::AsmParser &p, + mlir::Attribute &addrSpaceAttr); +void printAddrSpaceAttribute(mlir::AsmPrinter &p, + mlir::Attribute addrSpaceAttr); + +//===----------------------------------------------------------------------===// +// CIR Dialect Tablegen'd Types +//===----------------------------------------------------------------------===// + +#define GET_TYPEDEF_CLASSES +#include "clang/CIR/Dialect/IR/CIROpsTypes.h.inc" + +#endif // MLIR_DIALECT_CIR_IR_CIRTYPES_H_ diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td new file mode 100644 index 000000000000..1c63fcd84c67 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -0,0 +1,567 @@ +//===- CIRTypes.td - CIR dialect types ---------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the CIR dialect types. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CIR_DIALECT_CIR_TYPES +#define MLIR_CIR_DIALECT_CIR_TYPES + +include "clang/CIR/Dialect/IR/CIRDialect.td" +include "clang/CIR/Interfaces/ASTAttrInterfaces.td" +include "clang/CIR/Interfaces/CIRFPTypeInterface.td" +include "mlir/Interfaces/DataLayoutInterfaces.td" +include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/EnumAttr.td" + +//===----------------------------------------------------------------------===// +// CIR Types +//===----------------------------------------------------------------------===// + +class CIR_Type traits = [], + string baseCppClass = "::mlir::Type"> + : TypeDef { + let mnemonic = typeMnemonic; +} + +//===----------------------------------------------------------------------===// +// IntType +//===----------------------------------------------------------------------===// + +def CIR_IntType : CIR_Type<"Int", "int", + [DeclareTypeInterfaceMethods]> { + let summary = "Integer type with arbitrary precision up to a fixed limit"; + let description = [{ + CIR type that represents C/C++ primitive integer types. 
+ Said types are: `char`, `short`, `int`, `long`, `long long`, and their \ + unsigned variations. + }]; + let parameters = (ins "unsigned":$width, "bool":$isSigned); + let hasCustomAssemblyFormat = 1; + let extraClassDeclaration = [{ + /// Return true if this is a signed integer type. + bool isSigned() const { return getIsSigned(); } + /// Return true if this is an unsigned integer type. + bool isUnsigned() const { return !getIsSigned(); } + /// Return type alias. + std::string getAlias() const { + return (isSigned() ? 's' : 'u') + std::to_string(getWidth()) + 'i'; + }; + /// Return true if this is a primitive integer type (i.e. signed or unsigned + /// integer types whose bit width is 8, 16, 32, or 64). + bool isPrimitive() const { + return isValidPrimitiveIntBitwidth(getWidth()); + } + + /// Returns a minimum bitwidth of cir::IntType + static unsigned minBitwidth() { return 1; } + /// Returns a maximum bitwidth of cir::IntType + static unsigned maxBitwidth() { return 64; } + + /// Returns true if cir::IntType that represents a primitive integer type + /// can be constructed from the provided bitwidth. + static bool isValidPrimitiveIntBitwidth(unsigned width) { + return width == 8 || width == 16 || width == 32 || width == 64; + } + }]; + let genVerifyDecl = 1; +} + +// Constraints + +// Unsigned integer type of a specific width. +class UInt + : Type($_self)">, + CPred<"::mlir::cast<::mlir::cir::IntType>($_self).isUnsigned()">, + CPred<"::mlir::cast<::mlir::cir::IntType>($_self).getWidth() == " # width> + ]>, width # "-bit unsigned integer", "::mlir::cir::IntType">, + BuildableType< + "mlir::cir::IntType::get($_builder.getContext(), " + # width # ", /*isSigned=*/false)"> { + int bitwidth = width; +} + +def UInt1 : UInt<1>; +def UInt8 : UInt<8>; +def UInt16 : UInt<16>; +def UInt32 : UInt<32>; +def UInt64 : UInt<64>; + +// Signed integer type of a specific width. +class SInt + : Type($_self)">, + CPred<"::mlir::cast<::mlir::cir::IntType>($_self).isSigned()">, + CPred<"::mlir::cast<::mlir::cir::IntType>($_self).getWidth() == " # width> + ]>, width # "-bit signed integer", "::mlir::cir::IntType">, + BuildableType< + "mlir::cir::IntType::get($_builder.getContext(), " + # width # ", /*isSigned=*/true)"> { + int bitwidth = width; +} + +def SInt1 : SInt<1>; +def SInt8 : SInt<8>; +def SInt16 : SInt<16>; +def SInt32 : SInt<32>; +def SInt64 : SInt<64>; + +def PrimitiveUInt + : AnyTypeOf<[UInt8, UInt16, UInt32, UInt64], "primitive unsigned int", + "::mlir::cir::IntType">; +def PrimitiveSInt + : AnyTypeOf<[SInt8, SInt16, SInt32, SInt64], "primitive signed int", + "::mlir::cir::IntType">; +def PrimitiveInt + : AnyTypeOf<[UInt8, UInt16, UInt32, UInt64, SInt8, SInt16, SInt32, SInt64], + "primitive int", "::mlir::cir::IntType">; + +//===----------------------------------------------------------------------===// +// FloatType +//===----------------------------------------------------------------------===// + +class CIR_FloatType + : CIR_Type, + DeclareTypeInterfaceMethods, + ]> {} + +def CIR_Single : CIR_FloatType<"Single", "float"> { + let summary = "CIR single-precision float type"; + let description = [{ + Floating-point type that represents the `float` type in C/C++. Its + underlying floating-point format is the IEEE-754 binary32 format. + }]; +} + +def CIR_Double : CIR_FloatType<"Double", "double"> { + let summary = "CIR double-precision float type"; + let description = [{ + Floating-point type that represents the `double` type in C/C++. 
Its
+    underlying floating-point format is the IEEE-754 binary64 format.
+  }];
+}
+
+def CIR_FP16 : CIR_FloatType<"FP16", "f16"> {
+  let summary = "CIR type that represents IEEE-754 binary16 format";
+  let description = [{
+    Floating-point type that represents the IEEE-754 binary16 format.
+  }];
+}
+
+def CIR_BFloat16 : CIR_FloatType<"BF16", "bf16"> {
+  let summary = "CIR type that represents the bfloat16 format";
+  let description = [{
+    Floating-point type that represents the bfloat16 format.
+  }];
+}
+
+def CIR_FP80 : CIR_FloatType<"FP80", "f80"> {
+  let summary = "CIR type that represents x87 80-bit floating-point format";
+  let description = [{
+    Floating-point type that represents the x87 80-bit floating-point format.
+  }];
+}
+
+def CIR_LongDouble : CIR_FloatType<"LongDouble", "long_double"> {
+  let summary = "CIR extended-precision float type";
+  let description = [{
+    Floating-point type that represents the `long double` type in C/C++.
+
+    The underlying floating-point format of a long double value depends on the
+    implementation. The `underlying` parameter specifies the CIR floating-point
+    type that corresponds to this format. For now, it can only be either
+    `!cir.double` or `!cir.fp80`.
+  }];
+
+  let parameters = (ins "mlir::Type":$underlying);
+
+  let assemblyFormat = [{
+    `<` $underlying `>`
+  }];
+
+  let genVerifyDecl = 1;
+}
+
+// Constraints
+
+def CIR_AnyFloat: AnyTypeOf<[CIR_Single, CIR_Double, CIR_FP80, CIR_LongDouble]>;
+def CIR_AnyIntOrFloat: AnyTypeOf<[CIR_AnyFloat, CIR_IntType]>;
+
+//===----------------------------------------------------------------------===//
+// ComplexType
+//===----------------------------------------------------------------------===//
+
+def CIR_ComplexType : CIR_Type<"Complex", "complex",
+    [DeclareTypeInterfaceMethods]> {
+
+  let summary = "CIR complex type";
+  let description = [{
+    CIR type that represents a C complex number. `cir.complex` models the C
+    type `T _Complex`.
+
+    The parameter `elementTy` gives the type of the real and imaginary parts of
+    the complex number. `elementTy` must be either a CIR integer type or a CIR
+    floating-point type.
+  }];
+
+  let parameters = (ins "mlir::Type":$elementTy);
+
+  let assemblyFormat = [{
+    `<` $elementTy `>`
+  }];
+
+  let genVerifyDecl = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// PointerType
+//===----------------------------------------------------------------------===//
+
+def CIR_PointerType : CIR_Type<"Pointer", "ptr",
+    [DeclareTypeInterfaceMethods]> {
+
+  let summary = "CIR pointer type";
+  let description = [{
+    `cir.ptr` is the type returned by any operation that produces a pointer in
+    C++.
+  }];
+
+  let parameters = (ins
+    "mlir::Type":$pointee,
+    // FIXME(cir): Currently unable to directly use AddressSpaceAttr because of
+    // cyclic dep. Workaround with the top type and verifier.
+    OptionalParameter<"mlir::Attribute">:$addrSpace
+  );
+
+  let builders = [
+    TypeBuilderWithInferredContext<(ins
+      "mlir::Type":$pointee,
+      CArg<"mlir::Attribute", "{}">:$addrSpace), [{
+      return $_get(pointee.getContext(), pointee, addrSpace);
+    }]>,
+    TypeBuilder<(ins
+      "mlir::Type":$pointee,
+      CArg<"mlir::Attribute", "{}">:$addrSpace), [{
+      return $_get($_ctxt, pointee, addrSpace);
+    }]>
+  ];
+
+  let assemblyFormat = [{
+    `<` $pointee ( `,` `addrspace` `(`
+    custom($addrSpace)^
+    `)` )? `>`
+  }];
+
+  let genVerifyDecl = 1;
+
+  let skipDefaultBuilders = 1;
+
+  let extraClassDeclaration = [{
+    bool isVoidPtr() const {
+      return mlir::isa(getPointee());
+    }
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// DataMemberType
+//===----------------------------------------------------------------------===//
+
+def CIR_DataMemberType : CIR_Type<"DataMember", "data_member",
+    [DeclareTypeInterfaceMethods]> {
+
+  let summary = "CIR type that represents pointer-to-data-member type in C++";
+  let description = [{
+    `cir.member_ptr` models the pointer-to-data-member type in C++. Values of
+    this type are essentially offsets of the pointed-to member within its
+    containing struct.
+  }];
+
+  let parameters = (ins "mlir::Type":$memberTy,
+                        "mlir::cir::StructType":$clsTy);
+
+  let assemblyFormat = [{
+    `<` $memberTy `in` $clsTy `>`
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// BoolType
+//
+// An alternative here is to represent bool as mlir::i1, but let's be more
+// generic.
+//
+//===----------------------------------------------------------------------===//
+
+def CIR_BoolType :
+    CIR_Type<"Bool", "bool",
+             [DeclareTypeInterfaceMethods]> {
+
+  let summary = "CIR bool type";
+  let description = [{
+    `cir.bool` represents the C++ `bool` type.
+  }];
+
+  let hasCustomAssemblyFormat = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// ArrayType
+//===----------------------------------------------------------------------===//
+
+def CIR_ArrayType : CIR_Type<"Array", "array",
+    [DeclareTypeInterfaceMethods]> {
+
+  let summary = "CIR array type";
+  let description = [{
+    `cir.array` represents C/C++ constant arrays.
+  }];
+
+  let parameters = (ins "mlir::Type":$eltType, "uint64_t":$size);
+
+  let assemblyFormat = [{
+    `<` $eltType `x` $size `>`
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// VectorType (fixed size)
+//===----------------------------------------------------------------------===//
+
+def CIR_VectorType : CIR_Type<"Vector", "vector",
+    [DeclareTypeInterfaceMethods]> {
+
+  let summary = "CIR vector type";
+  let description = [{
+    `cir.vector` represents fixed-size vector types. The parameters are the
+    element type and the number of elements.
+  }];
+
+  let parameters = (ins "mlir::Type":$eltType, "uint64_t":$size);
+
+  let assemblyFormat = [{
+    `<` $eltType `x` $size `>`
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// FuncType
+//===----------------------------------------------------------------------===//
+
+def CIR_FuncType : CIR_Type<"Func", "func"> {
+  let summary = "CIR function type";
+  let description = [{
+    `!cir.func` is a function type. It consists of a single return type and a
+    list of parameter types, and can optionally be variadic.
+
+    Example:
+
+    ```mlir
+    !cir.func
+    !cir.func
+    !cir.func
+    ```
+  }];
+
+  let parameters = (ins ArrayRefParameter<"Type">:$inputs, "Type":$returnType,
+                        "bool":$varArg);
+  let assemblyFormat = [{
+    `<` $returnType ` ` `(` custom($inputs, $varArg) `>`
+  }];
+
+  let builders = [
+    TypeBuilderWithInferredContext<(ins
+      "ArrayRef":$inputs, "Type":$returnType,
+      CArg<"bool", "false">:$isVarArg), [{
+      return $_get(returnType.getContext(), inputs, returnType, isVarArg);
+    }]>
+  ];
+
+  let extraClassDeclaration = [{
+    /// Returns whether the function is variadic.
+    bool isVarArg() const { return getVarArg(); }
+
+    /// Returns the `i`th input operand type. Asserts if out of bounds.
+    Type getInput(unsigned i) const { return getInputs()[i]; }
+
+    /// Returns the number of arguments to the function.
+    unsigned getNumInputs() const { return getInputs().size(); }
+
+    /// Returns the result type of the function as an ArrayRef, enabling better
+    /// integration with generic MLIR utilities.
+    ArrayRef getReturnTypes() const;
+
+    /// Returns whether the function returns void.
+    bool isVoid() const;
+
+    /// Returns a clone of this function type with the given argument
+    /// and result types.
+    FuncType clone(TypeRange inputs, TypeRange results) const;
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// MethodType
+//===----------------------------------------------------------------------===//
+
+def CIR_MethodType : CIR_Type<"Method", "method",
+    [DeclareTypeInterfaceMethods]> {
+  let summary = "CIR type that represents C++ pointer-to-member-function type";
+  let description = [{
+    `cir.method` models the pointer-to-member-function type in C++. The layout
+    of this type is ABI-dependent.
+  }];
+
+  let parameters = (ins "mlir::cir::FuncType":$memberFuncTy,
+                        "mlir::cir::StructType":$clsTy);
+
+  let assemblyFormat = [{
+    `<` qualified($memberFuncTy) `in` $clsTy `>`
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// Exception info type
+//
+// By introducing an exception info type, exception related operations can be
+// more descriptive.
+//
+// This basically wraps a uint8_t* and a uint32_t
+//
+//===----------------------------------------------------------------------===//
+
+def CIR_ExceptionType : CIR_Type<"ExceptionInfo", "exception"> {
+  let summary = "CIR exception info";
+  let description = [{
+    In the presence of an in-flight exception, this type holds all the
+    information specific to that exception: the associated type id and the
+    exception object pointer. These are materialized from this type through
+    other specific operations.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// Void type
+//===----------------------------------------------------------------------===//
+
+def CIR_VoidType : CIR_Type<"Void", "void"> {
+  let summary = "CIR void type";
+  let description = [{
+    The `!cir.void` type represents the C/C++ `void` type.
+  }];
+  let extraClassDeclaration = [{
+    /// Returns the alias name to use when printing this type.
+ std::string getAlias() const { return "void"; }; + }]; +} + +// Constraints + +// Pointer to void +def VoidPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::VoidType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())">, + ]>, "void*">, + BuildableType< + "mlir::cir::PointerType::get($_builder.getContext()," + "mlir::cir::VoidType::get($_builder.getContext()))"> { +} + +// Pointer to a primitive int, float or double +def PrimitiveIntOrFPPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::IntType, ::mlir::cir::SingleType," + "::mlir::cir::DoubleType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())">, + ]>, "{int,void}*"> { +} + +def ComplexPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::ComplexType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())">, + ]>, "!cir.complex*"> { +} + +// Pointer to struct +def StructPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::StructType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())"> + ]>, "!cir.struct*"> { +} + +// Pointer to exception info +def ExceptionPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::ExceptionInfoType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())"> + ]>, "!cir.eh_info*">, + BuildableType< + "mlir::cir::PointerType::get($_builder.getContext()," + "mlir::cir::ExceptionInfoType::get($_builder.getContext()))"> { +} + +// Vector of integral type +def IntegerVector : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::VectorType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::IntType>(" + "::mlir::cast<::mlir::cir::VectorType>($_self).getEltType())">, + CPred<"::mlir::cast<::mlir::cir::IntType>(" + "::mlir::cast<::mlir::cir::VectorType>($_self).getEltType())" + ".isPrimitive()"> + ]>, "!cir.vector of !cir.int"> { +} + +// Pointer to Arrays +def ArrayPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::ArrayType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())">, + ]>, "!cir.ptr"> { +} + +// Pointer to functions +def FuncPtr : Type< + And<[ + CPred<"::mlir::isa<::mlir::cir::PointerType>($_self)">, + CPred<"::mlir::isa<::mlir::cir::FuncType>(" + "::mlir::cast<::mlir::cir::PointerType>($_self).getPointee())">, + ]>, "!cir.ptr"> { +} + +//===----------------------------------------------------------------------===// +// StructType (defined in cpp files) +//===----------------------------------------------------------------------===// + +def CIR_StructType : Type($_self)">, + "CIR struct type">; + +//===----------------------------------------------------------------------===// +// Global type constraints +//===----------------------------------------------------------------------===// + +def CIR_AnyType : AnyTypeOf<[ + CIR_IntType, CIR_PointerType, CIR_DataMemberType, CIR_MethodType, + CIR_BoolType, CIR_ArrayType, CIR_VectorType, CIR_FuncType, CIR_VoidType, + CIR_StructType, CIR_ExceptionType, CIR_AnyFloat, CIR_FP16, CIR_BFloat16, + CIR_ComplexType +]>; + +#endif // MLIR_CIR_DIALECT_CIR_TYPES diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypesDetails.h b/clang/include/clang/CIR/Dialect/IR/CIRTypesDetails.h new file mode 100644 index 000000000000..5eba4ac460a7 --- /dev/null 
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypesDetails.h @@ -0,0 +1,115 @@ +//===- CIRTypesDetails.h - Details of CIR dialect types ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains implementation details, such as storage structures, of +// CIR dialect types. +// +//===----------------------------------------------------------------------===// +#ifndef CIR_DIALECT_IR_CIRTYPESDETAILS_H +#define CIR_DIALECT_IR_CIRTYPESDETAILS_H + +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/Support/LogicalResult.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/ADT/Hashing.h" + +namespace mlir { +namespace cir { +namespace detail { + +//===----------------------------------------------------------------------===// +// CIR StructTypeStorage +//===----------------------------------------------------------------------===// + +/// Type storage for CIR record types. +struct StructTypeStorage : public TypeStorage { + struct KeyTy { + ArrayRef members; + StringAttr name; + bool incomplete; + bool packed; + StructType::RecordKind kind; + ASTRecordDeclInterface ast; + + KeyTy(ArrayRef members, StringAttr name, bool incomplete, bool packed, + StructType::RecordKind kind, ASTRecordDeclInterface ast) + : members(members), name(name), incomplete(incomplete), packed(packed), + kind(kind), ast(ast) {} + }; + + ArrayRef members; + StringAttr name; + bool incomplete; + bool packed; + StructType::RecordKind kind; + ASTRecordDeclInterface ast; + + StructTypeStorage(ArrayRef members, StringAttr name, bool incomplete, + bool packed, StructType::RecordKind kind, + ASTRecordDeclInterface ast) + : members(members), name(name), incomplete(incomplete), packed(packed), + kind(kind), ast(ast) {} + + KeyTy getAsKey() const { + return KeyTy(members, name, incomplete, packed, kind, ast); + } + + bool operator==(const KeyTy &key) const { + if (name) + return (name == key.name) && (kind == key.kind); + return (members == key.members) && (name == key.name) && + (incomplete == key.incomplete) && (packed == key.packed) && + (kind == key.kind) && (ast == key.ast); + } + + static llvm::hash_code hashKey(const KeyTy &key) { + if (key.name) + return llvm::hash_combine(key.name, key.kind); + return llvm::hash_combine(key.members, key.incomplete, key.packed, key.kind, + key.ast); + } + + static StructTypeStorage *construct(TypeStorageAllocator &allocator, + const KeyTy &key) { + return new (allocator.allocate()) + StructTypeStorage(allocator.copyInto(key.members), key.name, + key.incomplete, key.packed, key.kind, key.ast); + } + + /// Mutates the members and attributes an identified struct. + /// + /// Once a record is mutated, it is marked as complete, preventing further + /// mutations. Anonymous structs are always complete and cannot be mutated. + /// This method does not fail if a mutation of a complete struct does not + /// change the struct. + LogicalResult mutate(TypeStorageAllocator &allocator, ArrayRef members, + bool packed, ASTRecordDeclInterface ast) { + // Anonymous structs cannot mutate. + if (!name) + return failure(); + + // Mutation of complete structs are allowed if they change nothing. 
+ if (!incomplete) + return mlir::success((this->members == members) && + (this->packed == packed) && (this->ast == ast)); + + // Mutate incomplete struct. + this->members = allocator.copyInto(members); + this->packed = packed; + this->ast = ast; + + incomplete = false; + return success(); + } +}; + +} // namespace detail +} // namespace cir +} // namespace mlir + +#endif // CIR_DIALECT_IR_CIRTYPESDETAILS_H diff --git a/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt b/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt index 28ae30dab8df..3d43b06c6217 100644 --- a/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/include/clang/CIR/Dialect/IR/CMakeLists.txt @@ -14,3 +14,20 @@ mlir_tablegen(CIROpsDialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(MLIRCIROpsIncGen) add_dependencies(mlir-headers MLIRCIROpsIncGen) +# Equivalent to add_mlir_doc +add_clang_mlir_doc(CIROps CIROps Dialects/ -gen-op-doc) +add_clang_mlir_doc(CIRAttrs CIRAttrs Dialects/ -gen-attrdef-doc) +add_clang_mlir_doc(CIRTypes CIRTypes Dialects/ -gen-typedef-doc) + +# Generate extra headers for custom enum and attrs. +mlir_tablegen(CIROpsEnums.h.inc -gen-enum-decls) +mlir_tablegen(CIROpsEnums.cpp.inc -gen-enum-defs) +mlir_tablegen(CIROpsStructs.h.inc -gen-attrdef-decls) +mlir_tablegen(CIROpsStructs.cpp.inc -gen-attrdef-defs) +mlir_tablegen(CIROpsAttributes.h.inc -gen-attrdef-decls) +mlir_tablegen(CIROpsAttributes.cpp.inc -gen-attrdef-defs) +add_public_tablegen_target(MLIRCIREnumsGen) + +clang_tablegen(CIRBuiltinsLowering.inc -gen-cir-builtins-lowering + SOURCE CIROps.td + TARGET CIRBuiltinsLowering) diff --git a/clang/include/clang/CIR/Dialect/IR/FPEnv.h b/clang/include/clang/CIR/Dialect/IR/FPEnv.h new file mode 100644 index 000000000000..aceba9ee57d0 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/IR/FPEnv.h @@ -0,0 +1,50 @@ +//===- FPEnv.h ---- FP Environment ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// @file +/// This file contains the declarations of entities that describe floating +/// point environment and related functions. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_DIALECT_IR_FPENV_H +#define CLANG_CIR_DIALECT_IR_FPENV_H + +#include "llvm/ADT/FloatingPointMode.h" + +#include + +namespace cir { + +namespace fp { + +/// Exception behavior used for floating point operations. +/// +/// Each of these values corresponds to some LLVMIR metadata argument value of a +/// constrained floating point intrinsic. See the LLVM Language Reference Manual +/// for details. +enum ExceptionBehavior : uint8_t { + ebIgnore, ///< This corresponds to "fpexcept.ignore". + ebMayTrap, ///< This corresponds to "fpexcept.maytrap". + ebStrict, ///< This corresponds to "fpexcept.strict". +}; + +} // namespace fp + +/// For any RoundingMode enumerator, returns a string valid as input in +/// constrained intrinsic rounding mode metadata. +std::optional convertRoundingModeToStr(llvm::RoundingMode); + +/// For any ExceptionBehavior enumerator, returns a string valid as input in +/// constrained intrinsic exception behavior metadata. 
+std::optional + convertExceptionBehaviorToStr(fp::ExceptionBehavior); + +} // namespace cir + +#endif diff --git a/clang/include/clang/CIR/Dialect/Passes.h b/clang/include/clang/CIR/Dialect/Passes.h new file mode 100644 index 000000000000..67e9da2246b6 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/Passes.h @@ -0,0 +1,57 @@ +//===- Passes.h - CIR pass entry points -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines prototypes that expose pass constructors. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_CIR_PASSES_H_ +#define MLIR_DIALECT_CIR_PASSES_H_ + +#include "mlir/Pass/Pass.h" + +namespace clang { +class ASTContext; +} +namespace mlir { + +std::unique_ptr createLifetimeCheckPass(); +std::unique_ptr createLifetimeCheckPass(clang::ASTContext *astCtx); +std::unique_ptr createLifetimeCheckPass(ArrayRef remark, + ArrayRef hist, + unsigned hist_limit, + clang::ASTContext *astCtx); +std::unique_ptr createCIRCanonicalizePass(); +std::unique_ptr createCIRSimplifyPass(); +std::unique_ptr createDropASTPass(); +std::unique_ptr createSCFPreparePass(); +std::unique_ptr createLoweringPreparePass(); +std::unique_ptr createLoweringPreparePass(clang::ASTContext *astCtx); +std::unique_ptr createIdiomRecognizerPass(); +std::unique_ptr createIdiomRecognizerPass(clang::ASTContext *astCtx); +std::unique_ptr createLibOptPass(); +std::unique_ptr createLibOptPass(clang::ASTContext *astCtx); +std::unique_ptr createFlattenCFGPass(); +std::unique_ptr createGotoSolverPass(); + +/// Create a pass to lower ABI-independent function definitions/calls. +std::unique_ptr createCallConvLoweringPass(); + +void populateCIRPreLoweringPasses(mlir::OpPassManager &pm); + +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "clang/CIR/Dialect/Passes.h.inc" + +} // namespace mlir + +#endif // MLIR_DIALECT_CIR_PASSES_H_ diff --git a/clang/include/clang/CIR/Dialect/Passes.td b/clang/include/clang/CIR/Dialect/Passes.td new file mode 100644 index 000000000000..d72bf0bfd420 --- /dev/null +++ b/clang/include/clang/CIR/Dialect/Passes.td @@ -0,0 +1,184 @@ +//===-- Passes.td - CIR pass definition file ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_CIR_PASSES +#define MLIR_DIALECT_CIR_PASSES + +include "mlir/Pass/PassBase.td" + +def CIRCanonicalize : Pass<"cir-canonicalize"> { + let summary = "Performs CIR canonicalization"; + let description = [{ + Perform canonicalizations on CIR and removes some redundant operations. + + This pass performs basic cleanup and canonicalization transformations that + hopefully do not affect CIR-to-source fidelity and high-level code analysis + passes too much. 
Example transformations performed in this pass include
+    empty scope cleanup, trivial try cleanup, redundant branch cleanup, etc.
+    Those more "heavyweight" transformations and those transformations that
+    could significantly affect CIR-to-source fidelity are performed in the
+    `cir-simplify` pass.
+  }];
+
+  let constructor = "mlir::createCIRCanonicalizePass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def CIRSimplify : Pass<"cir-simplify"> {
+  let summary = "Performs CIR simplification and code optimization";
+  let description = [{
+    The pass performs code simplification and optimization on CIR.
+
+    Unlike the `cir-canonicalize` pass, this pass contains more aggressive code
+    transformations that could significantly affect CIR-to-source fidelity.
+    Example transformations performed in this pass include ternary folding,
+    code hoisting, etc.
+  }];
+  let constructor = "mlir::createCIRSimplifyPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def LifetimeCheck : Pass<"cir-lifetime-check"> {
+  let summary = "Check lifetime safety and generate diagnostics";
+  let description = [{
+    This pass relies on a lifetime analysis pass and uses the diagnostics
+    mechanism to report to the user. It does not change any code.
+
+    A default constructor is specified solely to make tablegen happy. Since
+    this pass requires the presence of an ASTContext, set one up using
+    `mlir::createLifetimeCheckPass(clang::ASTContext &)` instead.
+  }];
+  let constructor = "mlir::createLifetimeCheckPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+
+  let options = [
+    ListOption<"historyList", "history", "std::string",
+               "List of history styles to emit as part of diagnostics."
+               " Supported styles: {all|null|invalid}", "llvm::cl::ZeroOrMore">,
+    ListOption<"remarksList", "remarks", "std::string",
+               "List of remark styles to enable as part of diagnostics."
+               " Supported styles: {all|pset}", "llvm::cl::ZeroOrMore">,
+    Option<"historyLimit", "history_limit", "unsigned", /*default=*/"1",
+           "Maximum number of diagnostics to emit on pointer history">
+  ];
+}
+
+def DropAST : Pass<"cir-drop-ast"> {
+  let summary = "Remove clang AST nodes attached to CIR operations";
+  let description = [{
+    Some CIR operations have references back to the Clang AST. These are
+    necessary to perform lots of useful checks without having to
+    duplicate all rich AST information in CIR. As we move down in the
+    pipeline (e.g. generating LLVM or other MLIR dialects), the need
+    for such nodes diminishes and AST information can be dropped.
+
+    Right now this is enabled by default in Clang prior to dialect
+    codegen from CIR, but not before lifetime check, where AST is
+    required to be present.
+  }];
+  let constructor = "mlir::createDropASTPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def LoweringPrepare : Pass<"cir-lowering-prepare"> {
+  let summary = "Preparation work before lowering to LLVM dialect";
+  let description = [{
+    This pass does preparation work for LLVM lowering. For example, it may
+    expand global variable initialization into a more ABI-friendly form.
+  }];
+  let constructor = "mlir::createLoweringPreparePass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def SCFPrepare : Pass<"cir-mlir-scf-prepare"> {
+  let summary = "Preparation work before lowering to SCF dialect";
+  let description = [{
+    This pass does preparation work for SCF lowering. For example, it may
+    hoist loop invariants or canonicalize the loop comparison. Currently,
+    the pass is only enabled for the through-MLIR pipeline.
+  }];
+  let constructor = "mlir::createSCFPreparePass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def FlattenCFG : Pass<"cir-flatten-cfg"> {
+  let summary = "Produces a flattened CFG";
+  let description = [{
+    This pass transforms CIR and inlines all the nested regions. Thus,
+    the following postconditions are met after the pass is applied:
+    - there are no nested regions in a function body
+    - all the blocks in a function belong to the parent region
+    In other words, this pass removes CIR operations such as IfOp, LoopOp,
+    ScopeOp, etc. and produces a flat CIR.
+  }];
+  let constructor = "mlir::createFlattenCFGPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def GotoSolver : Pass<"cir-goto-solver"> {
+  let summary = "Replaces goto operations with branches";
+  let description = [{
+    This pass transforms CIR and replaces gotos with branch
+    operations to the proper blocks.
+  }];
+  let constructor = "mlir::createGotoSolverPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
+def IdiomRecognizer : Pass<"cir-idiom-recognizer"> {
+  let summary = "Raise calls to C/C++ libraries to CIR operations";
+  let description = [{
+    This pass recognizes idiomatic C++ usage and incorporates C++ standard
+    containers, library function calls, and types into CIR operations,
+    attributes and types.
+
+    Detections made by this pass can be inspected by users via remarks.
+    Currently supported are `all` and `found-calls`.
+  }];
+  let constructor = "mlir::createIdiomRecognizerPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+
+  let options = [
+    ListOption<"remarksList", "remarks", "std::string",
+               "Diagnostic remarks to enable."
+               " Supported styles: {all|found-calls}", "llvm::cl::ZeroOrMore">,
+  ];
+}
+
+def LibOpt : Pass<"cir-lib-opt"> {
+  let summary = "Optimize C/C++ library calls";
+  let description = [{
+    By using higher level information from `cir-idiom-recognizer`, this pass
+    applies transformations to CIR based on specific C/C++ library semantics.
+
+    Transformations made by this pass can be inspected by users via remarks.
+    Currently supported are `all` and `transforms`.
+  }];
+  let constructor = "mlir::createLibOptPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+
+  let options = [
+    ListOption<"remarksList", "remarks", "std::string",
+               "Diagnostic remarks to enable."
+               " Supported styles: {all|transforms}", "llvm::cl::ZeroOrMore">,
+  ];
+}
+
+def CallConvLowering : Pass<"cir-call-conv-lowering"> {
+  let summary = "Handle calling conventions for CIR functions";
+  let description = [{
+    This pass lowers CIR function definitions and calls according to the
+    calling conventions for the target architecture. This pass is necessary
+    to properly lower CIR functions to LLVM IR.
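For orientation, here is a minimal sketch (not part of this patch) of how the pass constructors declared above in `clang/include/clang/CIR/Dialect/Passes.h` could be strung together over a CIR module. The wrapper function name and the particular ordering are illustrative assumptions, not the actual driver pipeline.

```cpp
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "clang/CIR/Dialect/Passes.h"

// Assumed example driver: runs a few of the CIR passes declared in Passes.h.
mlir::LogicalResult runExampleCIRPipeline(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addPass(mlir::createCIRCanonicalizePass()); // cir-canonicalize
  pm.addPass(mlir::createLoweringPreparePass()); // cir-lowering-prepare
  // Passes that must run before lowering, as grouped by this helper.
  mlir::populateCIRPreLoweringPasses(pm);
  return pm.run(module);
}
```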
+ }]; + let constructor = "mlir::createCallConvLoweringPass()"; + let dependentDialects = ["mlir::cir::CIRDialect"]; +} + +#endif // MLIR_DIALECT_CIR_PASSES diff --git a/clang/include/clang/CIR/Dialect/Transforms/CMakeLists.txt b/clang/include/clang/CIR/Dialect/Transforms/CMakeLists.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/include/clang/CIR/FnInfoOpts.h b/clang/include/clang/CIR/FnInfoOpts.h new file mode 100644 index 000000000000..cea4d89f4c14 --- /dev/null +++ b/clang/include/clang/CIR/FnInfoOpts.h @@ -0,0 +1,37 @@ +#ifndef CIR_FNINFOOPTS_H +#define CIR_FNINFOOPTS_H + +#include "llvm/ADT/STLForwardCompat.h" + +namespace cir { + +enum class FnInfoOpts { + None = 0, + IsInstanceMethod = 1 << 0, + IsChainCall = 1 << 1, + IsDelegateCall = 1 << 2, +}; + +inline FnInfoOpts operator|(FnInfoOpts A, FnInfoOpts B) { + return static_cast(llvm::to_underlying(A) | + llvm::to_underlying(B)); +} + +inline FnInfoOpts operator&(FnInfoOpts A, FnInfoOpts B) { + return static_cast(llvm::to_underlying(A) & + llvm::to_underlying(B)); +} + +inline FnInfoOpts operator|=(FnInfoOpts A, FnInfoOpts B) { + A = A | B; + return A; +} + +inline FnInfoOpts operator&=(FnInfoOpts A, FnInfoOpts B) { + A = A & B; + return A; +} + +} // namespace cir + +#endif // CIR_FNINFOOPTS_H diff --git a/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.h b/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.h new file mode 100644 index 000000000000..e2f1e16eb511 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.h @@ -0,0 +1,45 @@ +//===- ASTAttrInterfaces.h - CIR AST Interfaces -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_CIR_AST_ATTR_INTERFACES_H_ +#define MLIR_INTERFACES_CIR_AST_ATTR_INTERFACES_H_ + +#include "mlir/IR/Attributes.h" + +#include "clang/AST/Attr.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Mangle.h" + +namespace mlir { +namespace cir { + +mlir::Attribute makeFuncDeclAttr(const clang::Decl *decl, + mlir::MLIRContext *ctx); + +} // namespace cir +} // namespace mlir + +/// Include the generated interface declarations. +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h.inc" + +namespace mlir { +namespace cir { + +template bool hasAttr(ASTDeclInterface decl) { + if constexpr (std::is_same_v) + return decl.hasOwnerAttr(); + if constexpr (std::is_same_v) + return decl.hasPointerAttr(); + if constexpr (std::is_same_v) + return decl.hasInitPriorityAttr(); +} + +} // namespace cir +} // namespace mlir + +#endif // MLIR_INTERFACES_CIR_AST_ATAR_INTERFACES_H_ diff --git a/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td b/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td new file mode 100644 index 000000000000..fc162c11f42c --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td @@ -0,0 +1,282 @@ +//===- ASTAttrInterfaces.td - CIR AST Interface Definitions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CIR_INTERFACES_AST_ATTR_INTERFACES +#define MLIR_CIR_INTERFACES_AST_ATTR_INTERFACES + +include "mlir/IR/OpBase.td" + +let cppNamespace = "::mlir::cir" in { + def ASTDeclInterface : AttrInterface<"ASTDeclInterface"> { + let methods = [ + InterfaceMethod<"", "bool", "hasOwnerAttr", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->template hasAttr(); + }] + >, + InterfaceMethod<"", "bool", "hasPointerAttr", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->template hasAttr(); + }] + >, + InterfaceMethod<"", "bool", "hasInitPriorityAttr", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->template hasAttr(); + }] + > + ]; + } + + def ASTNamedDeclInterface : AttrInterface<"ASTNamedDeclInterface", + [ASTDeclInterface]> { + let methods = [ + InterfaceMethod<"", "clang::DeclarationName", "getDeclName", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->getDeclName(); + }] + >, + InterfaceMethod<"", "llvm::StringRef", "getName", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->getName(); + }] + > + ]; + } + + def ASTValueDeclInterface : AttrInterface<"ASTValueDeclInterface", + [ASTNamedDeclInterface]>; + + def ASTDeclaratorDeclInterface : AttrInterface<"ASTDeclaratorDeclInterface", + [ASTValueDeclInterface]>; + + def ASTVarDeclInterface : AttrInterface<"ASTVarDeclInterface", + [ASTDeclaratorDeclInterface]> { + let methods = [ + InterfaceMethod<"", "void", "mangleDynamicInitializer", (ins "llvm::raw_ostream&":$Out), [{}], + /*defaultImplementation=*/ [{ + std::unique_ptr MangleCtx( + $_attr.getAst()->getASTContext().createMangleContext()); + MangleCtx->mangleDynamicInitializer($_attr.getAst(), Out); + }] + >, + InterfaceMethod<"", "clang::VarDecl::TLSKind", "getTLSKind", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->getTLSKind(); + }] + > + ]; + } + + def ASTFunctionDeclInterface : AttrInterface<"ASTFunctionDeclInterface", + [ASTDeclaratorDeclInterface]> { + let methods = [ + InterfaceMethod<"", "bool", "isOverloadedOperator", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->isOverloadedOperator(); + }] + >, + InterfaceMethod<"", "bool", "isStatic", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->isStatic(); + }] + > + ]; + } + + def ASTCXXMethodDeclInterface : AttrInterface<"ASTCXXMethodDeclInterface", + [ASTFunctionDeclInterface]> { + let methods = [ + InterfaceMethod<"", "bool", "isCopyAssignmentOperator", (ins), [{}], + /*defaultImplementation=*/ [{ + if (auto decl = dyn_cast($_attr.getAst())) + return decl->isCopyAssignmentOperator(); + return false; + }] + >, + InterfaceMethod<"", "bool", "isMoveAssignmentOperator", (ins), [{}], + /*defaultImplementation=*/ [{ + if (auto decl = dyn_cast($_attr.getAst())) + return decl->isMoveAssignmentOperator(); + return false; + }] + >, + InterfaceMethod<"", "bool", "isConst", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->isConst(); + }] + > + ]; + } + + def ASTCXXConstructorDeclInterface : AttrInterface<"ASTCXXConstructorDeclInterface", + [ASTCXXMethodDeclInterface]> { + let methods = [ + InterfaceMethod<"", "bool", "isDefaultConstructor", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->isDefaultConstructor(); + }] + >, + InterfaceMethod<"", "bool", "isCopyConstructor", (ins), 
[{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->isCopyConstructor(); + }] + > + ]; + } + + def ASTCXXConversionDeclInterface : AttrInterface<"ASTCXXConversionDeclInterface", + [ASTCXXMethodDeclInterface]>; + + def ASTCXXDestructorDeclInterface : AttrInterface<"ASTCXXDestructorDeclInterface", + [ASTCXXMethodDeclInterface]>; + + def ASTTypeDeclInterface : AttrInterface<"ASTTypeDeclInterface", + [ASTNamedDeclInterface]>; + + def ASTTagDeclInterface : AttrInterface<"ASTTagDeclInterface", + [ASTTypeDeclInterface]> { + let methods = [ + InterfaceMethod<"", "clang::TagTypeKind", "getTagKind", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->getTagKind(); + }] + > + ]; + } + + def ASTRecordDeclInterface : AttrInterface<"ASTRecordDeclInterface", + [ASTTagDeclInterface]> { + let methods = [ + InterfaceMethod<"", "bool", "isLambda", (ins), [{}], + /*defaultImplementation=*/ [{ + if (auto ast = clang::dyn_cast($_attr.getAst())) + return ast->isLambda(); + return false; + }] + >, + InterfaceMethod<"", "bool", "hasPromiseType", (ins), [{}], + /*defaultImplementation=*/ [{ + if (!clang::isa($_attr.getAst())) + return false; + for (const auto *sub : $_attr.getAst()->decls()) { + if (auto subRec = clang::dyn_cast(sub)) { + if (subRec->getDeclName().isIdentifier() && + subRec->getName() == "promise_type") { + return true; + } + } + } + return false; + }] + >, + InterfaceMethod<"", "bool", "isInStdNamespace", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst()->getDeclContext()->isStdNamespace(); + }] + >, + // Note: `getRawDecl` is useful for debugging because it allows dumping + // the RecordDecl - it should not be used in regular code. + InterfaceMethod<"", "const clang::RecordDecl *", "getRawDecl", (ins), [{}], + /*defaultImplementation=*/ [{ + return $_attr.getAst(); + }] + > + ]; + } + + def AnyASTFunctionDeclAttr : Attr< + CPred<"::mlir::isa<::mlir::cir::ASTFunctionDeclInterface>($_self)">, + "AST Function attribute"> { + let storageType = "::mlir::Attribute"; + let returnType = "::mlir::Attribute"; + let convertFromStorage = "$_self"; + let constBuilderCall = "$0"; + } + + def ASTExprInterface : AttrInterface<"ASTExprInterface"> {} + + def ASTCallExprInterface : AttrInterface<"ASTCallExprInterface", + [ASTExprInterface]> { + let methods = [ + InterfaceMethod<"", "bool", "isCalleeInStdNamespace", + (ins), [{}], /*defaultImplementation=*/ [{ + // Check that the entity being called is in standard + // "std" namespace. + auto callee = $_attr.getAst()->getCallee(); + if (!callee) + return false; + auto *ice = dyn_cast(callee); + if (!ice) + return false; + + auto *dre = dyn_cast_or_null(ice->getSubExpr()); + if (!dre) + return false; + auto qual = dre->getQualifier(); + if (!qual) + return false; + + // FIXME: should we check NamespaceAlias as well? + auto nqual = qual->getAsNamespace(); + if (!nqual || !nqual->getIdentifier() || + nqual->getName().compare("std") != 0) + return false; + + return true; + }] + >, + InterfaceMethod<"", "bool", "isStdFunctionCall", + (ins "llvm::StringRef":$fn), + [{}], /*defaultImplementation=*/ [{ + if (!isCalleeInStdNamespace()) + return false; + auto fnDecl = $_attr.getAst()->getDirectCallee(); + if (!fnDecl) + return false; + // We're looking for `std::`. 
+ if (!fnDecl->getIdentifier() || + fnDecl->getName().compare(fn) != 0) + return false; + return true; + }] + >, + InterfaceMethod<"", "bool", "isMemberCallTo", + (ins "llvm::StringRef":$fn), + [{}], /*defaultImplementation=*/ [{ + auto memberCall = dyn_cast($_attr.getAst()); + if (!memberCall) + return false; + auto methodDecl = memberCall->getMethodDecl(); + if (!methodDecl) + return false; + if (!methodDecl->getIdentifier() || + methodDecl->getName().compare(fn) != 0) + return false; + return true; + }] + >, + InterfaceMethod<"", "bool", "isIteratorBeginCall", + (ins), + [{}], /*defaultImplementation=*/ [{ + return isMemberCallTo("begin"); + }] + >, + InterfaceMethod<"", "bool", "isIteratorEndCall", + (ins), + [{}], /*defaultImplementation=*/ [{ + return isMemberCallTo("end"); + }] + > + ]; + + } + + +} // namespace mlir::cir + +#endif // MLIR_CIR_INTERFACES_AST_ATTR_INTERFACES diff --git a/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.h b/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.h new file mode 100644 index 000000000000..b2d75d40496f --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.h @@ -0,0 +1,22 @@ +//===- CIRFPTypeInterface.h - Interface for CIR FP types -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// Defines the interface to generically handle CIR floating-point types. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INTERFACES_CIR_CIR_FPTYPEINTERFACE_H +#define CLANG_INTERFACES_CIR_CIR_FPTYPEINTERFACE_H + +#include "mlir/IR/Types.h" +#include "llvm/ADT/APFloat.h" + +/// Include the tablegen'd interface declarations. +#include "clang/CIR/Interfaces/CIRFPTypeInterface.h.inc" + +#endif // CLANG_INTERFACES_CIR_CIR_FPTYPEINTERFACE_H diff --git a/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td b/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td new file mode 100644 index 000000000000..7438c8be52d9 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIRFPTypeInterface.td @@ -0,0 +1,52 @@ +//===- CIRFPTypeInterface.td - CIR FP Interface Definitions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CIR_INTERFACES_CIR_FP_TYPE_INTERFACE +#define MLIR_CIR_INTERFACES_CIR_FP_TYPE_INTERFACE + +include "mlir/IR/OpBase.td" + +def CIRFPTypeInterface : TypeInterface<"CIRFPTypeInterface"> { + let description = [{ + Contains helper functions to query properties about a floating-point type. + }]; + let cppNamespace = "::mlir::cir"; + + let methods = [ + InterfaceMethod<[{ + Returns the bit width of this floating-point type. + }], + /*retTy=*/"unsigned", + /*methodName=*/"getWidth", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return llvm::APFloat::semanticsSizeInBits($_type.getFloatSemantics()); + }] + >, + InterfaceMethod<[{ + Return the mantissa width. 
+ }], + /*retTy=*/"unsigned", + /*methodName=*/"getFPMantissaWidth", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return llvm::APFloat::semanticsPrecision($_type.getFloatSemantics()); + }] + >, + InterfaceMethod<[{ + Return the float semantics of this floating-point type. + }], + /*retTy=*/"const llvm::fltSemantics &", + /*methodName=*/"getFloatSemantics" + >, + ]; +} + +#endif // MLIR_CIR_INTERFACES_CIR_FP_TYPE_INTERFACE diff --git a/clang/include/clang/CIR/Interfaces/CIRLoopOpInterface.h b/clang/include/clang/CIR/Interfaces/CIRLoopOpInterface.h new file mode 100644 index 000000000000..2e8a0c8e8a94 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIRLoopOpInterface.h @@ -0,0 +1,36 @@ +//===- CIRLoopOpInterface.h - Interface for CIR loop-like ops --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// Defines the interface to generically handle CIR loop operations. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INTERFACES_CIR_CIRLOOPOPINTERFACE_H_ +#define CLANG_INTERFACES_CIR_CIRLOOPOPINTERFACE_H_ + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/LoopLikeInterface.h" + +namespace mlir { +namespace cir { +namespace detail { + +/// Verify invariants of the LoopOpInterface. +::mlir::LogicalResult verifyLoopOpInterface(::mlir::Operation *op); + +} // namespace detail +} // namespace cir +} // namespace mlir + +/// Include the tablegen'd interface declarations. +#include "clang/CIR/Interfaces/CIRLoopOpInterface.h.inc" + +#endif // CLANG_INTERFACES_CIR_CIRLOOPOPINTERFACE_H_ diff --git a/clang/include/clang/CIR/Interfaces/CIRLoopOpInterface.td b/clang/include/clang/CIR/Interfaces/CIRLoopOpInterface.td new file mode 100644 index 000000000000..bac30dac3d82 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIRLoopOpInterface.td @@ -0,0 +1,99 @@ +//===- CIRLoopOpInterface.td - Interface for CIR loop-like ops -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef CLANG_CIR_INTERFACES_CIRLOOPOPINTERFACE +#define CLANG_CIR_INTERFACES_CIRLOOPOPINTERFACE + +include "mlir/IR/OpBase.td" +include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/LoopLikeInterface.td" + +def LoopOpInterface : OpInterface<"LoopOpInterface", [ + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods +]> { + let description = [{ + Contains helper functions to query properties and perform transformations + on a loop. + }]; + let cppNamespace = "::mlir::cir"; + + let methods = [ + InterfaceMethod<[{ + Returns the loop's conditional region. + }], + /*retTy=*/"mlir::Region &", + /*methodName=*/"getCond" + >, + InterfaceMethod<[{ + Returns the loop's body region. + }], + /*retTy=*/"mlir::Region &", + /*methodName=*/"getBody" + >, + InterfaceMethod<[{ + Returns a pointer to the loop's step region or nullptr. 
+ }], + /*retTy=*/"mlir::Region *", + /*methodName=*/"maybeGetStep", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/"return nullptr;" + >, + InterfaceMethod<[{ + Returns the first region to be executed in the loop. + }], + /*retTy=*/"mlir::Region &", + /*methodName=*/"getEntry", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/"return $_op.getCond();" + >, + InterfaceMethod<[{ + Returns a list of regions in order of execution. + }], + /*retTy=*/"llvm::SmallVector", + /*methodName=*/"getRegionsInExecutionOrder", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return llvm::SmallVector{&$_op.getRegion(0), &$_op.getRegion(1)}; + }] + >, + InterfaceMethod<[{ + Recursively walks the body of the loop in pre-order while skipping + nested loops and executing a callback on every other operation. + }], + /*retTy=*/"mlir::WalkResult", + /*methodName=*/"walkBodySkippingNestedLoops", + /*args=*/(ins "::llvm::function_ref":$callback), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return $_op.getBody().template walk([&](Operation *op) { + if (isa(op)) + return mlir::WalkResult::skip(); + return callback(op); + }); + }] + > + ]; + + let extraClassDeclaration = [{ + /// Generic method to retrieve the successors of a LoopOpInterface operation. + static void getLoopOpSuccessorRegions( + ::mlir::cir::LoopOpInterface op, ::mlir::RegionBranchPoint point, + ::mlir::SmallVectorImpl<::mlir::RegionSuccessor> ®ions); + }]; + + let verify = [{ + /// Verify invariants of the LoopOpInterface. + return detail::verifyLoopOpInterface($_op); + }]; +} + +#endif // CLANG_CIR_INTERFACES_CIRLOOPOPINTERFACE diff --git a/clang/include/clang/CIR/Interfaces/CIROpInterfaces.h b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.h new file mode 100644 index 000000000000..2cd4d9e42524 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.h @@ -0,0 +1,33 @@ +//===- CIROpInterfaces.h - CIR Op Interfaces --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_INTERFACES_CIR_OP_H_ +#define MLIR_INTERFACES_CIR_OP_H_ + +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" +#include "mlir/Interfaces/CallInterfaces.h" + +#include "clang/AST/Attr.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Mangle.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" + +namespace mlir { +namespace cir {} // namespace cir +} // namespace mlir + +/// Include the generated interface declarations. +#include "clang/CIR/Interfaces/CIROpInterfaces.h.inc" + +namespace mlir { +namespace cir {} // namespace cir +} // namespace mlir + +#endif // MLIR_INTERFACES_CIR_OP_H_ diff --git a/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td new file mode 100644 index 000000000000..fd9c20687c3a --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td @@ -0,0 +1,98 @@ +//===- CIROpInterfaces.td - CIR Op Interface Definitions --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CIR_OP_INTERFACES +#define MLIR_CIR_OP_INTERFACES + +include "mlir/IR/OpBase.td" +include "mlir/IR/SymbolInterfaces.td" +include "mlir/Interfaces/CallInterfaces.td" + +let cppNamespace = "::mlir::cir" in { + // The CIRCallOpInterface must be used instead of CallOpInterface when looking + // at arguments and other bits of CallOp. This creates a level of abstraction + // that's useful for handling indirect calls and other details. + def CIRCallOpInterface + : OpInterface<"CIRCallOpInterface", [CallOpInterface]> { + let methods = [ + InterfaceMethod<"", "mlir::Operation::operand_iterator", + "arg_operand_begin", (ins)>, + InterfaceMethod<"", "mlir::Operation::operand_iterator", + "arg_operand_end", (ins)>, + InterfaceMethod< + "Return the operand at index 'i', accounts for indirect call or " + "exception info", + "mlir::Value", "getArgOperand", + (ins "unsigned" + : $i)>, + InterfaceMethod< + "Return the number of operands, accounts for indirect call or " + "exception info", + "unsigned", "getNumArgOperands", (ins)>, + InterfaceMethod< + "Return the calling convention of the call operation", + "mlir::cir::CallingConv", "getCallingConv", (ins)>, + ]; + } + + def CIRGlobalValueInterface + : OpInterface<"CIRGlobalValueInterface", [Symbol]> { + + let methods = [ + InterfaceMethod<"", + "bool", "hasAvailableExternallyLinkage", (ins), [{}], + /*defaultImplementation=*/[{ return false; }] + >, + InterfaceMethod<"", + "bool", "hasLocalLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return mlir::cir::isLocalLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "hasExternalWeakLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return mlir::cir::isExternalWeakLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "isExternalLinkage", (ins), [{}], + /*defaultImplementation=*/[{ + return mlir::cir::isExternalLinkage($_op.getLinkage()); + }] + >, + InterfaceMethod<"", + "bool", "isDeclarationForLinker", (ins), [{}], + /*defaultImplementation=*/[{ + if ($_op.hasAvailableExternallyLinkage()) + return true; + return $_op.isDeclaration(); + }] + >, + InterfaceMethod<"", + "bool", "hasComdat", (ins), [{}], + /*defaultImplementation=*/[{ + return $_op.getComdat(); + }] + >, + InterfaceMethod<"", + "void", "setDSOLocal", (ins "bool":$val), [{}], + /*defaultImplementation=*/[{ + $_op.setDsolocal(val); + }] + >, + ]; + let extraClassDeclaration = [{ + bool hasDefaultVisibility(); + bool canBenefitFromLocalAlias(); + }]; + } + +} // namespace mlir::cir + +#endif // MLIR_CIR_OP_INTERFACES diff --git a/clang/include/clang/CIR/Interfaces/CMakeLists.txt b/clang/include/clang/CIR/Interfaces/CMakeLists.txt new file mode 100644 index 000000000000..86fffa3f9307 --- /dev/null +++ b/clang/include/clang/CIR/Interfaces/CMakeLists.txt @@ -0,0 +1,34 @@ +# This replicates part of the add_mlir_interface cmake function from MLIR that +# cannot be used here. This happens because it expects to be run inside MLIR +# directory which is not the case for CIR (and also FIR, both have similar +# workarounds). 
+ +# Declare a dialect in the include directory +function(add_clang_mlir_attr_interface interface) + set(LLVM_TARGET_DEFINITIONS ${interface}.td) + mlir_tablegen(${interface}.h.inc -gen-attr-interface-decls) + mlir_tablegen(${interface}.cpp.inc -gen-attr-interface-defs) + add_public_tablegen_target(MLIRCIR${interface}IncGen) + add_dependencies(mlir-generic-headers MLIRCIR${interface}IncGen) +endfunction() + +function(add_clang_mlir_op_interface interface) + set(LLVM_TARGET_DEFINITIONS ${interface}.td) + mlir_tablegen(${interface}.h.inc -gen-op-interface-decls) + mlir_tablegen(${interface}.cpp.inc -gen-op-interface-defs) + add_public_tablegen_target(MLIR${interface}IncGen) + add_dependencies(mlir-generic-headers MLIR${interface}IncGen) +endfunction() + +function(add_clang_mlir_type_interface interface) + set(LLVM_TARGET_DEFINITIONS ${interface}.td) + mlir_tablegen(${interface}.h.inc -gen-type-interface-decls) + mlir_tablegen(${interface}.cpp.inc -gen-type-interface-defs) + add_public_tablegen_target(MLIR${interface}IncGen) + add_dependencies(mlir-generic-headers MLIR${interface}IncGen) +endfunction() + +add_clang_mlir_attr_interface(ASTAttrInterfaces) +add_clang_mlir_op_interface(CIROpInterfaces) +add_clang_mlir_op_interface(CIRLoopOpInterface) +add_clang_mlir_type_interface(CIRFPTypeInterface) diff --git a/clang/include/clang/CIR/LowerToLLVM.h b/clang/include/clang/CIR/LowerToLLVM.h new file mode 100644 index 000000000000..88713bf6e07f --- /dev/null +++ b/clang/include/clang/CIR/LowerToLLVM.h @@ -0,0 +1,48 @@ +//====- LowerToLLVM.h- Lowering from CIR to LLVM --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares an interface for converting CIR modules to LLVM IR. +// +//===----------------------------------------------------------------------===// +#ifndef CLANG_CIR_LOWERTOLLVM_H +#define CLANG_CIR_LOWERTOLLVM_H + +#include "mlir/Pass/Pass.h" + +#include + +namespace llvm { +class LLVMContext; +class Module; +} // namespace llvm + +namespace mlir { +class MLIRContext; +class ModuleOp; +} // namespace mlir + +namespace cir { + +namespace direct { +std::unique_ptr +lowerDirectlyFromCIRToLLVMIR(mlir::ModuleOp theModule, + llvm::LLVMContext &llvmCtx, + bool disableVerifier = false); +} + +// Lower directly from pristine CIR to LLVMIR. +std::unique_ptr +lowerFromCIRToMLIRToLLVMIR(mlir::ModuleOp theModule, + std::unique_ptr mlirCtx, + llvm::LLVMContext &llvmCtx); + +mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule, + mlir::MLIRContext *mlirCtx); +} // namespace cir + +#endif // CLANG_CIR_LOWERTOLLVM_H_ diff --git a/clang/include/clang/CIR/LowerToMLIR.h b/clang/include/clang/CIR/LowerToMLIR.h new file mode 100644 index 000000000000..567deb7abc7d --- /dev/null +++ b/clang/include/clang/CIR/LowerToMLIR.h @@ -0,0 +1,21 @@ +//====- LowerToMLIR.h- Lowering from CIR to MLIR --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares functions for lowering CIR modules to MLIR. 
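As a rough usage sketch (not part of this patch), the direct lowering entry point declared in `clang/include/clang/CIR/LowerToLLVM.h` above might be invoked as follows; the wrapper function and any includes outside that header are assumptions.

```cpp
#include "mlir/IR/BuiltinOps.h"
#include "llvm/IR/LLVMContext.h"
#include "clang/CIR/LowerToLLVM.h"

// Assumed example: lower a CIR module straight to LLVM IR.
void lowerExample(mlir::ModuleOp theModule) {
  llvm::LLVMContext llvmCtx;
  // The verifier stays enabled by default (disableVerifier = false).
  auto llvmModule =
      cir::direct::lowerDirectlyFromCIRToLLVMIR(theModule, llvmCtx);
  (void)llvmModule; // Ownership of the lowered module rests with the caller.
}
```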
+// +//===----------------------------------------------------------------------===// +#ifndef CLANG_CIR_LOWERTOMLIR_H +#define CLANG_CIR_LOWERTOMLIR_H + +namespace cir { + +void populateCIRLoopToSCFConversionPatterns(mlir::RewritePatternSet &patterns, + mlir::TypeConverter &converter); +} // namespace cir + +#endif // CLANG_CIR_LOWERTOMLIR_H_ diff --git a/clang/include/clang/CIR/LoweringHelpers.h b/clang/include/clang/CIR/LoweringHelpers.h new file mode 100644 index 000000000000..01b9b4301c3a --- /dev/null +++ b/clang/include/clang/CIR/LoweringHelpers.h @@ -0,0 +1,43 @@ +//====- LoweringHelpers.h - Lowering helper functions ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares helper functions for lowering from CIR to LLVM or MLIR. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_CIR_LOWERINGHELPERS_H +#define LLVM_CLANG_CIR_LOWERINGHELPERS_H +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +mlir::DenseElementsAttr +convertStringAttrToDenseElementsAttr(mlir::cir::ConstArrayAttr attr, + mlir::Type type); + +template StorageTy getZeroInitFromType(mlir::Type Ty); +template <> mlir::APInt getZeroInitFromType(mlir::Type Ty); +template <> mlir::APFloat getZeroInitFromType(mlir::Type Ty); + +mlir::Type getNestedTypeAndElemQuantity(mlir::Type Ty, unsigned &elemQuantity); + +template +void convertToDenseElementsAttrImpl(mlir::cir::ConstArrayAttr attr, + llvm::SmallVectorImpl &values); + +template +mlir::DenseElementsAttr +convertToDenseElementsAttr(mlir::cir::ConstArrayAttr attr, + const llvm::SmallVectorImpl &dims, + mlir::Type type); + +std::optional +lowerConstArrayAttr(mlir::cir::ConstArrayAttr constArr, + const mlir::TypeConverter *converter); +#endif diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h new file mode 100644 index 000000000000..2577af98e5e5 --- /dev/null +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -0,0 +1,345 @@ +//===---- MissingFeatures.h - Checks for unimplemented features -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file introduces some helper classes to guard against features that +// CIR dialect supports that we do not have and also do not have great ways to +// assert against. 
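A hedged illustration of how these guards are presumably consulted at call sites; the call site, assertion message, and usage pattern below are assumptions rather than code from this patch.

```cpp
#include <cassert>
#include "clang/CIR/MissingFeatures.h"

// Assumed example call site: document a not-yet-implemented feature.
void buildLoadExample() {
  // The helper currently returns false, so this assert only fires if the
  // guard is flipped without adding real support for TBAA tagging.
  assert(!cir::MissingFeatures::tbaa() && "TBAA tagging not implemented");
}
```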
+// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_MISSINGFEATURES_H +#define CLANG_CIR_MISSINGFEATURES_H + +namespace cir { + +struct MissingFeatures { + // TODO(CIR): Implement the CIRGenFunction::buildTypeCheck method that handles + // sanitizer related type check features + static bool buildTypeCheck() { return false; } + static bool tbaa() { return false; } + static bool cleanups() { return false; } + static bool emitNullabilityCheck() { return false; } + + // GNU vectors are done, but other kinds of vectors haven't been implemented. + static bool scalableVectors() { return false; } + static bool vectorConstants() { return false; } + + // Address space related + static bool addressSpace() { return false; } + static bool addressSpaceInGlobalVar() { return false; } + + // Clang codegen options + static bool strictVTablePointers() { return false; } + + // Unhandled global/linkage information. + static bool unnamedAddr() { return false; } + static bool setComdat() { return false; } + static bool setDSOLocal() { return false; } + static bool threadLocal() { return false; } + static bool setDLLStorageClass() { return false; } + static bool setDLLImportDLLExport() { return false; } + static bool setPartition() { return false; } + static bool setGlobalVisibility() { return false; } + static bool hiddenVisibility() { return false; } + static bool protectedVisibility() { return false; } + static bool addCompilerUsedGlobal() { return false; } + static bool supportIFuncAttr() { return false; } + static bool setDefaultVisibility() { return false; } + static bool addUsedOrCompilerUsedGlobal() { return false; } + static bool addUsedGlobal() { return false; } + static bool addSectionAttributes() { return false; } + static bool setSectionForFuncOp() { return false; } + static bool updateCPUAndFeaturesAttributes() { return false; } + + // Sanitizers + static bool reportGlobalToASan() { return false; } + static bool emitAsanPrologueOrEpilogue() { return false; } + static bool emitCheckedInBoundsGEP() { return false; } + static bool pointerOverflowSanitizer() { return false; } + static bool sanitizeDtor() { return false; } + static bool sanitizeVLABound() { return false; } + static bool sanitizerBuiltin() { return false; } + static bool sanitizerReturn() { return false; } + static bool sanitizeOther() { return false; } + + // ObjC + static bool setObjCGCLValueClass() { return false; } + static bool objCLifetime() { return false; } + static bool objCIvarDecls() { return false; } + + // Debug info + static bool generateDebugInfo() { return false; } + static bool noDebugInfo() { return false; } + + // LLVM Attributes + static bool setFunctionAttributes() { return false; } + static bool attributeBuiltin() { return false; } + static bool attributeNoBuiltin() { return false; } + static bool parameterAttributes() { return false; } + static bool minLegalVectorWidthAttr() { return false; } + static bool vscaleRangeAttr() { return false; } + + // Coroutines + static bool unhandledException() { return false; } + + // Missing Emissions + static bool variablyModifiedTypeEmission() { return false; } + static bool buildLValueAlignmentAssumption() { return false; } + static bool buildDerivedToBaseCastForDevirt() { return false; } + static bool emitFunctionEpilog() { return false; } + + // References related stuff + static bool ARC() { return false; } // Automatic reference counting + + // Clang early optimizations or things defered to LLVM lowering. 
+ static bool shouldUseBZeroPlusStoresToInitialize() { return false; } + static bool shouldUseMemSetToInitialize() { return false; } + static bool shouldSplitConstantStore() { return false; } + static bool shouldCreateMemCpyFromGlobal() { return false; } + static bool shouldReverseUnaryCondOnBoolExpr() { return false; } + static bool fieldMemcpyizerBuildMemcpy() { return false; } + static bool isTrivialAndisDefaultConstructor() { return false; } + static bool isMemcpyEquivalentSpecialMember() { return false; } + static bool constructABIArgDirectExtend() { return false; } + static bool mayHaveIntegerOverflow() { return false; } + static bool llvmLoweringPtrDiffConsidersPointee() { return false; } + static bool emitNullCheckForDeleteCalls() { return false; } + + // Folding methods. + static bool foldBinOpFMF() { return false; } + + // Fast math. + static bool fastMathGuard() { return false; } + // Should be implemented with a moduleOp level attribute and directly + // mapped to LLVM - those can be set directly for every relevant LLVM IR + // dialect operation (log10, ...). + static bool fastMathFlags() { return false; } + static bool fastMathFuncAttributes() { return false; } + + // Exception handling + static bool isSEHTryScope() { return false; } + static bool ehStack() { return false; } + static bool emitStartEHSpec() { return false; } + static bool emitEndEHSpec() { return false; } + static bool simplifyCleanupEntry() { return false; } + + // Type qualifiers. + static bool atomicTypes() { return false; } + static bool volatileTypes() { return false; } + static bool syncScopeID() { return false; } + + // AArch64 Neon builtin related. + static bool buildNeonShiftVector() { return false; } + + // Misc + static bool cacheRecordLayouts() { return false; } + static bool capturedByInit() { return false; } + static bool tryEmitAsConstant() { return false; } + static bool incrementProfileCounter() { return false; } + static bool createProfileWeightsForLoop() { return false; } + static bool getProfileCount() { return false; } + static bool emitCondLikelihoodViaExpectIntrinsic() { return false; } + static bool requiresReturnValueCheck() { return false; } + static bool shouldEmitLifetimeMarkers() { return false; } + static bool peepholeProtection() { return false; } + static bool CGCapturedStmtInfo() { return false; } + static bool CGFPOptionsRAII() { return false; } + static bool getFPFeaturesInEffect() { return false; } + static bool cxxABI() { return false; } + static bool openCL() { return false; } + static bool openCLBuiltinTypes() { return false; } + static bool CUDA() { return false; } + static bool openMP() { return false; } + static bool openMPRuntime() { return false; } + static bool openMPRegionInfo() { return false; } + static bool openMPTarget() { return false; } + static bool isVarArg() { return false; } + static bool setNonGC() { return false; } + static bool volatileLoadOrStore() { return false; } + static bool armComputeVolatileBitfields() { return false; } + static bool insertBuiltinUnpredictable() { return false; } + static bool createInvariantGroup() { return false; } + static bool createInvariantIntrinsic() { return false; } + static bool addAutoInitAnnotation() { return false; } + static bool addHeapAllocSiteMetadata() { return false; } + static bool loopInfoStack() { return false; } + static bool requiresCleanups() { return false; } + static bool constantFoldsToSimpleInteger() { return false; } + static bool checkFunctionCallABI() { return false; } + static bool 
zeroInitializer() { return false; } + static bool targetCodeGenInfoIsProtoCallVariadic() { return false; } + static bool targetCodeGenInfoGetNullPointer() { return false; } + static bool operandBundles() { return false; } + static bool exceptions() { return false; } + static bool metaDataNode() { return false; } + static bool emitDeclMetadata() { return false; } + static bool emitScalarRangeCheck() { return false; } + static bool stmtExprEvaluation() { return false; } + static bool setCallingConv() { return false; } + static bool tryMarkNoThrow() { return false; } + static bool indirectBranch() { return false; } + static bool escapedLocals() { return false; } + static bool deferredReplacements() { return false; } + static bool shouldInstrumentFunction() { return false; } + static bool xray() { return false; } + static bool buildConstrainedFPCall() { return false; } + + // Inline assembly + static bool asmGoto() { return false; } + static bool asmUnwindClobber() { return false; } + static bool asmMemoryEffects() { return false; } + static bool asmVectorType() { return false; } + static bool asmLLVMAssume() { return false; } + + // C++ ABI support + static bool handleBigEndian() { return false; } + static bool handleAArch64Indirect() { return false; } + static bool classifyArgumentTypeForAArch64() { return false; } + static bool supportgetCoerceToTypeForAArch64() { return false; } + static bool supportTySizeQueryForAArch64() { return false; } + static bool supportTyAlignQueryForAArch64() { return false; } + static bool supportisHomogeneousAggregateQueryForAArch64() { return false; } + static bool supportisEndianQueryForAArch64() { return false; } + static bool supportisAggregateTypeForABIAArch64() { return false; } + + //===--- ABI lowering --===// + + //-- Missing AST queries + + static bool CXXRecordDeclIsEmptyCXX11() { return false; } + static bool CXXRecordDeclIsPOD() { return false; } + static bool CXXRecordIsDynamicClass() { return false; } + static bool astContextGetExternalSource() { return false; } + static bool declGetMaxAlignment() { return false; } + static bool declHasAlignMac68kAttr() { return false; } + static bool declHasAlignNaturalAttr() { return false; } + static bool declHasMaxFieldAlignmentAttr() { return false; } + static bool fieldDeclIsBitfield() { return false; } + static bool fieldDeclIsPotentiallyOverlapping() { return false; } + static bool fieldDeclGetMaxFieldAlignment() { return false; } + static bool fieldDeclisUnnamedBitField() { return false; } + static bool funcDeclIsCXXConstructorDecl() { return false; } + static bool funcDeclIsCXXDestructorDecl() { return false; } + static bool funcDeclIsCXXMethodDecl() { return false; } + static bool funcDeclIsInlineBuiltinDeclaration() { return false; } + static bool funcDeclIsReplaceableGlobalAllocationFunction() { return false; } + static bool isCXXRecordDecl() { return false; } + static bool qualTypeIsReferenceType() { return false; } + static bool recordDeclCanPassInRegisters() { return false; } + static bool recordDeclHasAlignmentAttr() { return false; } + static bool recordDeclHasFlexibleArrayMember() { return false; } + static bool recordDeclIsCXXDecl() { return false; } + static bool recordDeclIsMSStruct() { return false; } + static bool recordDeclIsPacked() { return false; } + static bool recordDeclMayInsertExtraPadding() { return false; } + static bool typeGetAsBuiltinType() { return false; } + static bool typeGetAsEnumType() { return false; } + static bool typeIsCXXRecordDecl() { return false; } + static bool 
typeIsScalableType() { return false; } + static bool typeIsSized() { return false; } + static bool varDeclIsKNRPromoted() { return false; } + + // We need to track parent (base) classes to determine the layout of a class. + static bool getCXXRecordBases() { return false; } + + //-- Missing types + + static bool fixedWidthIntegers() { return false; } + static bool vectorType() { return false; } + static bool functionMemberPointerType() { return false; } + static bool fixedSizeIntType() { return false; } + + //-- Missing LLVM attributes + + static bool noReturn() { return false; } + static bool csmeCall() { return false; } + static bool undef() { return false; } + static bool noFPClass() { return false; } + static bool llvmIntrinsicElementTypeSupport() { return false; } + + //-- Other missing features + + // We need to track the parent record types that represent a field + // declaration. This is necessary to determine the layout of a class. + static bool fieldDeclAbstraction() { return false; } + + // There are some padding diagnostic features for the Itanium ABI that we + // might want to add later. + static bool bitFieldPaddingDiagnostics() { return false; } + + // Clang considers both enums and records as tag types. We don't have a way to + // transparently handle both these types yet. Might need an interface here. + static bool tagTypeClassAbstraction() { return false; } + + // Empty values might be passed as arguments to serve as padding, ensuring + // alignment and compliance (e.g. MIPS). We do not yet support this. + static bool argumentPadding() { return false; } + + // Clang has evaluation kinds which determine how code is emitted for certain + // groups of type classes. We don't have a way to identify type classes. + static bool evaluationKind() { return false; } + + // Calls with a static chain pointer argument may be optimized (e.g. freeing + // up argument registers), but we do not yet track such cases. + static bool chainCall() { return false; } + + // ARM-specific feature that can be specified as a function attribute in C. + static bool cmseNonSecureCallAttr() { return false; } + + // ABI-lowering has special handling for the regcall calling convention (tries + // to pass every argument in regs). We don't support it just yet. + static bool regCall() { return false; } + + // Some ABIs (e.g. x86) require special handling for returning large structs + // by value. The sret argument aids in this, but it is currently NYI. + static bool sretArgs() { return false; } + + // Inalloca parameter attributes are mostly used for the Windows x86_32 ABI. + // We do not support this yet. + static bool inallocaArgs() { return false; } + + // Parameters may have additional attributes (e.g. [[noescape]]) that affect + // the compiler. This is not yet supported in CIR. + static bool extParamInfo() { return false; } + + // LangOpts may affect lowering, but we do not carry this information into CIR + // just yet. Right now, it only instantiates the default lang options. + static bool langOpts() { return false; } + + // Several type qualifiers are not yet supported in CIR, but are important when + // evaluating ABI-specific lowering. + static bool qualifiedTypes() { return false; } + + // We're ignoring several details regarding ABI-handling for Swift. + static bool swift() { return false; } + + // AppleARM64 is currently using ItaniumCXXABI, which is not quite right.
+ static bool appleArm64CXXABI() { return false; } + + // Despite carrying some information about variadics, we are currently + // ignoring this to focus only on the code necessary to lower non-variadics. + static bool variadicFunctions() { return false; } + + // If a store op is guaranteed to execute before the return value load op, we + // can optimize away the store and load ops. Seems like an early optimization. + static bool returnValueDominatingStoreOptmiization() { return false; } + + // Globals (vars and functions) may have attributes that are target dependent. + static bool setTargetAttributes() { return false; } + + // CIR modules parsed from text form may not carry the triple or data layout + // specs. We should make them always present. + static bool makeTripleAlwaysPresent() { return false; } + + // This Itanium bit is currently being skipped in cir. + static bool itaniumRecordLayoutBuilderFinishLayout() { return false; } +}; + +} // namespace cir + +#endif // CLANG_CIR_MISSINGFEATURES_H diff --git a/clang/include/clang/CIR/Passes.h b/clang/include/clang/CIR/Passes.h new file mode 100644 index 000000000000..6b1d2fdc75c4 --- /dev/null +++ b/clang/include/clang/CIR/Passes.h @@ -0,0 +1,38 @@ +//===- Passes.h - CIR Passes Definition -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes the entry points to create compiler passes for ClangIR. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_CIR_PASSES_H +#define CLANG_CIR_PASSES_H + +#include "mlir/Pass/Pass.h" + +#include <memory> + +namespace cir { +/// Create a pass for lowering from MLIR builtin dialects such as `Affine` and +/// `Std` to the LLVM dialect for codegen. +std::unique_ptr<mlir::Pass> createConvertMLIRToLLVMPass(); + +/// Create a pass that fully lowers CIR to the MLIR in-tree dialects. +std::unique_ptr<mlir::Pass> createConvertCIRToMLIRPass(); + +namespace direct { +/// Create a pass that fully lowers CIR to the LLVMIR dialect. +std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass(); + +/// Adds passes that fully lower CIR to the LLVMIR dialect. +void populateCIRToLLVMPasses(mlir::OpPassManager &pm); + +} // namespace direct +} // end namespace cir + +#endif // CLANG_CIR_PASSES_H diff --git a/clang/include/clang/CIR/Target/AArch64.h b/clang/include/clang/CIR/Target/AArch64.h new file mode 100644 index 000000000000..0788cab1fa71 --- /dev/null +++ b/clang/include/clang/CIR/Target/AArch64.h @@ -0,0 +1,17 @@ + +#ifndef CIR_AAARCH64_H +#define CIR_AAARCH64_H + +namespace cir { + +/// The ABI kind for AArch64 targets. +enum class AArch64ABIKind { + AAPCS = 0, + DarwinPCS, + Win64, + AAPCSSoft, +}; + +} // namespace cir + +#endif // CIR_AAARCH64_H diff --git a/clang/include/clang/CIR/Target/x86.h b/clang/include/clang/CIR/Target/x86.h new file mode 100644 index 000000000000..08c6cae7b94f --- /dev/null +++ b/clang/include/clang/CIR/Target/x86.h @@ -0,0 +1,39 @@ +//==-- x86.h - Definitions common to all x86 ABI variants ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
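Regarding the Passes.h entry points above: populateCIRToLLVMPasses takes an mlir::OpPassManager, so callers can splice the direct CIR-to-LLVM-dialect lowering into an ordinary MLIR pass pipeline. A minimal sketch, not part of the patch, assuming `module` is an already-generated CIR module:

    mlir::LogicalResult lowerToLLVMDialect(mlir::ModuleOp module) {
      // mlir::PassManager is an OpPassManager, so it can be populated directly.
      mlir::PassManager pm(module.getContext());
      cir::direct::populateCIRToLLVMPasses(pm);
      return pm.run(module);
    }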
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definitions common to any X86 ABI implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef CIR_X86_H +#define CIR_X86_H + +namespace cir { + +/// The AVX ABI level for X86 targets. +enum class X86AVXABILevel { + None, + AVX, + AVX512, +}; + +// Possible argument classifications according to the x86 ABI documentation. +enum X86ArgClass { + Integer = 0, + SSE, + SSEUp, + X87, + X87Up, + ComplexX87, + NoClass, + Memory +}; + +} // namespace cir + +#endif // CIR_X86_H diff --git a/clang/include/clang/CIR/TypeEvaluationKind.h b/clang/include/clang/CIR/TypeEvaluationKind.h new file mode 100644 index 000000000000..4926727dae40 --- /dev/null +++ b/clang/include/clang/CIR/TypeEvaluationKind.h @@ -0,0 +1,12 @@ +#ifndef CLANG_CIR_TYPEEVALUATIONKIND_H +#define CLANG_CIR_TYPEEVALUATIONKIND_H + +namespace cir { + +// FIXME: for now we are reusing this from lib/Clang/CIRGenFunction.h, which +// isn't available in the include dir. Same for getEvaluationKind below. +enum TypeEvaluationKind { TEK_Scalar, TEK_Complex, TEK_Aggregate }; + +} // namespace cir + +#endif // CLANG_CIR_TYPEEVALUATIONKIND_H diff --git a/clang/include/clang/CIRFrontendAction/.clang-tidy b/clang/include/clang/CIRFrontendAction/.clang-tidy new file mode 100644 index 000000000000..ffcc0a2903fe --- /dev/null +++ b/clang/include/clang/CIRFrontendAction/.clang-tidy @@ -0,0 +1,52 @@ +InheritParentConfig: true +Checks: > + -misc-const-correctness, + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-identifier-naming, + readability-inconsistent-declaration-parameter-name, + readability-misleading-indentation, + readability-redundant-control-flow, + readability-redundant-smartptr-get, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof diff --git a/clang/include/clang/CIRFrontendAction/CIRGenAction.h b/clang/include/clang/CIRFrontendAction/CIRGenAction.h new file mode 100644 index 000000000000..13c2a4381573 --- /dev/null +++ b/clang/include/clang/CIRFrontendAction/CIRGenAction.h @@ -0,0 +1,188 @@ +//===---- 
CIRGenAction.h - CIR Code Generation Frontend Action -*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CIR_CIRGENACTION_H +#define LLVM_CLANG_CIR_CIRGENACTION_H + +#include "clang/CodeGen/CodeGenAction.h" +#include "clang/Frontend/FrontendAction.h" +#include + +namespace llvm { +class LLVMIRContext; +class Module; +} // namespace llvm + +namespace mlir { +class MLIRContext; +class ModuleOp; +template class OwningOpRef; +} // namespace mlir + +namespace cir { +class CIRGenConsumer; +class CIRGenerator; + +class CIRGenAction : public clang::ASTFrontendAction { +public: + enum class OutputType { + EmitAssembly, + EmitCIR, + EmitCIRFlat, + EmitLLVM, + EmitBC, + EmitMLIR, + EmitObj, + None + }; + +private: + friend class CIRGenConsumer; + + // TODO: this is redundant but just using the OwningModuleRef requires more of + // clang against MLIR. Hide this somewhere else. + std::unique_ptr> mlirModule; + std::unique_ptr llvmModule; + + mlir::MLIRContext *mlirContext; + + mlir::OwningOpRef loadModule(llvm::MemoryBufferRef mbRef); + +protected: + CIRGenAction(OutputType action, mlir::MLIRContext *_MLIRContext = nullptr); + + std::unique_ptr + CreateASTConsumer(clang::CompilerInstance &CI, + llvm::StringRef InFile) override; + + void ExecuteAction() override; + + void EndSourceFileAction() override; + +public: + ~CIRGenAction() override; + + virtual bool hasCIRSupport() const override { return true; } + + CIRGenConsumer *cgConsumer; + OutputType action; +}; + +class EmitCIRAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitCIRAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitCIRFlatAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitCIRFlatAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitCIROnlyAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitCIROnlyAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitMLIRAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitMLIRAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitLLVMAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitLLVMAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitBCAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitBCAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitAssemblyAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitAssemblyAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +class EmitObjAction : public CIRGenAction { + virtual void anchor(); + +public: + EmitObjAction(mlir::MLIRContext *mlirCtx = nullptr); +}; + +// Used for -fclangir-analysis-only: use CIR analysis but still use original LLVM codegen path +class AnalysisOnlyActionBase : public clang::CodeGenAction { + virtual void anchor(); + +protected: + std::unique_ptr + CreateASTConsumer(clang::CompilerInstance &CI, + llvm::StringRef InFile) override; + + AnalysisOnlyActionBase(unsigned _Act, + llvm::LLVMContext *_VMContext = nullptr); +}; + +class AnalysisOnlyAndEmitAssemblyAction : public AnalysisOnlyActionBase { + virtual void anchor() override; + +public: + AnalysisOnlyAndEmitAssemblyAction(llvm::LLVMContext *_VMContext = nullptr); +}; + +class AnalysisOnlyAndEmitBCAction : 
public AnalysisOnlyActionBase { + virtual void anchor() override; + +public: + AnalysisOnlyAndEmitBCAction(llvm::LLVMContext *_VMContext = nullptr); +}; + +class AnalysisOnlyAndEmitLLVMAction : public AnalysisOnlyActionBase { + virtual void anchor() override; + +public: + AnalysisOnlyAndEmitLLVMAction(llvm::LLVMContext *_VMContext = nullptr); +}; + +class AnalysisOnlyAndEmitLLVMOnlyAction : public AnalysisOnlyActionBase { + virtual void anchor() override; + +public: + AnalysisOnlyAndEmitLLVMOnlyAction(llvm::LLVMContext *_VMContext = nullptr); +}; + +class AnalysisOnlyAndEmitCodeGenOnlyAction : public AnalysisOnlyActionBase { + virtual void anchor() override; + +public: + AnalysisOnlyAndEmitCodeGenOnlyAction(llvm::LLVMContext *_VMContext = nullptr); +}; + +class AnalysisOnlyAndEmitObjAction : public AnalysisOnlyActionBase { + virtual void anchor() override; + +public: + AnalysisOnlyAndEmitObjAction(llvm::LLVMContext *_VMContext = nullptr); +}; +} // namespace cir + +#endif diff --git a/clang/include/clang/CIRFrontendAction/CIRGenConsumer.h b/clang/include/clang/CIRFrontendAction/CIRGenConsumer.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bbf860aa491e..70b467346b9f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2931,24 +2931,98 @@ def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group def fapple_link_rtlib : Flag<["-"], "fapple-link-rtlib">, Group, HelpText<"Force linking the clang builtins runtime library">; +def flto_EQ : Joined<["-"], "flto=">, + Visibility<[ClangOption, CLOption, CC1Option, FC1Option, FlangOption]>, + Group, + HelpText<"Set LTO mode">, Values<"thin,full">; +def flto_EQ_auto : Flag<["-"], "flto=auto">, Visibility<[ClangOption, FlangOption]>, Group, + Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; +def flto_EQ_jobserver : Flag<["-"], "flto=jobserver">, Visibility<[ClangOption, FlangOption]>, Group, + Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; + /// ClangIR-specific options - BEGIN defm clangir : BoolFOption<"clangir", FrontendOpts<"UseClangIRPipeline">, DefaultFalse, PosFlag, NegFlag LLVM pipeline to compile">, BothFlags<[], [ClangOption, CC1Option], "">>; +def fclangir_disable_deferred_EQ : Joined<["-"], "fclangir-build-deferred-threshold=">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"ClangIR (internal): Control the recursion level for calls to buildDeferred (defaults to 500)">, + MarshallingInfoInt, "500u">; +def fclangir_skip_system_headers : Joined<["-"], "fclangir-skip-system-headers">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"ClangIR (internal): buildDeferred skip functions defined in system headers">, + MarshallingInfoFlag>; +def fclangir_lifetime_check_EQ : Joined<["-"], "fclangir-lifetime-check=">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Run lifetime checker">, + MarshallingInfoString>; +def fclangir_lifetime_check : Flag<["-"], "fclangir-lifetime-check">, + Visibility<[ClangOption, CC1Option]>, Group, + Alias, AliasArgs<["history=invalid,null"]>, + HelpText<"Run lifetime checker">; +def fclangir_idiom_recognizer_EQ : Joined<["-"], "fclangir-idiom-recognizer=">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Enable C/C++ idiom recognizer">, + MarshallingInfoString>; +def fclangir_idiom_recognizer : Flag<["-"], "fclangir-idiom-recognizer">, + Visibility<[ClangOption, CC1Option]>, 
Group, + Alias, + HelpText<"Enable C/C++ idiom recognizer">; +def fclangir_lib_opt_EQ : Joined<["-"], "fclangir-lib-opt=">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Enable C/C++ library based optimizations (with options)">, + MarshallingInfoString>; +def fclangir_lib_opt : Flag<["-"], "fclangir-lib-opt">, + Visibility<[ClangOption, CC1Option]>, Group, + Alias, + HelpText<"Enable C/C++ library based optimizations">; +def fclangir_call_conv_lowering : Flag<["-"], "fclangir-call-conv-lowering">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Enable ClangIR calling convention lowering">, + MarshallingInfoFlag>; +def fclangir_mem2reg : Flag<["-"], "fclangir-mem2reg">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Enable mem2reg on the flat ClangIR">, + MarshallingInfoFlag>; + +def clangir_disable_passes : Flag<["-"], "clangir-disable-passes">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Disable CIR transformations pipeline">, + MarshallingInfoFlag>; +def clangir_disable_verifier : Flag<["-"], "clangir-disable-verifier">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"ClangIR: Disable MLIR module verifier">, + MarshallingInfoFlag>; +def clangir_disable_emit_cxx_default : Flag<["-"], "clangir-disable-emit-cxx-default">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"ClangIR: Disable emission of c++ default (compiler implemented) methods.">, + MarshallingInfoFlag>; +def clangir_verify_diagnostics : Flag<["-"], "clangir-verify-diagnostics">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"ClangIR: Enable diagnostic verification in MLIR, similar to clang's -verify">, + MarshallingInfoFlag>; +defm clangir_direct_lowering : BoolFOption<"clangir-direct-lowering", + FrontendOpts<"ClangIRDirectLowering">, DefaultTrue, + PosFlag, + NegFlag>; +defm clangir_analysis_only : BoolFOption<"clangir-analysis-only", + FrontendOpts<"ClangIRAnalysisOnly">, DefaultFalse, + PosFlag, + NegFlag>; + def emit_cir : Flag<["-"], "emit-cir">, Visibility<[CC1Option]>, - Group, HelpText<"Build ASTs and then lower to ClangIR">; + Group, HelpText<"Build ASTs and then lower to ClangIR, emit the .cir file">; +def emit_cir_only : Flag<["-"], "emit-cir-only">, + HelpText<"Build ASTs and convert to CIR, discarding output">; +def emit_cir_flat : Flag<["-"], "emit-cir-flat">, Visibility<[ClangOption, CC1Option]>, + Group, HelpText<"Similar to -emit-cir but also lowers structured CFG into basic blocks.">; +def emit_mlir : Flag<["-"], "emit-mlir">, Visibility<[CC1Option]>, Group, + HelpText<"Build ASTs and then lower through ClangIR to MLIR, emit the .milr file">; /// ClangIR-specific options - END -def flto_EQ : Joined<["-"], "flto=">, - Visibility<[ClangOption, CLOption, CC1Option, FC1Option, FlangOption]>, - Group, - HelpText<"Set LTO mode">, Values<"thin,full">; -def flto_EQ_jobserver : Flag<["-"], "flto=jobserver">, Visibility<[ClangOption, FlangOption]>, Group, - Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; -def flto_EQ_auto : Flag<["-"], "flto=auto">, Visibility<[ClangOption, FlangOption]>, Group, - Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; def flto : Flag<["-"], "flto">, Visibility<[ClangOption, CLOption, CC1Option, FC1Option, FlangOption]>, Group, @@ -4680,9 +4754,9 @@ def mllvm : Separate<["-"], "mllvm">, def : Joined<["-"], "mllvm=">, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, Alias, HelpText<"Alias for -mllvm">, MetaVarName<"">; -def mmlir : Separate<["-"], "mmlir">, - Visibility<[ClangOption, 
CLOption, FC1Option, FlangOption]>, - HelpText<"Additional arguments to forward to MLIR's option processing">; +def mmlir : Separate<["-"], "mmlir">, Visibility<[ClangOption,CC1Option,FC1Option,FlangOption]>, + HelpText<"Additional arguments to forward to MLIR's option processing">, + MarshallingInfoStringVector>; def ffuchsia_api_level_EQ : Joined<["-"], "ffuchsia-api-level=">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Set Fuchsia API level">, @@ -6727,7 +6801,7 @@ defm analyzed_objects_for_unparse : OptOutFC1FFlag<"analyzed-objects-for-unparse def emit_fir : Flag<["-"], "emit-fir">, Group, HelpText<"Build the parse tree, then lower it to FIR">; -def emit_mlir : Flag<["-"], "emit-mlir">, Alias; +// def emit_mlir : Flag<["-"], "emit-mlir">, Alias; def emit_hlfir : Flag<["-"], "emit-hlfir">, Group, HelpText<"Build the parse tree, then lower it to HLFIR">; diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def index 0e0cae5fb706..9d76e949d4ac 100644 --- a/clang/include/clang/Driver/Types.def +++ b/clang/include/clang/Driver/Types.def @@ -91,6 +91,9 @@ TYPE("lto-ir", LTO_IR, INVALID, "s", phases TYPE("lto-bc", LTO_BC, INVALID, "o", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("cir", CIR, INVALID, "cir", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("cir-flat", CIR_FLAT, INVALID, "cir", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("mlir", MLIR, INVALID, "mlir", phases::Compile, phases::Backend, phases::Assemble, phases::Link) + // Misc. TYPE("ast", AST, INVALID, "ast", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("ifs", IFS, INVALID, "ifs", phases::IfsMerge) diff --git a/clang/include/clang/Frontend/FrontendAction.h b/clang/include/clang/Frontend/FrontendAction.h index 039f6f247b6d..effc505d9a3e 100644 --- a/clang/include/clang/Frontend/FrontendAction.h +++ b/clang/include/clang/Frontend/FrontendAction.h @@ -196,6 +196,9 @@ class FrontendAction { /// Does this action support use with IR files? virtual bool hasIRSupport() const { return false; } + /// Does this action support use with CIR files? + virtual bool hasCIRSupport() const { return false; } + /// Does this action support use with code completion? virtual bool hasCodeCompletionSupport() const { return false; } diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index ebb8e9e59c6b..36ce7fcc02a8 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -68,6 +68,15 @@ enum ActionKind { /// Emit a .cir file EmitCIR, + /// Emit a .cir file with flat ClangIR + EmitCIRFlat, + + /// Generate CIR, bud don't emit anything. + EmitCIROnly, + + /// Emit a .mlir file + EmitMLIR, + /// Emit a .ll file. EmitLLVM, @@ -157,11 +166,7 @@ enum ActionKind { class InputKind { public: /// The input file format. 
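The new frontend ActionKinds added above mirror the CIRGenAction::OutputType values declared earlier in this patch, and a driver is expected to translate one into the other when it instantiates the action. The helper below is purely illustrative; its name and the EmitCIROnly-to-None mapping are assumptions, not code from this patch:

    static cir::CIRGenAction::OutputType
    toCIROutputType(clang::frontend::ActionKind kind) {
      switch (kind) {
      case clang::frontend::EmitCIR:     return cir::CIRGenAction::OutputType::EmitCIR;
      case clang::frontend::EmitCIRFlat: return cir::CIRGenAction::OutputType::EmitCIRFlat;
      case clang::frontend::EmitMLIR:    return cir::CIRGenAction::OutputType::EmitMLIR;
      case clang::frontend::EmitCIROnly: return cir::CIRGenAction::OutputType::None;
      default: llvm_unreachable("not a ClangIR frontend action");
      }
    }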
- enum Format { - Source, - ModuleMap, - Precompiled - }; + enum Format { Source, ModuleMap, Precompiled }; // If we are building a header unit, what kind it is; this affects whether // we look for the file in the user or system include search paths before @@ -415,6 +420,40 @@ class FrontendOptions { LLVM_PREFERRED_TYPE(bool) unsigned UseClangIRPipeline : 1; + /// Lower directly from ClangIR to LLVM + unsigned ClangIRDirectLowering : 1; + + /// Disable Clang IR specific (CIR) passes + unsigned ClangIRDisablePasses : 1; + + /// Disable Clang IR (CIR) verifier + unsigned ClangIRDisableCIRVerifier : 1; + + /// Disable ClangIR emission for CXX default (compiler generated methods). + unsigned ClangIRDisableEmitCXXDefault : 1; + + /// Enable diagnostic verification for CIR + unsigned ClangIRVerifyDiags : 1; + + // Enable Clang IR based lifetime check + unsigned ClangIRLifetimeCheck : 1; + + // Enable Clang IR idiom recognizer + unsigned ClangIRIdiomRecognizer : 1; + + // Enable Clang IR library optimizations + unsigned ClangIRLibOpt : 1; + + // Enable Clang IR call conv lowering pass. + unsigned ClangIREnableCallConvLowering : 1; + + // Enable Clang IR mem2reg pass on the flat CIR. + unsigned ClangIREnableMem2Reg : 1; + + // Enable Clang IR analysis only pipeline that uses tranditional code gen + // pipeline. + unsigned ClangIRAnalysisOnly : 1; + CodeCompleteOptions CodeCompleteOpts; /// Specifies the output format of the AST. @@ -472,11 +511,11 @@ class FrontendOptions { /// Enable converting setter/getter expressions to property-dot syntx. ObjCMT_PropertyDotSyntax = 0x1000, - ObjCMT_MigrateDecls = (ObjCMT_ReadonlyProperty | ObjCMT_ReadwriteProperty | - ObjCMT_Annotation | ObjCMT_Instancetype | - ObjCMT_NsMacros | ObjCMT_ProtocolConformance | - ObjCMT_NsAtomicIOSOnlyProperty | - ObjCMT_DesignatedInitializer), + ObjCMT_MigrateDecls = + (ObjCMT_ReadonlyProperty | ObjCMT_ReadwriteProperty | + ObjCMT_Annotation | ObjCMT_Instancetype | ObjCMT_NsMacros | + ObjCMT_ProtocolConformance | ObjCMT_NsAtomicIOSOnlyProperty | + ObjCMT_DesignatedInitializer), ObjCMT_MigrateAll = (ObjCMT_Literals | ObjCMT_Subscripting | ObjCMT_MigrateDecls | ObjCMT_PropertyDotSyntax) }; @@ -486,6 +525,10 @@ class FrontendOptions { std::string MTMigrateDir; std::string ARCMTMigrateReportOut; + std::string ClangIRLifetimeCheckOpts; + std::string ClangIRIdiomRecognizerOpts; + std::string ClangIRLibOptOpts; + /// The input kind, either specified via -x argument or deduced from the input /// file name. InputKind DashX; @@ -557,6 +600,10 @@ class FrontendOptions { /// should only be used for debugging and experimental features. std::vector LLVMArgs; + /// A list of arguments to forward to MLIR's option processing; this + /// should only be used for debugging and experimental features. + std::vector MLIRArgs; + /// File name of the file that will provide record layouts /// (in the format produced by -fdump-record-layouts). 
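The new MLIRArgs field parallels the existing LLVMArgs mechanism: arguments collected from -mmlir are later handed to MLIR's command-line machinery. A sketch of the kind of forwarding a consumer might do, assuming the relevant MLIR options were registered beforehand and `opts` is the active FrontendOptions; this mirrors how LLVMArgs is consumed but is not code from this patch:

    std::vector<const char *> args;
    args.push_back("clang (MLIR option parsing)"); // fake argv[0]
    for (const std::string &a : opts.MLIRArgs)
      args.push_back(a.c_str());
    args.push_back(nullptr);
    llvm::cl::ParseCommandLineOptions(args.size() - 1, args.data());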
std::string OverrideRecordLayoutsFile; @@ -597,7 +644,11 @@ class FrontendOptions { EmitSymbolGraph(false), EmitExtensionSymbolGraphs(false), EmitSymbolGraphSymbolLabelsForTesting(false), EmitPrettySymbolGraphs(false), GenReducedBMI(false), - UseClangIRPipeline(false), TimeTraceGranularity(500) {} + UseClangIRPipeline(false), ClangIRDirectLowering(false), + ClangIRDisablePasses(false), ClangIRDisableCIRVerifier(false), + ClangIRDisableEmitCXXDefault(false), ClangIRLifetimeCheck(false), + ClangIRIdiomRecognizer(false), ClangIRLibOpt(false), + ClangIRAnalysisOnly(false), TimeTraceGranularity(500) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Sema/AnalysisBasedWarnings.h b/clang/include/clang/Sema/AnalysisBasedWarnings.h index aafe227b8408..6aac70021ec7 100644 --- a/clang/include/clang/Sema/AnalysisBasedWarnings.h +++ b/clang/include/clang/Sema/AnalysisBasedWarnings.h @@ -23,6 +23,7 @@ class Decl; class FunctionDecl; class QualType; class Sema; + namespace sema { class FunctionScopeInfo; } diff --git a/clang/lib/Basic/Targets.h b/clang/lib/Basic/Targets.h index b4d2486b5d2b..886fb6a1d753 100644 --- a/clang/lib/Basic/Targets.h +++ b/clang/lib/Basic/Targets.h @@ -23,7 +23,6 @@ namespace clang { namespace targets { -LLVM_LIBRARY_VISIBILITY std::unique_ptr AllocateTarget(const llvm::Triple &Triple, const clang::TargetOptions &Opts); diff --git a/clang/lib/CIR/.clang-tidy b/clang/lib/CIR/.clang-tidy new file mode 100644 index 000000000000..dfbcf9ccf7c2 --- /dev/null +++ b/clang/lib/CIR/.clang-tidy @@ -0,0 +1,61 @@ +InheritParentConfig: true +Checks: > + -misc-const-correctness, + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-identifier-naming, + readability-inconsistent-declaration-parameter-name, + readability-misleading-indentation, + readability-redundant-control-flow, + readability-redundant-smartptr-get, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof + + +CheckOptions: + - key: readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.ParameterCase + value: camelBack + - key: 
readability-identifier-naming.VariableCase + value: camelBack diff --git a/clang/lib/CIR/CMakeLists.txt b/clang/lib/CIR/CMakeLists.txt index d2ff200e0da5..1812b6669e19 100644 --- a/clang/lib/CIR/CMakeLists.txt +++ b/clang/lib/CIR/CMakeLists.txt @@ -1,4 +1,15 @@ include_directories(${LLVM_MAIN_SRC_DIR}/../mlir/include) include_directories(${CMAKE_BINARY_DIR}/tools/mlir/include) +# Report use of deprecated APIs as errors. +# TODO: Consider adding `/we4996` for MSVC when upstream MLIR resolves +# https://github.com/llvm/llvm-project/issues/65255. +if (NOT MSVC) + add_compile_options("-Werror=deprecated-declarations") +endif() + add_subdirectory(Dialect) +add_subdirectory(CodeGen) +add_subdirectory(FrontendAction) +add_subdirectory(Lowering) +add_subdirectory(Interfaces) diff --git a/clang/lib/CIR/CodeGen/ABIInfo.h b/clang/lib/CIR/CodeGen/ABIInfo.h new file mode 100644 index 000000000000..5a2e3ff56ca4 --- /dev/null +++ b/clang/lib/CIR/CodeGen/ABIInfo.h @@ -0,0 +1,45 @@ +//===----- ABIInfo.h - ABI information access & encapsulation ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_ABIINFO_H +#define LLVM_CLANG_LIB_CIR_ABIINFO_H + +#include "clang/AST/Type.h" + +namespace cir { + +class ABIArgInfo; +class CIRGenCXXABI; +class CIRGenFunctionInfo; +class CIRGenTypes; + +/// ABIInfo - Target specific hooks for defining how a type should be passed or +/// returned from functions. +class ABIInfo { + ABIInfo() = delete; + +public: + CIRGenTypes &CGT; + + ABIInfo(CIRGenTypes &cgt) : CGT{cgt} {} + + virtual ~ABIInfo(); + + CIRGenCXXABI &getCXXABI() const; + clang::ASTContext &getContext() const; + + virtual void computeInfo(CIRGenFunctionInfo &FI) const = 0; + + // Implement the Type::IsPromotableIntegerType for ABI specific needs. The + // only difference is that this consideres bit-precise integer types as well. + bool isPromotableIntegerTypeForABI(clang::QualType Ty) const; +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h new file mode 100644 index 000000000000..fdddf6fae500 --- /dev/null +++ b/clang/lib/CIR/CodeGen/Address.h @@ -0,0 +1,141 @@ +//===-- Address.h - An aligned address -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class provides a simple wrapper for a pair of a pointer and an +// alignment. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_ADDRESS_H +#define LLVM_CLANG_LIB_CIR_ADDRESS_H + +#include "clang/AST/CharUnits.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "llvm/IR/Constants.h" + +#include "mlir/IR/Value.h" + +namespace cir { + +// Indicates whether a pointer is known not to be null. +enum KnownNonNull_t { NotKnownNonNull, KnownNonNull }; + +/// Like RawAddress, an abstract representation of an aligned address, but the +/// pointer contained in this class is possibly signed. 
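The ABIInfo interface above is the CIR counterpart of the one in traditional CodeGen: each target supplies a subclass whose computeInfo fills in how the return value and each argument are classified. A shape-only sketch; the class name and empty body are assumptions for illustration, not a real target implementation:

    class SketchTargetABIInfo : public cir::ABIInfo {
    public:
      using ABIInfo::ABIInfo; // keeps the CIRGenTypes reference (CGT)
      void computeInfo(cir::CIRGenFunctionInfo &funcInfo) const override {
        // A real target classifies the return type and every argument here,
        // following its psABI rules (compare the X86ArgClass enum earlier).
      }
    };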
+class Address { + llvm::PointerIntPair PointerAndKnownNonNull; + mlir::Type ElementType; + clang::CharUnits Alignment; + +protected: + Address(std::nullptr_t) : ElementType(nullptr) {} + +public: + Address(mlir::Value pointer, mlir::Type elementType, + clang::CharUnits alignment, + KnownNonNull_t IsKnownNonNull = NotKnownNonNull) + : PointerAndKnownNonNull(pointer, IsKnownNonNull), + ElementType(elementType), Alignment(alignment) { + assert(mlir::isa(pointer.getType()) && + "Expected cir.ptr type"); + + assert(pointer && "Pointer cannot be null"); + assert(elementType && "Element type cannot be null"); + assert(!alignment.isZero() && "Alignment cannot be zero"); + } + Address(mlir::Value pointer, clang::CharUnits alignment) + : Address( + pointer, + mlir::cast(pointer.getType()).getPointee(), + alignment) { + + assert((!alignment.isZero() || pointer == nullptr) && + "creating valid address with invalid alignment"); + } + + static Address invalid() { return Address(nullptr); } + bool isValid() const { + return PointerAndKnownNonNull.getPointer() != nullptr; + } + + /// Return address with different pointer, but same element type and + /// alignment. + Address withPointer(mlir::Value NewPointer, + KnownNonNull_t IsKnownNonNull = NotKnownNonNull) const { + return Address(NewPointer, getElementType(), getAlignment(), + IsKnownNonNull); + } + + /// Return address with different alignment, but same pointer and element + /// type. + Address withAlignment(clang::CharUnits NewAlignment) const { + return Address(getPointer(), getElementType(), NewAlignment, + isKnownNonNull()); + } + + /// Return address with different element type, but same pointer and + /// alignment. + Address withElementType(mlir::Type ElemTy) const { + // TODO(cir): hasOffset() check + return Address(getPointer(), ElemTy, getAlignment(), isKnownNonNull()); + } + + mlir::Value getPointer() const { + assert(isValid()); + return PointerAndKnownNonNull.getPointer(); + } + + /// Return the alignment of this pointer. + clang::CharUnits getAlignment() const { + // assert(isValid()); + return Alignment; + } + + /// Return the pointer contained in this class after authenticating it and + /// adding offset to it if necessary. + mlir::Value emitRawPointer() const { + // TODO(cir): update this class with latest traditional LLVM codegen bits + // and the replace the call below to getBasePointer(). + return getPointer(); + } + + /// Return the type of the pointer value. + mlir::cir::PointerType getType() const { + return mlir::cast(getPointer().getType()); + } + + mlir::Type getElementType() const { + assert(isValid()); + return ElementType; + } + + /// Whether the pointer is known not to be null. + KnownNonNull_t isKnownNonNull() const { + assert(isValid()); + return (KnownNonNull_t)PointerAndKnownNonNull.getInt(); + } + + /// Set the non-null bit. + Address setKnownNonNull() { + assert(isValid()); + PointerAndKnownNonNull.setInt(true); + return *this; + } + + /// Get the operation which defines this address. 
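Address couples a cir.ptr value with its pointee type and alignment; the two-argument constructor above derives the element type from the pointer's pointee, and the with* helpers return adjusted copies. A small usage sketch, assuming `allocaAddr` is the mlir::Value produced by a cir.alloca of !s32i:

    cir::Address addr(allocaAddr, clang::CharUnits::fromQuantity(4));
    mlir::Type elemTy = addr.getElementType();   // deduced from the !cir.ptr pointee
    cir::Address realigned =
        addr.withAlignment(clang::CharUnits::fromQuantity(8)); // same pointer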
+ mlir::Operation *getDefiningOp() const { + if (!isValid()) + return nullptr; + return getPointer().getDefiningOp(); + } +}; + +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_ADDRESS_H diff --git a/clang/lib/CIR/CodeGen/CIRAsm.cpp b/clang/lib/CIR/CodeGen/CIRAsm.cpp new file mode 100644 index 000000000000..e88eb1da098f --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRAsm.cpp @@ -0,0 +1,700 @@ +#include "clang/Basic/DiagnosticSema.h" +#include "llvm/ADT/StringExtras.h" + +#include "CIRGenFunction.h" +#include "TargetInfo.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; + +static bool isAggregateType(mlir::Type typ) { + return isa(typ); +} + +static AsmFlavor inferFlavor(const CIRGenModule &cgm, const AsmStmt &S) { + AsmFlavor GnuAsmFlavor = + cgm.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT + ? AsmFlavor::x86_att + : AsmFlavor::x86_intel; + + return isa(&S) ? AsmFlavor::x86_intel : GnuAsmFlavor; +} + +// FIXME(cir): This should be a common helper between CIRGen +// and traditional CodeGen +static std::string SimplifyConstraint( + const char *Constraint, const TargetInfo &Target, + SmallVectorImpl *OutCons = nullptr) { + std::string Result; + + while (*Constraint) { + switch (*Constraint) { + default: + Result += Target.convertConstraint(Constraint); + break; + // Ignore these + case '*': + case '?': + case '!': + case '=': // Will see this and the following in mult-alt constraints. + case '+': + break; + case '#': // Ignore the rest of the constraint alternative. + while (Constraint[1] && Constraint[1] != ',') + Constraint++; + break; + case '&': + case '%': + Result += *Constraint; + while (Constraint[1] && Constraint[1] == *Constraint) + Constraint++; + break; + case ',': + Result += "|"; + break; + case 'g': + Result += "imr"; + break; + case '[': { + assert(OutCons && + "Must pass output names to constraints with a symbolic name"); + unsigned Index; + bool result = Target.resolveSymbolicName(Constraint, *OutCons, Index); + assert(result && "Could not resolve symbolic name"); + (void)result; + Result += llvm::utostr(Index); + break; + } + } + + Constraint++; + } + + return Result; +} + +// FIXME(cir): This should be a common helper between CIRGen +// and traditional CodeGen +/// Look at AsmExpr and if it is a variable declared +/// as using a particular register add that as a constraint that will be used +/// in this asm stmt. +static std::string +AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr, + const TargetInfo &Target, CIRGenModule &CGM, + const AsmStmt &Stmt, const bool EarlyClobber, + std::string *GCCReg = nullptr) { + const DeclRefExpr *AsmDeclRef = dyn_cast(&AsmExpr); + if (!AsmDeclRef) + return Constraint; + const ValueDecl &Value = *AsmDeclRef->getDecl(); + const VarDecl *Variable = dyn_cast(&Value); + if (!Variable) + return Constraint; + if (Variable->getStorageClass() != SC_Register) + return Constraint; + AsmLabelAttr *Attr = Variable->getAttr(); + if (!Attr) + return Constraint; + StringRef Register = Attr->getLabel(); + assert(Target.isValidGCCRegisterName(Register)); + // We're using validateOutputConstraint here because we only care if + // this is a register constraint. + TargetInfo::ConstraintInfo Info(Constraint, ""); + if (Target.validateOutputConstraint(Info) && !Info.allowsRegister()) { + CGM.ErrorUnsupported(&Stmt, "__asm__"); + return Constraint; + } + // Canonicalize the register here before returning it. 
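AddVariableConstraints handles source like the snippet below, where a local variable is pinned to a physical register and then used as an inline-asm operand; the plain constraint is rewritten so LLVM ties the operand to that register. Illustrative input, not taken from the patch:

    register int counter asm("eax");              // GNU extension: bind to %eax
    __asm__ volatile("incl %0" : "+r"(counter));
    // For this operand the "r" constraint becomes "{eax}" (or "&{eax}" for an
    // early-clobber operand), as built by the return just below.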
+ Register = Target.getNormalizedGCCRegisterName(Register); + if (GCCReg != nullptr) + *GCCReg = Register.str(); + return (EarlyClobber ? "&{" : "{") + Register.str() + "}"; +} + +static void collectClobbers(const CIRGenFunction &cgf, const AsmStmt &S, + std::string &constraints, bool &hasUnwindClobber, + bool &readOnly, bool readNone) { + + hasUnwindClobber = false; + auto &cgm = cgf.getCIRGenModule(); + + // Clobbers + for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) { + StringRef clobber = S.getClobber(i); + if (clobber == "memory") + readOnly = readNone = false; + else if (clobber == "unwind") { + hasUnwindClobber = true; + continue; + } else if (clobber != "cc") { + clobber = cgf.getTarget().getNormalizedGCCRegisterName(clobber); + if (cgm.getCodeGenOpts().StackClashProtector && + cgf.getTarget().isSPRegName(clobber)) { + cgm.getDiags().Report(S.getAsmLoc(), + diag::warn_stack_clash_protection_inline_asm); + } + } + + if (isa(&S)) { + if (clobber == "eax" || clobber == "edx") { + if (constraints.find("=&A") != std::string::npos) + continue; + std::string::size_type position1 = + constraints.find("={" + clobber.str() + "}"); + if (position1 != std::string::npos) { + constraints.insert(position1 + 1, "&"); + continue; + } + std::string::size_type position2 = constraints.find("=A"); + if (position2 != std::string::npos) { + constraints.insert(position2 + 1, "&"); + continue; + } + } + } + if (!constraints.empty()) + constraints += ','; + + constraints += "~{"; + constraints += clobber; + constraints += '}'; + } + + // Add machine specific clobbers + std::string_view machineClobbers = cgf.getTarget().getClobbers(); + if (!machineClobbers.empty()) { + if (!constraints.empty()) + constraints += ','; + constraints += machineClobbers; + } +} + +using constraintInfos = SmallVector; + +static void collectInOutConstrainsInfos(const CIRGenFunction &cgf, + const AsmStmt &S, constraintInfos &out, + constraintInfos &in) { + + for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) { + StringRef Name; + if (const GCCAsmStmt *GAS = dyn_cast(&S)) + Name = GAS->getOutputName(i); + TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name); + bool IsValid = cgf.getTarget().validateOutputConstraint(Info); + (void)IsValid; + assert(IsValid && "Failed to parse output constraint"); + out.push_back(Info); + } + + for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) { + StringRef Name; + if (const GCCAsmStmt *GAS = dyn_cast(&S)) + Name = GAS->getInputName(i); + TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name); + bool IsValid = cgf.getTarget().validateInputConstraint(out, Info); + assert(IsValid && "Failed to parse input constraint"); + (void)IsValid; + in.push_back(Info); + } +} + +std::pair CIRGenFunction::buildAsmInputLValue( + const TargetInfo::ConstraintInfo &Info, LValue InputValue, + QualType InputType, std::string &ConstraintStr, SourceLocation Loc) { + + if (Info.allowsRegister() || !Info.allowsMemory()) { + if (hasScalarEvaluationKind(InputType)) + return {buildLoadOfLValue(InputValue, Loc).getScalarVal(), mlir::Type()}; + + mlir::Type Ty = convertType(InputType); + uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty); + if ((Size <= 64 && llvm::isPowerOf2_64(Size)) || + getTargetHooks().isScalarizableAsmOperand(*this, Ty)) { + Ty = mlir::cir::IntType::get(builder.getContext(), Size, false); + + return {builder.createLoad(getLoc(Loc), + InputValue.getAddress().withElementType(Ty)), + mlir::Type()}; + } + } + + Address Addr = InputValue.getAddress(); + ConstraintStr 
+= '*'; + return {Addr.getPointer(), Addr.getElementType()}; +} + +std::pair +CIRGenFunction::buildAsmInput(const TargetInfo::ConstraintInfo &Info, + const Expr *InputExpr, + std::string &ConstraintStr) { + auto loc = getLoc(InputExpr->getExprLoc()); + + // If this can't be a register or memory, i.e., has to be a constant + // (immediate or symbolic), try to emit it as such. + if (!Info.allowsRegister() && !Info.allowsMemory()) { + if (Info.requiresImmediateConstant()) { + Expr::EvalResult EVResult; + InputExpr->EvaluateAsRValue(EVResult, getContext(), true); + + llvm::APSInt IntResult; + if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), + getContext())) + return {builder.getConstAPSInt(loc, IntResult), mlir::Type()}; + } + + Expr::EvalResult Result; + if (InputExpr->EvaluateAsInt(Result, getContext())) + return {builder.getConstAPSInt(loc, Result.Val.getInt()), mlir::Type()}; + } + + if (Info.allowsRegister() || !Info.allowsMemory()) + if (CIRGenFunction::hasScalarEvaluationKind(InputExpr->getType())) + return {buildScalarExpr(InputExpr), mlir::Type()}; + if (InputExpr->getStmtClass() == Expr::CXXThisExprClass) + return {buildScalarExpr(InputExpr), mlir::Type()}; + InputExpr = InputExpr->IgnoreParenNoopCasts(getContext()); + LValue Dest = buildLValue(InputExpr); + return buildAsmInputLValue(Info, Dest, InputExpr->getType(), ConstraintStr, + InputExpr->getExprLoc()); +} + +static void buildAsmStores(CIRGenFunction &CGF, const AsmStmt &S, + const llvm::ArrayRef RegResults, + const llvm::ArrayRef ResultRegTypes, + const llvm::ArrayRef ResultTruncRegTypes, + const llvm::ArrayRef ResultRegDests, + const llvm::ArrayRef ResultRegQualTys, + const llvm::BitVector &ResultTypeRequiresCast, + const llvm::BitVector &ResultRegIsFlagReg) { + CIRGenBuilderTy &Builder = CGF.getBuilder(); + CIRGenModule &CGM = CGF.CGM; + auto CTX = Builder.getContext(); + + assert(RegResults.size() == ResultRegTypes.size()); + assert(RegResults.size() == ResultTruncRegTypes.size()); + assert(RegResults.size() == ResultRegDests.size()); + // ResultRegDests can be also populated by addReturnRegisterOutputs() above, + // in which case its size may grow. + assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); + assert(ResultRegIsFlagReg.size() <= ResultRegDests.size()); + + for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { + mlir::Value Tmp = RegResults[i]; + mlir::Type TruncTy = ResultTruncRegTypes[i]; + + if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) { + assert(!MissingFeatures::asmLLVMAssume()); + } + + // If the result type of the LLVM IR asm doesn't match the result type of + // the expression, do the conversion. + if (ResultRegTypes[i] != TruncTy) { + + // Truncate the integer result to the right size, note that TruncTy can be + // a pointer. 
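As a concrete illustration of the constant path in buildAsmInput above: an input whose constraint accepts neither a register nor memory is evaluated at compile time and materialized with getConstAPSInt instead of being loaded through an lvalue. For example (illustrative source):

    int x;
    __asm__("movl %1, %0" : "=r"(x) : "i"(42));   // "i": immediate-only input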
+ if (mlir::isa(TruncTy)) + Tmp = Builder.createFloatingCast(Tmp, TruncTy); + else if (isa(TruncTy) && + isa(Tmp.getType())) { + uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy); + Tmp = Builder.createIntCast( + Tmp, mlir::cir::IntType::get(CTX, (unsigned)ResSize, false)); + Tmp = Builder.createIntToPtr(Tmp, TruncTy); + } else if (isa(Tmp.getType()) && + isa(TruncTy)) { + uint64_t TmpSize = CGM.getDataLayout().getTypeSizeInBits(Tmp.getType()); + Tmp = Builder.createPtrToInt( + Tmp, mlir::cir::IntType::get(CTX, (unsigned)TmpSize, false)); + Tmp = Builder.createIntCast(Tmp, TruncTy); + } else if (isa(TruncTy)) { + Tmp = Builder.createIntCast(Tmp, TruncTy); + } else if (false /*TruncTy->isVectorTy()*/) { + assert(!MissingFeatures::asmVectorType()); + } + } + + LValue Dest = ResultRegDests[i]; + // ResultTypeRequiresCast elements correspond to the first + // ResultTypeRequiresCast.size() elements of RegResults. + if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { + unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]); + Address A = Dest.getAddress().withElementType(ResultRegTypes[i]); + if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) { + Builder.createStore(CGF.getLoc(S.getAsmLoc()), Tmp, A); + continue; + } + + QualType Ty = + CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false); + if (Ty.isNull()) { + const Expr *OutExpr = S.getOutputExpr(i); + CGM.getDiags().Report(OutExpr->getExprLoc(), + diag::err_store_value_to_reg); + return; + } + Dest = CGF.makeAddrLValue(A, Ty); + } + + CGF.buildStoreThroughLValue(RValue::get(Tmp), Dest); + } +} + +mlir::LogicalResult CIRGenFunction::buildAsmStmt(const AsmStmt &S) { + // Assemble the final asm string. + std::string AsmString = S.generateAsmString(getContext()); + + // Get all the output and input constraints together. + constraintInfos OutputConstraintInfos; + constraintInfos InputConstraintInfos; + collectInOutConstrainsInfos(*this, S, OutputConstraintInfos, + InputConstraintInfos); + + std::string Constraints; + std::vector ResultRegDests; + std::vector ResultRegQualTys; + std::vector ResultRegTypes; + std::vector ResultTruncRegTypes; + std::vector ArgTypes; + std::vector ArgElemTypes; + std::vector OutArgs; + std::vector InArgs; + std::vector InOutArgs; + std::vector Args; + llvm::BitVector ResultTypeRequiresCast; + llvm::BitVector ResultRegIsFlagReg; + + // Keep track of input constraints. + std::string InOutConstraints; + std::vector InOutArgTypes; + std::vector InOutArgElemTypes; + + // Keep track of out constraints for tied input operand. + std::vector OutputConstraints; + + // Keep track of defined physregs. + llvm::SmallSet PhysRegOutputs; + + // An inline asm can be marked readonly if it meets the following conditions: + // - it doesn't have any sideeffects + // - it doesn't clobber memory + // - it doesn't return a value by-reference + // It can be marked readnone if it doesn't have any input memory constraints + // in addition to meeting the conditions listed above. + bool ReadOnly = true, ReadNone = true; + + for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) { + TargetInfo::ConstraintInfo &Info = OutputConstraintInfos[i]; + + // Simplify the output constraint. 
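SimplifyConstraint, defined near the top of this file, canonicalizes GCC constraint strings into the LLVM-level form before they are concatenated here. A few concrete mappings implied by that switch; note the leading '=' or '+' is stripped by the caller and '=' is re-added afterwards:

    //   "&r"    ->  "&r"    (modifiers kept, repeated modifiers collapsed)
    //   "r,m"   ->  "r|m"   (multi-alternative ',' becomes '|')
    //   "g"     ->  "imr"   (general operand expands to immediate/memory/register)
    //   "[out]" ->  "0"     (symbolic operand names resolve to their index)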
+ std::string OutputConstraint(S.getOutputConstraint(i)); + OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1, + getTarget(), &OutputConstraintInfos); + + const Expr *OutExpr = S.getOutputExpr(i); + OutExpr = OutExpr->IgnoreParenNoopCasts(getContext()); + + std::string GCCReg; + OutputConstraint = + AddVariableConstraints(OutputConstraint, *OutExpr, getTarget(), CGM, S, + Info.earlyClobber(), &GCCReg); + + // Give an error on multiple outputs to same physreg. + if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second) + CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg); + + OutputConstraints.push_back(OutputConstraint); + LValue Dest = buildLValue(OutExpr); + + if (!Constraints.empty()) + Constraints += ','; + + // If this is a register output, then make the inline a sm return it + // by-value. If this is a memory result, return the value by-reference. + QualType QTy = OutExpr->getType(); + const bool IsScalarOrAggregate = + hasScalarEvaluationKind(QTy) || hasAggregateEvaluationKind(QTy); + if (!Info.allowsMemory() && IsScalarOrAggregate) { + Constraints += "=" + OutputConstraint; + ResultRegQualTys.push_back(QTy); + ResultRegDests.push_back(Dest); + + bool IsFlagReg = llvm::StringRef(OutputConstraint).starts_with("{@cc"); + ResultRegIsFlagReg.push_back(IsFlagReg); + + mlir::Type Ty = convertTypeForMem(QTy); + const bool RequiresCast = + Info.allowsRegister() && + (getTargetHooks().isScalarizableAsmOperand(*this, Ty) || + isAggregateType(Ty)); + + ResultTruncRegTypes.push_back(Ty); + ResultTypeRequiresCast.push_back(RequiresCast); + + if (RequiresCast) { + unsigned Size = getContext().getTypeSize(QTy); + Ty = mlir::cir::IntType::get(builder.getContext(), Size, false); + } + ResultRegTypes.push_back(Ty); + // If this output is tied to an input, and if the input is larger, then + // we need to set the actual result type of the inline asm node to be the + // same as the input type. + if (Info.hasMatchingInput()) { + unsigned InputNo; + for (InputNo = 0; InputNo != S.getNumInputs(); ++InputNo) { + TargetInfo::ConstraintInfo &Input = InputConstraintInfos[InputNo]; + if (Input.hasTiedOperand() && Input.getTiedOperand() == i) + break; + } + assert(InputNo != S.getNumInputs() && "Didn't find matching input!"); + + QualType InputTy = S.getInputExpr(InputNo)->getType(); + QualType OutputType = OutExpr->getType(); + + uint64_t InputSize = getContext().getTypeSize(InputTy); + if (getContext().getTypeSize(OutputType) < InputSize) { + // Form the asm to return the value as a larger integer or fp type. + ResultRegTypes.back() = ConvertType(InputTy); + } + } + if (mlir::Type AdjTy = getTargetHooks().adjustInlineAsmType( + *this, OutputConstraint, ResultRegTypes.back())) + ResultRegTypes.back() = AdjTy; + else { + CGM.getDiags().Report(S.getAsmLoc(), + diag::err_asm_invalid_type_in_input) + << OutExpr->getType() << OutputConstraint; + } + + // Update largest vector width for any vector types. + assert(!MissingFeatures::asmVectorType()); + } else { + Address DestAddr = Dest.getAddress(); + + // Matrix types in memory are represented by arrays, but accessed through + // vector pointers, with the alignment specified on the access operation. + // For inline assembly, update pointer arguments to use vector pointers. + // Otherwise there will be a mis-match if the matrix is also an + // input-argument which is represented as vector. 
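To make the register-versus-memory split above concrete: a scalar output with a register constraint is returned by-value from the asm, while a memory-only output is passed by reference with the "=*" prefix built in the else branch just below. Illustrative source:

    int r;
    struct Big { int a[16]; } big;
    __asm__("..." : "=r"(r), "=m"(big));
    // "=r"(r)   -> register output, lands in ResultRegTypes/ResultRegDests
    // "=m"(big) -> memory output, emitted as "=*m" with the address as an argument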
+ if (isa(OutExpr->getType().getCanonicalType())) + DestAddr = DestAddr.withElementType(ConvertType(OutExpr->getType())); + + ArgTypes.push_back(DestAddr.getType()); + ArgElemTypes.push_back(DestAddr.getElementType()); + OutArgs.push_back(DestAddr.getPointer()); + Args.push_back(DestAddr.getPointer()); + Constraints += "=*"; + Constraints += OutputConstraint; + ReadOnly = ReadNone = false; + } + + if (Info.isReadWrite()) { + InOutConstraints += ','; + const Expr *InputExpr = S.getOutputExpr(i); + + mlir::Value Arg; + mlir::Type ArgElemType; + std::tie(Arg, ArgElemType) = + buildAsmInputLValue(Info, Dest, InputExpr->getType(), + InOutConstraints, InputExpr->getExprLoc()); + + if (mlir::Type AdjTy = getTargetHooks().adjustInlineAsmType( + *this, OutputConstraint, Arg.getType())) + Arg = builder.createBitcast(Arg, AdjTy); + + // Update largest vector width for any vector types. + assert(!MissingFeatures::asmVectorType()); + + // Only tie earlyclobber physregs. + if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber())) + InOutConstraints += llvm::utostr(i); + else + InOutConstraints += OutputConstraint; + + InOutArgTypes.push_back(Arg.getType()); + InOutArgElemTypes.push_back(ArgElemType); + InOutArgs.push_back(Arg); + } + } // iterate over output operands + + // If this is a Microsoft-style asm blob, store the return registers (EAX:EDX) + // to the return value slot. Only do this when returning in registers. + if (isa(&S)) { + const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo(); + if (RetAI.isDirect() || RetAI.isExtend()) { + // Make a fake lvalue for the return value slot. + LValue ReturnSlot = makeAddrLValue(ReturnValue, FnRetTy); + CGM.getTargetCIRGenInfo().addReturnRegisterOutputs( + *this, ReturnSlot, Constraints, ResultRegTypes, ResultTruncRegTypes, + ResultRegDests, AsmString, S.getNumOutputs()); + SawAsmBlock = true; + } + } + + for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) { + const Expr *InputExpr = S.getInputExpr(i); + + TargetInfo::ConstraintInfo &Info = InputConstraintInfos[i]; + + if (Info.allowsMemory()) + ReadNone = false; + + if (!Constraints.empty()) + Constraints += ','; + + // Simplify the input constraint. + std::string InputConstraint(S.getInputConstraint(i)); + InputConstraint = SimplifyConstraint(InputConstraint.c_str(), getTarget(), + &OutputConstraintInfos); + + InputConstraint = AddVariableConstraints( + InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()), + getTarget(), CGM, S, false /* No EarlyClobber */); + + std::string ReplaceConstraint(InputConstraint); + mlir::Value Arg; + mlir::Type ArgElemType; + std::tie(Arg, ArgElemType) = buildAsmInput(Info, InputExpr, Constraints); + + // If this input argument is tied to a larger output result, extend the + // input to be the same size as the output. The LLVM backend wants to see + // the input and output of a matching constraint be the same size. Note + // that GCC does not define what the top bits are here. We use zext because + // that is usually cheaper, but LLVM IR should really get an anyext someday. + if (Info.hasTiedOperand()) { + unsigned Output = Info.getTiedOperand(); + QualType OutputType = S.getOutputExpr(Output)->getType(); + QualType InputTy = InputExpr->getType(); + + if (getContext().getTypeSize(OutputType) > + getContext().getTypeSize(InputTy)) { + // Use ptrtoint as appropriate so that we can do our extension. 
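+        // Pointer inputs go through ptrtoint to a pointer-sized integer
+        // first; the result is then cast to the (larger) tied output type,
+        // or to the pointer-sized integer when the output is itself a
+        // pointer.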
+ if (isa(Arg.getType())) + Arg = builder.createPtrToInt(Arg, UIntPtrTy); + mlir::Type OutputTy = convertType(OutputType); + if (isa(OutputTy)) + Arg = builder.createIntCast(Arg, OutputTy); + else if (isa(OutputTy)) + Arg = builder.createIntCast(Arg, UIntPtrTy); + else if (isa(OutputTy)) + Arg = builder.createFloatingCast(Arg, OutputTy); + } + + // Deal with the tied operands' constraint code in adjustInlineAsmType. + ReplaceConstraint = OutputConstraints[Output]; + } + + if (mlir::Type AdjTy = getTargetHooks().adjustInlineAsmType( + *this, ReplaceConstraint, Arg.getType())) + Arg = builder.createBitcast(Arg, AdjTy); + else + CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input) + << InputExpr->getType() << InputConstraint; + + // Update largest vector width for any vector types. + assert(!MissingFeatures::asmVectorType()); + + ArgTypes.push_back(Arg.getType()); + ArgElemTypes.push_back(ArgElemType); + InArgs.push_back(Arg); + Args.push_back(Arg); + Constraints += InputConstraint; + } // iterate over input operands + + // Append the "input" part of inout constraints. + for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) { + Args.push_back(InOutArgs[i]); + ArgTypes.push_back(InOutArgTypes[i]); + ArgElemTypes.push_back(InOutArgElemTypes[i]); + } + Constraints += InOutConstraints; + + bool HasUnwindClobber = false; + collectClobbers(*this, S, Constraints, HasUnwindClobber, ReadOnly, ReadNone); + + mlir::Type ResultType; + + if (ResultRegTypes.size() == 1) + ResultType = ResultRegTypes[0]; + else if (ResultRegTypes.size() > 1) { + auto sname = builder.getUniqueAnonRecordName(); + ResultType = + builder.getCompleteStructTy(ResultRegTypes, sname, false, nullptr); + } + + bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0; + std::vector RegResults; + + llvm::SmallVector operands; + operands.push_back(OutArgs); + operands.push_back(InArgs); + operands.push_back(InOutArgs); + + auto IA = builder.create( + getLoc(S.getAsmLoc()), ResultType, operands, AsmString, Constraints, + HasSideEffect, inferFlavor(CGM, S), mlir::ArrayAttr()); + + if (false /*IsGCCAsmGoto*/) { + assert(!MissingFeatures::asmGoto()); + } else if (HasUnwindClobber) { + assert(!MissingFeatures::asmUnwindClobber()); + } else { + assert(!MissingFeatures::asmMemoryEffects()); + + mlir::Value result; + if (IA.getNumResults()) + result = IA.getResult(0); + + llvm::SmallVector operandAttrs; + + int i = 0; + for (auto typ : ArgElemTypes) { + if (typ) { + auto op = Args[i++]; + assert(mlir::isa(op.getType()) && + "pointer type expected"); + assert(cast(op.getType()).getPointee() == typ && + "element type differs from pointee type!"); + + operandAttrs.push_back(mlir::UnitAttr::get(builder.getContext())); + } else { + // We need to add an attribute for every arg since later, during + // the lowering to LLVM IR the attributes will be assigned to the + // CallInsn argument by index, i.e. 
we can't skip null type here + operandAttrs.push_back(mlir::Attribute()); + } + } + + assert(Args.size() == operandAttrs.size() && + "The number of attributes is not even with the number of operands"); + + IA.setOperandAttrsAttr(builder.getArrayAttr(operandAttrs)); + + if (ResultRegTypes.size() == 1) { + RegResults.push_back(result); + } else if (ResultRegTypes.size() > 1) { + auto alignment = CharUnits::One(); + auto sname = cast(ResultType).getName(); + auto dest = buildAlloca(sname, ResultType, getLoc(S.getAsmLoc()), + alignment, false); + auto addr = Address(dest, alignment); + builder.createStore(getLoc(S.getAsmLoc()), result, addr); + + for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) { + auto typ = builder.getPointerTo(ResultRegTypes[i]); + auto ptr = + builder.createGetMember(getLoc(S.getAsmLoc()), typ, dest, "", i); + auto tmp = + builder.createLoad(getLoc(S.getAsmLoc()), Address(ptr, alignment)); + RegResults.push_back(tmp); + } + } + } + + buildAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes, + ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast, + ResultRegIsFlagReg); + + return mlir::success(); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp new file mode 100644 index 000000000000..f852af7ca979 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -0,0 +1,1519 @@ +//===--- CIRGenAtomic.cpp - Emit CIR for atomic operations ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the code for emitting atomic operations. 
+// +//===----------------------------------------------------------------------===// + +#include "Address.h" + +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "CIRGenOpenMPRuntime.h" +#include "TargetInfo.h" +#include "clang/AST/ASTContext.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/Support/ErrorHandling.h" +#include + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Value.h" + +using namespace cir; +using namespace clang; + +namespace { +class AtomicInfo { + CIRGenFunction &CGF; + QualType AtomicTy; + QualType ValueTy; + uint64_t AtomicSizeInBits; + uint64_t ValueSizeInBits; + CharUnits AtomicAlign; + CharUnits ValueAlign; + TypeEvaluationKind EvaluationKind; + bool UseLibcall; + LValue LVal; + CIRGenBitFieldInfo BFI; + mlir::Location loc; + +public: + AtomicInfo(CIRGenFunction &CGF, LValue &lvalue, mlir::Location l) + : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0), + EvaluationKind(TEK_Scalar), UseLibcall(true), loc(l) { + assert(!lvalue.isGlobalReg()); + ASTContext &C = CGF.getContext(); + if (lvalue.isSimple()) { + AtomicTy = lvalue.getType(); + if (auto *ATy = AtomicTy->getAs()) + ValueTy = ATy->getValueType(); + else + ValueTy = AtomicTy; + EvaluationKind = CGF.getEvaluationKind(ValueTy); + + uint64_t ValueAlignInBits; + uint64_t AtomicAlignInBits; + TypeInfo ValueTI = C.getTypeInfo(ValueTy); + ValueSizeInBits = ValueTI.Width; + ValueAlignInBits = ValueTI.Align; + + TypeInfo AtomicTI = C.getTypeInfo(AtomicTy); + AtomicSizeInBits = AtomicTI.Width; + AtomicAlignInBits = AtomicTI.Align; + + assert(ValueSizeInBits <= AtomicSizeInBits); + assert(ValueAlignInBits <= AtomicAlignInBits); + + AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits); + ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits); + if (lvalue.getAlignment().isZero()) + lvalue.setAlignment(AtomicAlign); + + LVal = lvalue; + } else if (lvalue.isBitField()) { + llvm_unreachable("NYI"); + } else if (lvalue.isVectorElt()) { + ValueTy = lvalue.getType()->castAs()->getElementType(); + ValueSizeInBits = C.getTypeSize(ValueTy); + AtomicTy = lvalue.getType(); + AtomicSizeInBits = C.getTypeSize(AtomicTy); + AtomicAlign = ValueAlign = lvalue.getAlignment(); + LVal = lvalue; + } else { + llvm_unreachable("NYI"); + } + UseLibcall = !C.getTargetInfo().hasBuiltinAtomic( + AtomicSizeInBits, C.toBits(lvalue.getAlignment())); + } + + QualType getAtomicType() const { return AtomicTy; } + QualType getValueType() const { return ValueTy; } + CharUnits getAtomicAlignment() const { return AtomicAlign; } + uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } + uint64_t getValueSizeInBits() const { return ValueSizeInBits; } + TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } + bool shouldUseLibcall() const { return UseLibcall; } + const LValue &getAtomicLValue() const { return LVal; } + mlir::Value getAtomicPointer() const { + if (LVal.isSimple()) + return LVal.getPointer(); + else if (LVal.isBitField()) + return LVal.getBitFieldPointer(); + else if (LVal.isVectorElt()) + return LVal.getVectorPointer(); + assert(LVal.isExtVectorElt()); + // TODO(cir): return LVal.getExtVectorPointer(); + llvm_unreachable("NYI"); + } + Address getAtomicAddress() const { + mlir::Type ElTy; + if (LVal.isSimple()) + 
ElTy = LVal.getAddress().getElementType(); + else if (LVal.isBitField()) + ElTy = LVal.getBitFieldAddress().getElementType(); + else if (LVal.isVectorElt()) + ElTy = LVal.getVectorAddress().getElementType(); + else // TODO(cir): ElTy = LVal.getExtVectorAddress().getElementType(); + llvm_unreachable("NYI"); + return Address(getAtomicPointer(), ElTy, getAtomicAlignment()); + } + + Address getAtomicAddressAsAtomicIntPointer() const { + return castToAtomicIntPointer(getAtomicAddress()); + } + + /// Is the atomic size larger than the underlying value type? + /// + /// Note that the absence of padding does not mean that atomic + /// objects are completely interchangeable with non-atomic + /// objects: we might have promoted the alignment of a type + /// without making it bigger. + bool hasPadding() const { return (ValueSizeInBits != AtomicSizeInBits); } + + bool emitMemSetZeroIfNecessary() const; + + mlir::Value getAtomicSizeValue() const { llvm_unreachable("NYI"); } + + mlir::Value getScalarRValValueOrNull(RValue RVal) const; + + /// Cast the given pointer to an integer pointer suitable for atomic + /// operations if the source. + Address castToAtomicIntPointer(Address Addr) const; + + /// If Addr is compatible with the iN that will be used for an atomic + /// operation, bitcast it. Otherwise, create a temporary that is suitable + /// and copy the value across. + Address convertToAtomicIntPointer(Address Addr) const; + + /// Turn an atomic-layout object into an r-value. + RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot, + SourceLocation loc, bool AsValue) const; + + /// Converts a rvalue to integer value. + mlir::Value convertRValueToInt(RValue RVal, bool CmpXchg = false) const; + + RValue ConvertIntToValueOrAtomic(mlir::Value IntVal, AggValueSlot ResultSlot, + SourceLocation Loc, bool AsValue) const; + + /// Copy an atomic r-value into atomic-layout memory. + void emitCopyIntoMemory(RValue rvalue) const; + + /// Project an l-value down to the value field. + LValue projectValue() const { + assert(LVal.isSimple()); + Address addr = getAtomicAddress(); + if (hasPadding()) + llvm_unreachable("NYI"); + + return LValue::makeAddr(addr, getValueType(), CGF.getContext(), + LVal.getBaseInfo(), LVal.getTBAAInfo()); + } + + /// Emits atomic load. + /// \returns Loaded value. + RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc, + bool AsValue, llvm::AtomicOrdering AO, bool IsVolatile); + + /// Emits atomic compare-and-exchange sequence. + /// \param Expected Expected value. + /// \param Desired Desired value. + /// \param Success Atomic ordering for success operation. + /// \param Failure Atomic ordering for failed operation. + /// \param IsWeak true if atomic operation is weak, false otherwise. + /// \returns Pair of values: previous value from storage (value type) and + /// boolean flag (i1 type) with true if success and false otherwise. + std::pair + EmitAtomicCompareExchange(RValue Expected, RValue Desired, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, + bool IsWeak = false); + + /// Emits atomic update. + /// \param AO Atomic ordering. + /// \param UpdateOp Update operation for the current lvalue. + void EmitAtomicUpdate(llvm::AtomicOrdering AO, + const llvm::function_ref &UpdateOp, + bool IsVolatile); + /// Emits atomic update. + /// \param AO Atomic ordering. 
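+  /// \param UpdateRVal The new r-value used to perform the update.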
+ void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal, + bool IsVolatile); + + /// Materialize an atomic r-value in atomic-layout memory. + Address materializeRValue(RValue rvalue) const; + + /// Creates temp alloca for intermediate operations on atomic value. + Address CreateTempAlloca() const; + +private: + bool requiresMemSetZero(mlir::Type ty) const; + + /// Emits atomic load as a libcall. + void EmitAtomicLoadLibcall(mlir::Value AddForLoaded, llvm::AtomicOrdering AO, + bool IsVolatile); + /// Emits atomic load as LLVM instruction. + mlir::Value EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile); + /// Emits atomic compare-and-exchange op as a libcall. + mlir::Value EmitAtomicCompareExchangeLibcall( + mlir::Value ExpectedAddr, mlir::Value DesiredAddr, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent); + /// Emits atomic compare-and-exchange op as LLVM instruction. + std::pair + EmitAtomicCompareExchangeOp(mlir::Value ExpectedVal, mlir::Value DesiredVal, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, + bool IsWeak = false); + /// Emit atomic update as libcalls. + void + EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, + const llvm::function_ref &UpdateOp, + bool IsVolatile); + /// Emit atomic update as LLVM instructions. + void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, + const llvm::function_ref &UpdateOp, + bool IsVolatile); + /// Emit atomic update as libcalls. + void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal, + bool IsVolatile); + /// Emit atomic update as LLVM instructions. + void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRal, + bool IsVolatile); +}; +} // namespace + +// This function emits any expression (scalar, complex, or aggregate) +// into a temporary alloca. +static Address buildValToTemp(CIRGenFunction &CGF, Expr *E) { + Address DeclPtr = CGF.CreateMemTemp( + E->getType(), CGF.getLoc(E->getSourceRange()), ".atomictmp"); + CGF.buildAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(), + /*Init*/ true); + return DeclPtr; +} + +/// Does a store of the given IR type modify the full expected width? +static bool isFullSizeType(CIRGenModule &CGM, mlir::Type ty, + uint64_t expectedSize) { + return (CGM.getDataLayout().getTypeStoreSize(ty) * 8 == expectedSize); +} + +/// Does the atomic type require memsetting to zero before initialization? +/// +/// The IR type is provided as a way of making certain queries faster. +bool AtomicInfo::requiresMemSetZero(mlir::Type ty) const { + // If the atomic type has size padding, we definitely need a memset. + if (hasPadding()) + return true; + + // Otherwise, do some simple heuristics to try to avoid it: + switch (getEvaluationKind()) { + // For scalars and complexes, check whether the store size of the + // type uses the full size. + case TEK_Scalar: + return !isFullSizeType(CGF.CGM, ty, AtomicSizeInBits); + case TEK_Complex: + llvm_unreachable("NYI"); + + // Padding in structs has an undefined bit pattern. User beware. + case TEK_Aggregate: + return false; + } + llvm_unreachable("bad evaluation kind"); +} + +Address AtomicInfo::castToAtomicIntPointer(Address addr) const { + auto intTy = mlir::dyn_cast(addr.getElementType()); + // Don't bother with int casts if the integer size is the same. 
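+  // Otherwise reinterpret the address as a pointer to an unsigned integer of
+  // exactly AtomicSizeInBits.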
+ if (intTy && intTy.getWidth() == AtomicSizeInBits) + return addr; + auto ty = CGF.getBuilder().getUIntNTy(AtomicSizeInBits); + return addr.withElementType(ty); +} + +Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const { + auto Ty = Addr.getElementType(); + uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty); + if (SourceSizeInBits != AtomicSizeInBits) { + llvm_unreachable("NYI"); + } + + return castToAtomicIntPointer(Addr); +} + +Address AtomicInfo::CreateTempAlloca() const { + Address TempAlloca = CGF.CreateMemTemp( + (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy + : AtomicTy, + getAtomicAlignment(), loc, "atomic-temp"); + // Cast to pointer to value type for bitfields. + if (LVal.isBitField()) { + llvm_unreachable("NYI"); + } + return TempAlloca; +} + +// If the value comes from a ConstOp + IntAttr, retrieve and skip a series +// of casts if necessary. +// +// FIXME(cir): figure out warning issue and move this to CIRBaseBuilder.h +static mlir::cir::IntAttr getConstOpIntAttr(mlir::Value v) { + mlir::Operation *op = v.getDefiningOp(); + mlir::cir::IntAttr constVal; + while (auto c = dyn_cast(op)) + op = c.getOperand().getDefiningOp(); + if (auto c = dyn_cast(op)) { + if (mlir::isa(c.getType())) + constVal = mlir::cast(c.getValue()); + } + return constVal; +} + +// Inspect a value that is the strong/weak flag for a compare-exchange. If it +// is a constant of intergral or boolean type, set `val` to the constant's +// boolean value and return true. Otherwise leave `val` unchanged and return +// false. +static bool isCstWeak(mlir::Value weakVal, bool &val) { + mlir::Operation *op = weakVal.getDefiningOp(); + while (auto c = dyn_cast(op)) { + op = c.getOperand().getDefiningOp(); + } + if (auto c = dyn_cast(op)) { + if (mlir::isa(c.getType())) { + val = mlir::cast(c.getValue()).getUInt() != 0; + return true; + } else if (mlir::isa(c.getType())) { + val = mlir::cast(c.getValue()).getValue(); + return true; + } + } + return false; +} + +// Functions that help with the creation of compiler-generated switch +// statements that are used to implement non-constant memory order parameters. + +// Create a new region. Create a block within the region. Add a "break" +// statement to the block. Set the builder's insertion point to before the +// "break" statement. Add the new region to the given container. +template +static void startRegion(mlir::OpBuilder &builder, RegionsCont &Regions, + mlir::Location loc) { + + Regions.push_back(std::make_unique()); + mlir::Region *Region = Regions.back().get(); + mlir::Block *Block = builder.createBlock(Region); + builder.setInsertionPointToEnd(Block); + auto Break = builder.create(loc); + builder.setInsertionPoint(Break); +} + +// Create a "default:" label and add it to the given collection of case labels. +// Create the region that will hold the body of the "default:" block. +template +static void buildDefaultCase(mlir::OpBuilder &builder, CaseAttrsCont &CaseAttrs, + RegionsCont &Regions, mlir::Location loc) { + + auto Context = builder.getContext(); + auto EmptyArrayAttr = builder.getArrayAttr({}); + auto DefaultKind = + mlir::cir::CaseOpKindAttr::get(Context, mlir::cir::CaseOpKind::Default); + auto DefaultAttr = + mlir::cir::CaseAttr::get(Context, EmptyArrayAttr, DefaultKind); + CaseAttrs.push_back(DefaultAttr); + startRegion(builder, Regions, loc); +} + +// Create a single "case" label with the given MemOrder as its value. Add the +// "case" label to the given collection of case labels. 
Create the region that +// will hold the body of the "case" block. +template +static void +buildSingleMemOrderCase(mlir::OpBuilder &builder, CaseAttrsCont &CaseAttrs, + RegionsCont &Regions, mlir::Location loc, + mlir::Type Type, mlir::cir::MemOrder Order) { + + auto Context = builder.getContext(); + SmallVector OneOrder{ + mlir::cir::IntAttr::get(Type, static_cast(Order))}; + auto OneAttribute = builder.getArrayAttr(OneOrder); + auto CaseKind = + mlir::cir::CaseOpKindAttr::get(Context, mlir::cir::CaseOpKind::Equal); + auto CaseAttr = mlir::cir::CaseAttr::get(Context, OneAttribute, CaseKind); + CaseAttrs.push_back(CaseAttr); + startRegion(builder, Regions, loc); +} + +// Create a pair of "case" labels with the given MemOrders as their values. +// Add the combined "case" attribute to the given collection of case labels. +// Create the region that will hold the body of the "case" block. +template +static void buildDoubleMemOrderCase(mlir::OpBuilder &builder, + CaseAttrsCont &CaseAttrs, + RegionsCont &Regions, mlir::Location loc, + mlir::Type Type, mlir::cir::MemOrder Order1, + mlir::cir::MemOrder Order2) { + + auto Context = builder.getContext(); + SmallVector TwoOrders{ + mlir::cir::IntAttr::get(Type, static_cast(Order1)), + mlir::cir::IntAttr::get(Type, static_cast(Order2))}; + auto TwoAttributes = builder.getArrayAttr(TwoOrders); + auto CaseKind = + mlir::cir::CaseOpKindAttr::get(Context, mlir::cir::CaseOpKind::Anyof); + auto CaseAttr = mlir::cir::CaseAttr::get(Context, TwoAttributes, CaseKind); + CaseAttrs.push_back(CaseAttr); + startRegion(builder, Regions, loc); +} + +static void buildAtomicCmpXchg(CIRGenFunction &CGF, AtomicExpr *E, bool IsWeak, + Address Dest, Address Ptr, Address Val1, + Address Val2, uint64_t Size, + mlir::cir::MemOrder SuccessOrder, + mlir::cir::MemOrder FailureOrder, + llvm::SyncScope::ID Scope) { + auto &builder = CGF.getBuilder(); + auto loc = CGF.getLoc(E->getSourceRange()); + auto Expected = builder.createLoad(loc, Val1); + auto Desired = builder.createLoad(loc, Val2); + auto boolTy = builder.getBoolTy(); + auto cmpxchg = builder.create( + loc, Expected.getType(), boolTy, Ptr.getPointer(), Expected, Desired, + SuccessOrder, FailureOrder); + cmpxchg.setIsVolatile(E->isVolatile()); + cmpxchg.setWeak(IsWeak); + + auto cmp = builder.createNot(cmpxchg.getCmp()); + builder.create( + loc, cmp, false, [&](mlir::OpBuilder &, mlir::Location) { + auto ptrTy = + mlir::cast(Val1.getPointer().getType()); + if (Val1.getElementType() != ptrTy.getPointee()) { + Val1 = Val1.withPointer(builder.createPtrBitcast( + Val1.getPointer(), Val1.getElementType())); + } + builder.createStore(loc, cmpxchg.getOld(), Val1); + builder.createYield(loc); + }); + + // Update the memory at Dest with Cmp's value. + CGF.buildStoreOfScalar(cmpxchg.getCmp(), + CGF.makeAddrLValue(Dest, E->getType())); +} + +/// Given an ordering required on success, emit all possible cmpxchg +/// instructions to cope with the provided (but possibly only dynamically known) +/// FailureOrder. +static void buildAtomicCmpXchgFailureSet( + CIRGenFunction &CGF, AtomicExpr *E, bool IsWeak, Address Dest, Address Ptr, + Address Val1, Address Val2, mlir::Value FailureOrderVal, uint64_t Size, + mlir::cir::MemOrder SuccessOrder, llvm::SyncScope::ID Scope) { + + mlir::cir::MemOrder FailureOrder; + if (auto ordAttr = getConstOpIntAttr(FailureOrderVal)) { + // We should not ever get to a case where the ordering isn't a valid CABI + // value, but it's hard to enforce that in general. 
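+    // An out-of-range constant is therefore mapped to memory_order_relaxed
+    // below rather than rejected.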
+ auto ord = ordAttr.getUInt(); + if (!mlir::cir::isValidCIRAtomicOrderingCABI(ord)) { + FailureOrder = mlir::cir::MemOrder::Relaxed; + } else { + switch ((mlir::cir::MemOrder)ord) { + case mlir::cir::MemOrder::Relaxed: + // 31.7.2.18: "The failure argument shall not be memory_order_release + // nor memory_order_acq_rel". Fallback to monotonic. + case mlir::cir::MemOrder::Release: + case mlir::cir::MemOrder::AcquireRelease: + FailureOrder = mlir::cir::MemOrder::Relaxed; + break; + case mlir::cir::MemOrder::Consume: + case mlir::cir::MemOrder::Acquire: + FailureOrder = mlir::cir::MemOrder::Acquire; + break; + case mlir::cir::MemOrder::SequentiallyConsistent: + FailureOrder = mlir::cir::MemOrder::SequentiallyConsistent; + break; + } + } + // Prior to c++17, "the failure argument shall be no stronger than the + // success argument". This condition has been lifted and the only + // precondition is 31.7.2.18. Effectively treat this as a DR and skip + // language version checks. + buildAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, + SuccessOrder, FailureOrder, Scope); + return; + } + + // The failure memory order is not a compile-time value. The CIR atomic ops + // can't handle a runtime value; all memory orders must be hard coded. + // Generate a "switch" statement that converts the runtime value into a + // compile-time value. + CGF.getBuilder().create( + FailureOrderVal.getLoc(), FailureOrderVal, + [&](mlir::OpBuilder &builder, mlir::Location loc, + mlir::OperationState &os) { + SmallVector CaseAttrs; + SmallVector, 3> Regions; + + // default: + // Unsupported memory orders get generated as memory_order_relaxed, + // because there is no practical way to report an error at runtime. + buildDefaultCase(builder, CaseAttrs, Regions, loc); + buildAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, + SuccessOrder, mlir::cir::MemOrder::Relaxed, Scope); + + // case consume: + // case acquire: + // memory_order_consume is not implemented and always falls back to + // memory_order_acquire + buildDoubleMemOrderCase( + builder, CaseAttrs, Regions, loc, FailureOrderVal.getType(), + mlir::cir::MemOrder::Consume, mlir::cir::MemOrder::Acquire); + buildAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, + SuccessOrder, mlir::cir::MemOrder::Acquire, Scope); + + // A failed compare-exchange is a read-only operation. So + // memory_order_release and memory_order_acq_rel are not supported for + // the failure memory order. They fall back to memory_order_relaxed. 
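+          // (No case labels are emitted for them, so at runtime those values
+          // land in the default region above.)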
+ + // case seq_cst: + buildSingleMemOrderCase(builder, CaseAttrs, Regions, loc, + FailureOrderVal.getType(), + mlir::cir::MemOrder::SequentiallyConsistent); + buildAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, + SuccessOrder, + mlir::cir::MemOrder::SequentiallyConsistent, Scope); + + os.addRegions(Regions); + os.addAttribute("cases", builder.getArrayAttr(CaseAttrs)); + }); +} + +static void buildAtomicOp(CIRGenFunction &CGF, AtomicExpr *E, Address Dest, + Address Ptr, Address Val1, Address Val2, + mlir::Value IsWeak, mlir::Value FailureOrder, + uint64_t Size, mlir::cir::MemOrder Order, + uint8_t Scope) { + assert(!MissingFeatures::syncScopeID()); + StringRef Op; + + auto &builder = CGF.getBuilder(); + auto loc = CGF.getLoc(E->getSourceRange()); + auto orderAttr = mlir::cir::MemOrderAttr::get(builder.getContext(), Order); + mlir::cir::AtomicFetchKindAttr fetchAttr; + bool fetchFirst = true; + + switch (E->getOp()) { + case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: + llvm_unreachable("Already handled!"); + + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + buildAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, + FailureOrder, Size, Order, Scope); + return; + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: + llvm_unreachable("NYI"); + return; + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: { + bool weakVal; + if (isCstWeak(IsWeak, weakVal)) { + buildAtomicCmpXchgFailureSet(CGF, E, weakVal, Dest, Ptr, Val1, Val2, + FailureOrder, Size, Order, Scope); + } else { + llvm_unreachable("NYI"); + } + return; + } + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__scoped_atomic_load_n: + case AtomicExpr::AO__scoped_atomic_load: { + auto *load = builder.createLoad(loc, Ptr).getDefiningOp(); + // FIXME(cir): add scope information. + assert(!MissingFeatures::syncScopeID()); + load->setAttr("mem_order", orderAttr); + if (E->isVolatile()) + load->setAttr("is_volatile", mlir::UnitAttr::get(builder.getContext())); + + // TODO(cir): this logic should be part of createStore, but doing so + // currently breaks CodeGen/union.cpp and CodeGen/union.cpp. + auto ptrTy = + mlir::cast(Dest.getPointer().getType()); + if (Dest.getElementType() != ptrTy.getPointee()) { + Dest = Dest.withPointer( + builder.createPtrBitcast(Dest.getPointer(), Dest.getElementType())); + } + builder.createStore(loc, load->getResult(0), Dest); + return; + } + + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__scoped_atomic_store: + case AtomicExpr::AO__scoped_atomic_store_n: { + auto loadVal1 = builder.createLoad(loc, Val1); + // FIXME(cir): add scope information. 
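+    // Until sync scopes are supported, the MissingFeatures assert below marks
+    // that no scope information is attached to the store.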
+ assert(!MissingFeatures::syncScopeID()); + builder.createStore(loc, loadVal1, Ptr, E->isVolatile(), + /*alignment=*/mlir::IntegerAttr{}, orderAttr); + return; + } + + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange_n: + case AtomicExpr::AO__scoped_atomic_exchange: + Op = mlir::cir::AtomicXchg::getOperationName(); + break; + + case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_add_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_add: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Add); + break; + + case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_fetch_sub: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Sub); + break; + + case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_min: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Min); + break; + + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_max: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Max); + break; + + case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_and_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_and: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::And); + break; + + case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_or: + Op = 
mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Or); + break; + + case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_fetch_xor: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Xor); + break; + + case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__scoped_atomic_nand_fetch: + fetchFirst = false; + [[fallthrough]]; + case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_nand: + Op = mlir::cir::AtomicFetch::getOperationName(); + fetchAttr = mlir::cir::AtomicFetchKindAttr::get( + builder.getContext(), mlir::cir::AtomicFetchKind::Nand); + break; + } + + assert(Op.size() && "expected operation name to build"); + auto LoadVal1 = builder.createLoad(loc, Val1); + + SmallVector atomicOperands = {Ptr.getPointer(), LoadVal1}; + SmallVector atomicResTys = {LoadVal1.getType()}; + auto RMWI = builder.create(loc, builder.getStringAttr(Op), atomicOperands, + atomicResTys, {}); + + if (fetchAttr) + RMWI->setAttr("binop", fetchAttr); + RMWI->setAttr("mem_order", orderAttr); + if (E->isVolatile()) + RMWI->setAttr("is_volatile", mlir::UnitAttr::get(builder.getContext())); + if (fetchFirst && Op == mlir::cir::AtomicFetch::getOperationName()) + RMWI->setAttr("fetch_first", mlir::UnitAttr::get(builder.getContext())); + + auto Result = RMWI->getResult(0); + + // TODO(cir): this logic should be part of createStore, but doing so currently + // breaks CodeGen/union.cpp and CodeGen/union.cpp. + auto ptrTy = mlir::cast(Dest.getPointer().getType()); + if (Dest.getElementType() != ptrTy.getPointee()) { + Dest = Dest.withPointer( + builder.createPtrBitcast(Dest.getPointer(), Dest.getElementType())); + } + builder.createStore(loc, Result, Dest); +} + +static RValue buildAtomicLibcall(CIRGenFunction &CGF, StringRef fnName, + QualType resultType, CallArgList &args) { + [[maybe_unused]] const CIRGenFunctionInfo &fnInfo = + CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); + [[maybe_unused]] auto fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); + llvm_unreachable("NYI"); +} + +static void buildAtomicOp(CIRGenFunction &CGF, AtomicExpr *Expr, Address Dest, + Address Ptr, Address Val1, Address Val2, + mlir::Value IsWeak, mlir::Value FailureOrder, + uint64_t Size, mlir::cir::MemOrder Order, + mlir::Value Scope) { + auto ScopeModel = Expr->getScopeModel(); + + // LLVM atomic instructions always have synch scope. If clang atomic + // expression has no scope operand, use default LLVM synch scope. + if (!ScopeModel) { + assert(!MissingFeatures::syncScopeID()); + buildAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, /*FIXME(cir): LLVM default scope*/ 1); + return; + } + + // Handle constant scope. + if (getConstOpIntAttr(Scope)) { + assert(!MissingFeatures::syncScopeID()); + llvm_unreachable("NYI"); + return; + } + + // Handle non-constant scope. 
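+  // A runtime scope value would presumably need a switch over the possible
+  // scopes, similar to the memory-order handling elsewhere in this file; that
+  // is not implemented yet.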
+ llvm_unreachable("NYI"); +} + +RValue CIRGenFunction::buildAtomicExpr(AtomicExpr *E) { + QualType AtomicTy = E->getPtr()->getType()->getPointeeType(); + QualType MemTy = AtomicTy; + if (const AtomicType *AT = AtomicTy->getAs()) + MemTy = AT->getValueType(); + mlir::Value IsWeak = nullptr, OrderFail = nullptr; + + Address Val1 = Address::invalid(); + Address Val2 = Address::invalid(); + Address Dest = Address::invalid(); + Address Ptr = buildPointerWithAlignment(E->getPtr()); + + if (E->getOp() == AtomicExpr::AO__c11_atomic_init || + E->getOp() == AtomicExpr::AO__opencl_atomic_init) { + LValue lvalue = makeAddrLValue(Ptr, AtomicTy); + buildAtomicInit(E->getVal1(), lvalue); + return RValue::get(nullptr); + } + + auto TInfo = getContext().getTypeInfoInChars(AtomicTy); + uint64_t Size = TInfo.Width.getQuantity(); + unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth(); + + CharUnits MaxInlineWidth = + getContext().toCharUnitsFromBits(MaxInlineWidthInBits); + DiagnosticsEngine &Diags = CGM.getDiags(); + bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0; + bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits; + if (Misaligned) { + Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned) + << (int)TInfo.Width.getQuantity() + << (int)Ptr.getAlignment().getQuantity(); + } + if (Oversized) { + Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_oversized) + << (int)TInfo.Width.getQuantity() << (int)MaxInlineWidth.getQuantity(); + } + + auto Order = buildScalarExpr(E->getOrder()); + auto Scope = E->getScopeModel() ? buildScalarExpr(E->getScope()) : nullptr; + bool ShouldCastToIntPtrTy = true; + + switch (E->getOp()) { + case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: + llvm_unreachable("Already handled above with EmitAtomicInit!"); + + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__scoped_atomic_load_n: + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: + break; + + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__scoped_atomic_load: + Dest = buildPointerWithAlignment(E->getVal1()); + break; + + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__scoped_atomic_store: + Val1 = buildPointerWithAlignment(E->getVal1()); + break; + + case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange: + Val1 = buildPointerWithAlignment(E->getVal1()); + Dest = buildPointerWithAlignment(E->getVal2()); + break; + + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: + Val1 = buildPointerWithAlignment(E->getVal1()); + if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange || + E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange) + Val2 = buildPointerWithAlignment(E->getVal2()); + else + Val2 = buildValToTemp(*this, E->getVal2()); + OrderFail = buildScalarExpr(E->getOrderFail()); + if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n || + E->getOp() == AtomicExpr::AO__atomic_compare_exchange || 
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange_n || + E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange) { + IsWeak = buildScalarExpr(E->getWeak()); + } + break; + + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + if (MemTy->isPointerType()) { + llvm_unreachable("NYI"); + } + [[fallthrough]]; + case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_min_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: + ShouldCastToIntPtrTy = !MemTy->isFloatingType(); + [[fallthrough]]; + + case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_nand_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_store_n: + case AtomicExpr::AO__scoped_atomic_exchange_n: + Val1 = buildValToTemp(*this, E->getVal1()); + break; + } + + QualType RValTy = E->getType().getUnqualifiedType(); + + // The inlined atomics only function on iN types, where N is a power of 2. We + // need to make sure (via temporaries if necessary) that all incoming values + // are compatible. 
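+  // AtomicInfo computes the (possibly padded) atomic width and provides the
+  // iN pointer casts used below.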
+ LValue AtomicVal = makeAddrLValue(Ptr, AtomicTy); + AtomicInfo Atomics(*this, AtomicVal, getLoc(E->getSourceRange())); + + if (ShouldCastToIntPtrTy) { + Ptr = Atomics.castToAtomicIntPointer(Ptr); + if (Val1.isValid()) + Val1 = Atomics.convertToAtomicIntPointer(Val1); + if (Val2.isValid()) + Val2 = Atomics.convertToAtomicIntPointer(Val2); + } + if (Dest.isValid()) { + if (ShouldCastToIntPtrTy) + Dest = Atomics.castToAtomicIntPointer(Dest); + } else if (E->isCmpXChg()) + Dest = CreateMemTemp(RValTy, getLoc(E->getSourceRange()), "cmpxchg.bool"); + else if (!RValTy->isVoidType()) { + Dest = Atomics.CreateTempAlloca(); + if (ShouldCastToIntPtrTy) + Dest = Atomics.castToAtomicIntPointer(Dest); + } + + bool PowerOf2Size = (Size & (Size - 1)) == 0; + bool UseLibcall = !PowerOf2Size || (Size > 16); + + // For atomics larger than 16 bytes, emit a libcall from the frontend. This + // avoids the overhead of dealing with excessively-large value types in IR. + // Non-power-of-2 values also lower to libcall here, as they are not currently + // permitted in IR instructions (although that constraint could be relaxed in + // the future). For other cases where a libcall is required on a given + // platform, we let the backend handle it (this includes handling for all of + // the size-optimized libcall variants, which are only valid up to 16 bytes.) + // + // See: https://llvm.org/docs/Atomics.html#libcalls-atomic + if (UseLibcall) { + CallArgList Args; + // For non-optimized library calls, the size is the first parameter. + Args.add(RValue::get(builder.getConstInt(getLoc(E->getSourceRange()), + SizeTy, Size)), + getContext().getSizeType()); + + // The atomic address is the second parameter. + // The OpenCL atomic library functions only accept pointer arguments to + // generic address space. + auto CastToGenericAddrSpace = [&](mlir::Value V, QualType PT) { + if (!E->isOpenCL()) + return V; + llvm_unreachable("NYI"); + }; + + Args.add(RValue::get(CastToGenericAddrSpace(Ptr.emitRawPointer(), + E->getPtr()->getType())), + getContext().VoidPtrTy); + + // The next 1-3 parameters are op-dependent. + std::string LibCallName; + QualType RetTy; + bool HaveRetTy = false; + switch (E->getOp()) { + case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: + llvm_unreachable("Already handled!"); + + // There is only one libcall for compare an exchange, because there is no + // optimisation benefit possible from a libcall version of a weak compare + // and exchange. 
+ // bool __atomic_compare_exchange(size_t size, void *mem, void *expected, + // void *desired, int success, int failure) + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: + LibCallName = "__atomic_compare_exchange"; + llvm_unreachable("NYI"); + break; + // void __atomic_exchange(size_t size, void *mem, void *val, void *return, + // int order) + case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange_n: + LibCallName = "__atomic_exchange"; + llvm_unreachable("NYI"); + break; + // void __atomic_store(size_t size, void *mem, void *val, int order) + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__scoped_atomic_store: + case AtomicExpr::AO__scoped_atomic_store_n: + LibCallName = "__atomic_store"; + llvm_unreachable("NYI"); + break; + // void __atomic_load(size_t size, void *mem, void *return, int order) + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__hip_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__scoped_atomic_load: + case AtomicExpr::AO__scoped_atomic_load_n: + LibCallName = "__atomic_load"; + break; + case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_add_fetch: + case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_add: + case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_and_fetch: + case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_and: + case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: + case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_or: + case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: + case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_fetch_sub: + case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: + case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__c11_atomic_fetch_xor: + case 
AtomicExpr::AO__hip_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_fetch_xor: + case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_nand_fetch: + case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_min_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_max_fetch: + llvm_unreachable("Integral atomic operations always become atomicrmw!"); + } + + if (E->isOpenCL()) { + LibCallName = + std::string("__opencl") + StringRef(LibCallName).drop_front(1).str(); + } + // By default, assume we return a value of the atomic type. + if (!HaveRetTy) { + llvm_unreachable("NYI"); + } + // Order is always the last parameter. + Args.add(RValue::get(Order), getContext().IntTy); + if (E->isOpenCL()) { + llvm_unreachable("NYI"); + } + + [[maybe_unused]] RValue Res = + buildAtomicLibcall(*this, LibCallName, RetTy, Args); + // The value is returned directly from the libcall. + if (E->isCmpXChg()) { + llvm_unreachable("NYI"); + } + + if (RValTy->isVoidType()) { + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); + } + + [[maybe_unused]] bool IsStore = + E->getOp() == AtomicExpr::AO__c11_atomic_store || + E->getOp() == AtomicExpr::AO__opencl_atomic_store || + E->getOp() == AtomicExpr::AO__hip_atomic_store || + E->getOp() == AtomicExpr::AO__atomic_store || + E->getOp() == AtomicExpr::AO__atomic_store_n || + E->getOp() == AtomicExpr::AO__scoped_atomic_store || + E->getOp() == AtomicExpr::AO__scoped_atomic_store_n; + [[maybe_unused]] bool IsLoad = + E->getOp() == AtomicExpr::AO__c11_atomic_load || + E->getOp() == AtomicExpr::AO__opencl_atomic_load || + E->getOp() == AtomicExpr::AO__hip_atomic_load || + E->getOp() == AtomicExpr::AO__atomic_load || + E->getOp() == AtomicExpr::AO__atomic_load_n || + E->getOp() == AtomicExpr::AO__scoped_atomic_load || + E->getOp() == AtomicExpr::AO__scoped_atomic_load_n; + + if (auto ordAttr = getConstOpIntAttr(Order)) { + // We should not ever get to a case where the ordering isn't a valid CABI + // value, but it's hard to enforce that in general. 
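+    // Unlike the failure-order handling above, an invalid constant ordering
+    // here simply results in no atomic operation being emitted.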
+ auto ord = ordAttr.getUInt(); + if (mlir::cir::isValidCIRAtomicOrderingCABI(ord)) { + switch ((mlir::cir::MemOrder)ord) { + case mlir::cir::MemOrder::Relaxed: + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::Relaxed, Scope); + break; + case mlir::cir::MemOrder::Consume: + case mlir::cir::MemOrder::Acquire: + if (IsStore) + break; // Avoid crashing on code with undefined behavior + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::Acquire, Scope); + break; + case mlir::cir::MemOrder::Release: + if (IsLoad) + break; // Avoid crashing on code with undefined behavior + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::Release, Scope); + break; + case mlir::cir::MemOrder::AcquireRelease: + if (IsLoad || IsStore) + break; // Avoid crashing on code with undefined behavior + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::AcquireRelease, Scope); + break; + case mlir::cir::MemOrder::SequentiallyConsistent: + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::SequentiallyConsistent, Scope); + break; + } + } + if (RValTy->isVoidType()) + return RValue::get(nullptr); + + return convertTempToRValue(Dest.withElementType(convertTypeForMem(RValTy)), + RValTy, E->getExprLoc()); + } + + // The memory order is not known at compile-time. The atomic operations + // can't handle runtime memory orders; the memory order must be hard coded. + // Generate a "switch" statement that converts a runtime value into a + // compile-time value. + builder.create( + Order.getLoc(), Order, + [&](mlir::OpBuilder &builder, mlir::Location loc, + mlir::OperationState &os) { + llvm::SmallVector CaseAttrs; + llvm::SmallVector, 6> Regions; + + // default: + // Use memory_order_relaxed for relaxed operations and for any memory + // order value that is not supported. There is no good way to report + // an unsupported memory order at runtime, hence the fallback to + // memory_order_relaxed. + buildDefaultCase(builder, CaseAttrs, Regions, loc); + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::Relaxed, Scope); + + if (!IsStore) { + // case consume: + // case acquire: + // memory_order_consume is not implemented; it is always treated like + // memory_order_acquire. These memory orders are not valid for + // write-only operations. + buildDoubleMemOrderCase(builder, CaseAttrs, Regions, loc, + Order.getType(), mlir::cir::MemOrder::Consume, + mlir::cir::MemOrder::Acquire); + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, mlir::cir::MemOrder::Acquire, Scope); + } + + if (!IsLoad) { + // case release: + // memory_order_release is not valid for read-only operations. + buildSingleMemOrderCase(builder, CaseAttrs, Regions, loc, + Order.getType(), + mlir::cir::MemOrder::Release); + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, mlir::cir::MemOrder::Release, Scope); + } + + if (!IsLoad && !IsStore) { + // case acq_rel: + // memory_order_acq_rel is only valid for read-write operations. 
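+          // (This branch is only reached when the expression is neither a
+          // pure load nor a pure store.)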
+ buildSingleMemOrderCase(builder, CaseAttrs, Regions, loc, + Order.getType(), + mlir::cir::MemOrder::AcquireRelease); + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, + Size, mlir::cir::MemOrder::AcquireRelease, Scope); + } + + // case seq_cst: + buildSingleMemOrderCase(builder, CaseAttrs, Regions, loc, + Order.getType(), + mlir::cir::MemOrder::SequentiallyConsistent); + buildAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + mlir::cir::MemOrder::SequentiallyConsistent, Scope); + + os.addRegions(Regions); + os.addAttribute("cases", builder.getArrayAttr(CaseAttrs)); + }); + + if (RValTy->isVoidType()) + return RValue::get(nullptr); + return convertTempToRValue(Dest.withElementType(convertTypeForMem(RValTy)), + RValTy, E->getExprLoc()); +} + +void CIRGenFunction::buildAtomicStore(RValue rvalue, LValue lvalue, + bool isInit) { + bool IsVolatile = lvalue.isVolatileQualified(); + mlir::cir::MemOrder MO; + if (lvalue.getType()->isAtomicType()) { + MO = mlir::cir::MemOrder::SequentiallyConsistent; + } else { + MO = mlir::cir::MemOrder::Release; + IsVolatile = true; + } + return buildAtomicStore(rvalue, lvalue, MO, IsVolatile, isInit); +} + +/// Return true if \param ValTy is a type that should be casted to integer +/// around the atomic memory operation. If \param CmpXchg is true, then the +/// cast of a floating point type is made as that instruction can not have +/// floating point operands. TODO: Allow compare-and-exchange and FP - see +/// comment in CIRGenAtomicExpandPass.cpp. +static bool shouldCastToInt(mlir::Type ValTy, bool CmpXchg) { + if (mlir::cir::isAnyFloatingPointType(ValTy)) + return isa(ValTy) || CmpXchg; + return !isa(ValTy) && !isa(ValTy); +} + +mlir::Value AtomicInfo::getScalarRValValueOrNull(RValue RVal) const { + if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) + return RVal.getScalarVal(); + return nullptr; +} + +/// Materialize an r-value into memory for the purposes of storing it +/// to an atomic type. +Address AtomicInfo::materializeRValue(RValue rvalue) const { + // Aggregate r-values are already in memory, and EmitAtomicStore + // requires them to be values of the atomic type. + if (rvalue.isAggregate()) + return rvalue.getAggregateAddress(); + + // Otherwise, make a temporary and materialize into it. + LValue TempLV = CGF.makeAddrLValue(CreateTempAlloca(), getAtomicType()); + AtomicInfo Atomics(CGF, TempLV, TempLV.getAddress().getPointer().getLoc()); + Atomics.emitCopyIntoMemory(rvalue); + return TempLV.getAddress(); +} + +bool AtomicInfo::emitMemSetZeroIfNecessary() const { + assert(LVal.isSimple()); + Address addr = LVal.getAddress(); + if (!requiresMemSetZero(addr.getElementType())) + return false; + + llvm_unreachable("NYI"); +} + +/// Copy an r-value into memory as part of storing to an atomic type. +/// This needs to create a bit-pattern suitable for atomic operations. +void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const { + assert(LVal.isSimple()); + // If we have an r-value, the rvalue should be of the atomic type, + // which means that the caller is responsible for having zeroed + // any padding. Just do an aggregate copy of that type. + if (rvalue.isAggregate()) { + llvm_unreachable("NYI"); + return; + } + + // Okay, otherwise we're copying stuff. + + // Zero out the buffer if necessary. + emitMemSetZeroIfNecessary(); + + // Drill past the padding if present. + LValue TempLVal = projectValue(); + + // Okay, store the rvalue in. 
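// Aside (not part of the patch): shouldCastToInt above reflects the fact that
// a compare-and-exchange cannot take floating-point operands, so FP values are
// handled through an integer of the same width and compared as bit patterns.
// A minimal standalone C++20 sketch of that idea, with the storage kept as an
// integer purely for illustration:
#include <atomic>
#include <bit>
#include <cstdint>
#include <iostream>

bool casFloat(std::atomic<uint32_t> &storage, float expected, float desired) {
  uint32_t expectedBits = std::bit_cast<uint32_t>(expected);
  uint32_t desiredBits = std::bit_cast<uint32_t>(desired);
  // The comparison happens on the 32-bit patterns, not on float equality.
  return storage.compare_exchange_strong(expectedBits, desiredBits);
}

int main() {
  std::atomic<uint32_t> slot{std::bit_cast<uint32_t>(1.5f)};
  std::cout << casFloat(slot, 1.5f, 2.5f) << '\n';        // 1 (success)
  std::cout << std::bit_cast<float>(slot.load()) << '\n'; // 2.5
}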
+ if (rvalue.isScalar()) { + CGF.buildStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true); + } else { + llvm_unreachable("NYI"); + } +} + +mlir::Value AtomicInfo::convertRValueToInt(RValue RVal, bool CmpXchg) const { + // If we've got a scalar value of the right size, try to avoid going + // through memory. Floats get casted if needed by AtomicExpandPass. + if (auto Value = getScalarRValValueOrNull(RVal)) { + if (!shouldCastToInt(Value.getType(), CmpXchg)) { + return CGF.buildToMemory(Value, ValueTy); + } else { + llvm_unreachable("NYI"); + } + } + + llvm_unreachable("NYI"); +} + +/// Emit a store to an l-value of atomic type. +/// +/// Note that the r-value is expected to be an r-value *of the atomic +/// type*; this means that for aggregate r-values, it should include +/// storage for any padding that was necessary. +void CIRGenFunction::buildAtomicStore(RValue rvalue, LValue dest, + mlir::cir::MemOrder MO, bool IsVolatile, + bool isInit) { + // If this is an aggregate r-value, it should agree in type except + // maybe for address-space qualification. + auto loc = dest.getPointer().getLoc(); + assert(!rvalue.isAggregate() || + rvalue.getAggregateAddress().getElementType() == + dest.getAddress().getElementType()); + + AtomicInfo atomics(*this, dest, loc); + LValue LVal = atomics.getAtomicLValue(); + + // If this is an initialization, just put the value there normally. + if (LVal.isSimple()) { + if (isInit) { + atomics.emitCopyIntoMemory(rvalue); + return; + } + + // Check whether we should use a library call. + if (atomics.shouldUseLibcall()) { + llvm_unreachable("NYI"); + } + + // Okay, we're doing this natively. + auto ValToStore = atomics.convertRValueToInt(rvalue); + + // Do the atomic store. + Address Addr = atomics.getAtomicAddress(); + if (auto Value = atomics.getScalarRValValueOrNull(rvalue)) + if (shouldCastToInt(Value.getType(), /*CmpXchg=*/false)) { + Addr = atomics.castToAtomicIntPointer(Addr); + ValToStore = builder.createIntCast(ValToStore, Addr.getElementType()); + } + auto store = builder.createStore(loc, ValToStore, Addr); + + if (MO == mlir::cir::MemOrder::Acquire) + MO = mlir::cir::MemOrder::Relaxed; // Monotonic + else if (MO == mlir::cir::MemOrder::AcquireRelease) + MO = mlir::cir::MemOrder::Release; + // Initializations don't need to be atomic. + if (!isInit) + store.setMemOrder(MO); + + // Other decoration. + if (IsVolatile) + store.setIsVolatile(true); + + // DecorateInstructionWithTBAA + assert(!MissingFeatures::tbaa()); + return; + } + + llvm_unreachable("NYI"); +} + +void CIRGenFunction::buildAtomicInit(Expr *init, LValue dest) { + AtomicInfo atomics(*this, dest, getLoc(init->getSourceRange())); + + switch (atomics.getEvaluationKind()) { + case TEK_Scalar: { + mlir::Value value = buildScalarExpr(init); + atomics.emitCopyIntoMemory(RValue::get(value)); + return; + } + + case TEK_Complex: { + llvm_unreachable("NYI"); + return; + } + + case TEK_Aggregate: { + // Fix up the destination if the initializer isn't an expression + // of atomic type. + llvm_unreachable("NYI"); + return; + } + } + llvm_unreachable("bad evaluation kind"); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenBuilder.cpp new file mode 100644 index 000000000000..13ec20d8eda2 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.cpp @@ -0,0 +1,70 @@ +//===-- CIRGenBuilder.cpp - CIRBuilder implementation ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "CIRGenBuilder.h" + +namespace cir { + +mlir::Value CIRGenBuilderTy::maybeBuildArrayDecay(mlir::Location loc, + mlir::Value arrayPtr, + mlir::Type eltTy) { + auto arrayPtrTy = + ::mlir::dyn_cast<::mlir::cir::PointerType>(arrayPtr.getType()); + assert(arrayPtrTy && "expected pointer type"); + auto arrayTy = + ::mlir::dyn_cast<::mlir::cir::ArrayType>(arrayPtrTy.getPointee()); + + if (arrayTy) { + auto addrSpace = ::mlir::cast_if_present<::mlir::cir::AddressSpaceAttr>( + arrayPtrTy.getAddrSpace()); + mlir::cir::PointerType flatPtrTy = + getPointerTo(arrayTy.getEltType(), addrSpace); + return create( + loc, flatPtrTy, mlir::cir::CastKind::array_to_ptrdecay, arrayPtr); + } + + assert(arrayPtrTy.getPointee() == eltTy && + "flat pointee type must match original array element type"); + return arrayPtr; +} + +mlir::Value CIRGenBuilderTy::getArrayElement(mlir::Location arrayLocBegin, + mlir::Location arrayLocEnd, + mlir::Value arrayPtr, + mlir::Type eltTy, mlir::Value idx, + bool shouldDecay) { + mlir::Value basePtr = arrayPtr; + if (shouldDecay) + basePtr = maybeBuildArrayDecay(arrayLocBegin, arrayPtr, eltTy); + mlir::Type flatPtrTy = basePtr.getType(); + return create(arrayLocEnd, flatPtrTy, basePtr, idx); +} + +mlir::cir::ConstantOp CIRGenBuilderTy::getConstInt(mlir::Location loc, + llvm::APSInt intVal) { + bool isSigned = intVal.isSigned(); + auto width = intVal.getBitWidth(); + mlir::cir::IntType t = isSigned ? getSIntNTy(width) : getUIntNTy(width); + return getConstInt(loc, t, + isSigned ? intVal.getSExtValue() : intVal.getZExtValue()); +} + +mlir::cir::ConstantOp CIRGenBuilderTy::getConstInt(mlir::Location loc, + llvm::APInt intVal) { + auto width = intVal.getBitWidth(); + mlir::cir::IntType t = getUIntNTy(width); + return getConstInt(loc, t, intVal.getZExtValue()); +} + +mlir::cir::ConstantOp CIRGenBuilderTy::getConstInt(mlir::Location loc, + mlir::Type t, uint64_t C) { + auto intTy = mlir::dyn_cast(t); + assert(intTy && "expected mlir::cir::IntType"); + return create(loc, intTy, + mlir::cir::IntAttr::get(t, C)); +} +} // namespace cir diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h new file mode 100644 index 000000000000..a02cdf26ee4d --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -0,0 +1,1003 @@ +//===-- CIRGenBuilder.h - CIRBuilder implementation ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CIRGENBUILDER_H +#define LLVM_CLANG_LIB_CIR_CIRGENBUILDER_H + +#include "Address.h" +#include "CIRGenRecordLayout.h" +#include "CIRGenTypeCache.h" +#include "clang/CIR/MissingFeatures.h" + +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Dialect/IR/FPEnv.h" + +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Types.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include + +namespace cir { + +class CIRGenFunction; + +class CIRGenBuilderTy : public CIRBaseBuilderTy { + const CIRGenTypeCache &typeCache; + bool IsFPConstrained = false; + fp::ExceptionBehavior DefaultConstrainedExcept = fp::ebStrict; + llvm::RoundingMode DefaultConstrainedRounding = llvm::RoundingMode::Dynamic; + + llvm::StringMap GlobalsVersioning; + llvm::StringMap RecordNames; + +public: + CIRGenBuilderTy(mlir::MLIRContext &C, const CIRGenTypeCache &tc) + : CIRBaseBuilderTy(C), typeCache(tc) { + RecordNames["anon"] = 0; // in order to start from the name "anon.0" + } + + std::string getUniqueAnonRecordName() { return getUniqueRecordName("anon"); } + + std::string getUniqueRecordName(const std::string &baseName) { + auto it = RecordNames.find(baseName); + if (it == RecordNames.end()) { + RecordNames[baseName] = 0; + return baseName; + } + + return baseName + "." + std::to_string(RecordNames[baseName]++); + } + + // + // Floating point specific helpers + // ------------------------------- + // + + /// Enable/Disable use of constrained floating point math. When enabled the + /// CreateF() calls instead create constrained floating point intrinsic + /// calls. Fast math flags are unaffected by this setting. 
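// Aside (not part of the patch): a standalone sketch of the record-name
// versioning implemented by getUniqueRecordName above, with std::map standing
// in for llvm::StringMap. Seeding "anon" with 0 in the constructor is what
// makes the first anonymous record come out as "anon.0".
#include <iostream>
#include <map>
#include <string>

std::map<std::string, unsigned> recordNames = {{"anon", 0}};

std::string getUniqueRecordName(const std::string &base) {
  auto it = recordNames.find(base);
  if (it == recordNames.end()) {
    recordNames[base] = 0;   // first use of a named record keeps the plain name
    return base;
  }
  return base + "." + std::to_string(it->second++);
}

int main() {
  std::cout << getUniqueRecordName("anon") << '\n'; // anon.0
  std::cout << getUniqueRecordName("anon") << '\n'; // anon.1
  std::cout << getUniqueRecordName("S") << '\n';    // S
  std::cout << getUniqueRecordName("S") << '\n';    // S.0
}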
+ void setIsFPConstrained(bool IsCon) { + if (IsCon) + llvm_unreachable("Constrained FP NYI"); + IsFPConstrained = IsCon; + } + + /// Query for the use of constrained floating point math + bool getIsFPConstrained() { + if (IsFPConstrained) + llvm_unreachable("Constrained FP NYI"); + return IsFPConstrained; + } + + /// Set the exception handling to be used with constrained floating point + void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) { +#ifndef NDEBUG + std::optional ExceptStr = + convertExceptionBehaviorToStr(NewExcept); + assert(ExceptStr && "Garbage strict exception behavior!"); +#endif + DefaultConstrainedExcept = NewExcept; + } + + /// Set the rounding mode handling to be used with constrained floating point + void setDefaultConstrainedRounding(llvm::RoundingMode NewRounding) { +#ifndef NDEBUG + std::optional RoundingStr = + convertRoundingModeToStr(NewRounding); + assert(RoundingStr && "Garbage strict rounding mode!"); +#endif + DefaultConstrainedRounding = NewRounding; + } + + /// Get the exception handling used with constrained floating point + fp::ExceptionBehavior getDefaultConstrainedExcept() { + return DefaultConstrainedExcept; + } + + /// Get the rounding mode handling used with constrained floating point + llvm::RoundingMode getDefaultConstrainedRounding() { + return DefaultConstrainedRounding; + } + + // + // Attribute helpers + // ----------------- + // + + /// Get constant address of a global variable as an MLIR attribute. + /// This wrapper infers the attribute type through the global op. + mlir::cir::GlobalViewAttr getGlobalViewAttr(mlir::cir::GlobalOp globalOp, + mlir::ArrayAttr indices = {}) { + auto type = getPointerTo(globalOp.getSymType()); + return getGlobalViewAttr(type, globalOp, indices); + } + + /// Get constant address of a global variable as an MLIR attribute. + mlir::cir::GlobalViewAttr getGlobalViewAttr(mlir::cir::PointerType type, + mlir::cir::GlobalOp globalOp, + mlir::ArrayAttr indices = {}) { + auto symbol = mlir::FlatSymbolRefAttr::get(globalOp.getSymNameAttr()); + return mlir::cir::GlobalViewAttr::get(type, symbol, indices); + } + + mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy, + unsigned size = 0) { + unsigned finalSize = size ? size : str.size(); + + size_t lastNonZeroPos = str.find_last_not_of('\0'); + // If the string is full of null bytes, emit a #cir.zero rather than + // a #cir.const_array. + if (lastNonZeroPos == llvm::StringRef::npos) { + auto arrayTy = mlir::cir::ArrayType::get(getContext(), eltTy, finalSize); + return getZeroAttr(arrayTy); + } + // We will use trailing zeros only if there are more than one zero + // at the end + int trailingZerosNum = + finalSize > lastNonZeroPos + 2 ? 
finalSize - lastNonZeroPos - 1 : 0; + auto truncatedArrayTy = mlir::cir::ArrayType::get( + getContext(), eltTy, finalSize - trailingZerosNum); + auto fullArrayTy = + mlir::cir::ArrayType::get(getContext(), eltTy, finalSize); + return mlir::cir::ConstArrayAttr::get( + getContext(), fullArrayTy, + mlir::StringAttr::get(str.drop_back(trailingZerosNum), + truncatedArrayTy), + trailingZerosNum); + } + + mlir::cir::ConstArrayAttr getConstArray(mlir::Attribute attrs, + mlir::cir::ArrayType arrayTy) { + return mlir::cir::ConstArrayAttr::get(arrayTy, attrs); + } + + mlir::Attribute getConstStructOrZeroAttr(mlir::ArrayAttr arrayAttr, + bool packed = false, + mlir::Type type = {}) { + llvm::SmallVector members; + auto structTy = mlir::dyn_cast(type); + assert(structTy && "expected cir.struct"); + + // Collect members and check if they are all zero. + bool isZero = true; + for (auto &attr : arrayAttr) { + const auto typedAttr = mlir::dyn_cast(attr); + members.push_back(typedAttr.getType()); + isZero &= isNullValue(typedAttr); + } + + // Struct type not specified: create anon struct type from members. + if (!structTy) + structTy = getType(members, packed, + mlir::cir::StructType::Struct, + /*ast=*/nullptr); + + // Return zero or anonymous constant struct. + if (isZero) + return mlir::cir::ZeroAttr::get(getContext(), structTy); + return mlir::cir::ConstStructAttr::get(structTy, arrayAttr); + } + + mlir::cir::ConstStructAttr getAnonConstStruct(mlir::ArrayAttr arrayAttr, + bool packed = false, + mlir::Type ty = {}) { + llvm::SmallVector members; + for (auto &f : arrayAttr) { + auto ta = mlir::dyn_cast(f); + assert(ta && "expected typed attribute member"); + members.push_back(ta.getType()); + } + + if (!ty) + ty = getAnonStructTy(members, packed); + + auto sTy = mlir::dyn_cast(ty); + assert(sTy && "expected struct type"); + return mlir::cir::ConstStructAttr::get(sTy, arrayAttr); + } + + mlir::cir::TypeInfoAttr getTypeInfo(mlir::ArrayAttr fieldsAttr) { + auto anonStruct = getAnonConstStruct(fieldsAttr); + return mlir::cir::TypeInfoAttr::get(anonStruct.getType(), fieldsAttr); + } + + mlir::cir::CmpThreeWayInfoAttr getCmpThreeWayInfoStrongOrdering( + const llvm::APSInt <, const llvm::APSInt &eq, const llvm::APSInt >) { + return mlir::cir::CmpThreeWayInfoAttr::get( + getContext(), lt.getSExtValue(), eq.getSExtValue(), gt.getSExtValue()); + } + + mlir::cir::CmpThreeWayInfoAttr getCmpThreeWayInfoPartialOrdering( + const llvm::APSInt <, const llvm::APSInt &eq, const llvm::APSInt >, + const llvm::APSInt &unordered) { + return mlir::cir::CmpThreeWayInfoAttr::get( + getContext(), lt.getSExtValue(), eq.getSExtValue(), gt.getSExtValue(), + unordered.getSExtValue()); + } + + mlir::cir::DataMemberAttr getDataMemberAttr(mlir::cir::DataMemberType ty, + unsigned memberIndex) { + return mlir::cir::DataMemberAttr::get(getContext(), ty, memberIndex); + } + + mlir::cir::DataMemberAttr + getNullDataMemberAttr(mlir::cir::DataMemberType ty) { + return mlir::cir::DataMemberAttr::get(getContext(), ty, std::nullopt); + } + + mlir::cir::MethodAttr getMethodAttr(mlir::cir::MethodType ty, + mlir::cir::FuncOp methodFuncOp) { + auto methodFuncSymbolRef = mlir::FlatSymbolRefAttr::get(methodFuncOp); + return mlir::cir::MethodAttr::get(ty, methodFuncSymbolRef); + } + + mlir::cir::MethodAttr getNullMethodAttr(mlir::cir::MethodType ty) { + return mlir::cir::MethodAttr::get(ty); + } + + // TODO(cir): Once we have CIR float types, replace this by something like a + // NullableValueInterface to allow for type-independent queries. 
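// Aside (not part of the patch): a standalone sketch of the trailing-zero
// split performed by getString above. Trailing NULs are only factored out of
// the emitted string when there is more than one of them; a single trailing
// NUL stays inline. The buffer size below is illustrative.
#include <iostream>
#include <string>

struct Split {
  std::string kept;   // characters emitted as the string literal
  int trailingZeros;  // zeros represented separately by the attribute
};

Split splitTrailingZeros(std::string str, unsigned finalSize) {
  str.resize(finalSize, '\0');                 // pad to the declared array size
  size_t lastNonZero = str.find_last_not_of('\0');
  if (lastNonZero == std::string::npos)
    return {"", static_cast<int>(finalSize)};  // all zeros -> a "zero" value
  int trailing = finalSize > lastNonZero + 2
                     ? static_cast<int>(finalSize - lastNonZero - 1)
                     : 0;
  return {str.substr(0, finalSize - trailing), trailing};
}

int main() {
  auto s = splitTrailingZeros("ab", 6);        // e.g. char buf[6] = "ab";
  std::cout << s.kept.size() << " chars kept, " << s.trailingZeros
            << " trailing zeros\n";            // 2 chars kept, 4 trailing zeros
}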
+ bool isNullValue(mlir::Attribute attr) const { + if (mlir::isa(attr)) + return true; + if (const auto ptrVal = mlir::dyn_cast(attr)) + return ptrVal.isNullValue(); + + if (mlir::isa(attr)) + return false; + + // TODO(cir): introduce char type in CIR and check for that instead. + if (const auto intVal = mlir::dyn_cast(attr)) + return intVal.isNullValue(); + + if (const auto boolVal = mlir::dyn_cast(attr)) + return !boolVal.getValue(); + + if (auto fpAttr = mlir::dyn_cast(attr)) { + auto fpVal = fpAttr.getValue(); + bool ignored; + llvm::APFloat FV(+0.0); + FV.convert(fpVal.getSemantics(), llvm::APFloat::rmNearestTiesToEven, + &ignored); + return FV.bitwiseIsEqual(fpVal); + } + + if (const auto structVal = + mlir::dyn_cast(attr)) { + for (const auto elt : structVal.getMembers()) { + // FIXME(cir): the struct's ID should not be considered a member. + if (mlir::isa(elt)) + continue; + if (!isNullValue(elt)) + return false; + } + return true; + } + + if (const auto arrayVal = mlir::dyn_cast(attr)) { + if (mlir::isa(arrayVal.getElts())) + return false; + for (const auto elt : mlir::cast(arrayVal.getElts())) { + if (!isNullValue(elt)) + return false; + } + return true; + } + + llvm_unreachable("NYI"); + } + + // + // Type helpers + // ------------ + // + mlir::cir::IntType getUIntNTy(int N) { + switch (N) { + case 8: + return getUInt8Ty(); + case 16: + return getUInt16Ty(); + case 32: + return getUInt32Ty(); + case 64: + return getUInt64Ty(); + default: + return mlir::cir::IntType::get(getContext(), N, false); + } + } + + mlir::cir::IntType getSIntNTy(int N) { + switch (N) { + case 8: + return getSInt8Ty(); + case 16: + return getSInt16Ty(); + case 32: + return getSInt32Ty(); + case 64: + return getSInt64Ty(); + default: + return mlir::cir::IntType::get(getContext(), N, true); + } + } + + mlir::cir::VoidType getVoidTy() { return typeCache.VoidTy; } + + mlir::cir::IntType getSInt8Ty() { return typeCache.SInt8Ty; } + mlir::cir::IntType getSInt16Ty() { return typeCache.SInt16Ty; } + mlir::cir::IntType getSInt32Ty() { return typeCache.SInt32Ty; } + mlir::cir::IntType getSInt64Ty() { return typeCache.SInt64Ty; } + + mlir::cir::IntType getUInt8Ty() { return typeCache.UInt8Ty; } + mlir::cir::IntType getUInt16Ty() { return typeCache.UInt16Ty; } + mlir::cir::IntType getUInt32Ty() { return typeCache.UInt32Ty; } + mlir::cir::IntType getUInt64Ty() { return typeCache.UInt64Ty; } + + bool isInt8Ty(mlir::Type i) { + return i == typeCache.UInt8Ty || i == typeCache.SInt8Ty; + } + bool isInt16Ty(mlir::Type i) { + return i == typeCache.UInt16Ty || i == typeCache.SInt16Ty; + } + bool isInt32Ty(mlir::Type i) { + return i == typeCache.UInt32Ty || i == typeCache.SInt32Ty; + } + bool isInt64Ty(mlir::Type i) { + return i == typeCache.UInt64Ty || i == typeCache.SInt64Ty; + } + bool isInt(mlir::Type i) { return mlir::isa(i); } + + mlir::cir::LongDoubleType + getLongDoubleTy(const llvm::fltSemantics &format) const { + if (&format == &llvm::APFloat::IEEEdouble()) + return mlir::cir::LongDoubleType::get(getContext(), typeCache.DoubleTy); + if (&format == &llvm::APFloat::x87DoubleExtended()) + return mlir::cir::LongDoubleType::get(getContext(), typeCache.FP80Ty); + if (&format == &llvm::APFloat::IEEEquad()) + llvm_unreachable("NYI"); + if (&format == &llvm::APFloat::PPCDoubleDouble()) + llvm_unreachable("NYI"); + + llvm_unreachable("unsupported long double format"); + } + + mlir::Type getVirtualFnPtrType(bool isVarArg = false) { + // FIXME: replay LLVM codegen for now, perhaps add a vtable ptr special + // type so it's a bit 
more clear and C++ idiomatic. + auto fnTy = mlir::cir::FuncType::get({}, getUInt32Ty(), isVarArg); + assert(!MissingFeatures::isVarArg()); + return getPointerTo(getPointerTo(fnTy)); + } + + mlir::cir::FuncType getFuncType(llvm::ArrayRef params, + mlir::Type retTy, bool isVarArg = false) { + return mlir::cir::FuncType::get(params, retTy, isVarArg); + } + + // Fetch the type representing a pointer to unsigned int values. + mlir::cir::PointerType getUInt8PtrTy(unsigned AddrSpace = 0) { + return typeCache.UInt8PtrTy; + } + mlir::cir::PointerType getUInt32PtrTy(unsigned AddrSpace = 0) { + return mlir::cir::PointerType::get(getContext(), typeCache.UInt32Ty); + } + + /// Get a CIR anonymous struct type. + mlir::cir::StructType + getAnonStructTy(llvm::ArrayRef members, bool packed = false, + const clang::RecordDecl *ast = nullptr) { + mlir::cir::ASTRecordDeclAttr astAttr = nullptr; + auto kind = mlir::cir::StructType::RecordKind::Struct; + if (ast) { + astAttr = getAttr(ast); + kind = getRecordKind(ast->getTagKind()); + } + return getType(members, packed, kind, astAttr); + } + + /// Get a CIR record kind from a AST declaration tag. + mlir::cir::StructType::RecordKind + getRecordKind(const clang::TagTypeKind kind) { + switch (kind) { + case clang::TagTypeKind::Struct: + return mlir::cir::StructType::Struct; + case clang::TagTypeKind::Union: + return mlir::cir::StructType::Union; + case clang::TagTypeKind::Class: + return mlir::cir::StructType::Class; + case clang::TagTypeKind::Interface: + llvm_unreachable("interface records are NYI"); + case clang::TagTypeKind::Enum: + llvm_unreachable("enum records are NYI"); + } + } + + /// Get a incomplete CIR struct type. + mlir::cir::StructType getIncompleteStructTy(llvm::StringRef name, + const clang::RecordDecl *ast) { + const auto nameAttr = getStringAttr(name); + auto kind = mlir::cir::StructType::RecordKind::Struct; + if (ast) + kind = getRecordKind(ast->getTagKind()); + return getType(nameAttr, kind); + } + + /// Get a CIR named struct type. + /// + /// If a struct already exists and is complete, but the client tries to fetch + /// it with a different set of attributes, this method will crash. + mlir::cir::StructType getCompleteStructTy(llvm::ArrayRef members, + llvm::StringRef name, bool packed, + const clang::RecordDecl *ast) { + const auto nameAttr = getStringAttr(name); + mlir::cir::ASTRecordDeclAttr astAttr = nullptr; + auto kind = mlir::cir::StructType::RecordKind::Struct; + if (ast) { + astAttr = getAttr(ast); + kind = getRecordKind(ast->getTagKind()); + } + + // Create or get the struct. + auto type = getType(members, nameAttr, packed, kind, + astAttr); + + // Complete an incomplete struct or ensure the existing complete struct + // matches the requested attributes. 
+ type.complete(members, packed, astAttr); + + return type; + } + + mlir::cir::StructType + getCompleteStructType(mlir::ArrayAttr fields, bool packed = false, + llvm::StringRef name = "", + const clang::RecordDecl *ast = nullptr) { + llvm::SmallVector members; + for (auto &attr : fields) { + const auto typedAttr = mlir::dyn_cast(attr); + members.push_back(typedAttr.getType()); + } + + if (name.empty()) + return getAnonStructTy(members, packed, ast); + else + return getCompleteStructTy(members, name, packed, ast); + } + + mlir::cir::ArrayType getArrayType(mlir::Type eltType, unsigned size) { + return mlir::cir::ArrayType::get(getContext(), eltType, size); + } + + bool isSized(mlir::Type ty) { + if (mlir::isa(ty)) + return true; + if (mlir::isa(ty)) { + return isSized(mlir::cast(ty).getEltType()); + } + assert(0 && "Unimplemented size for type"); + return false; + } + + // + // Constant creation helpers + // ------------------------- + // + mlir::cir::ConstantOp getSInt32(uint32_t c, mlir::Location loc) { + auto sInt32Ty = getSInt32Ty(); + return create(loc, sInt32Ty, + mlir::cir::IntAttr::get(sInt32Ty, c)); + } + mlir::cir::ConstantOp getUInt32(uint32_t C, mlir::Location loc) { + auto uInt32Ty = getUInt32Ty(); + return create(loc, uInt32Ty, + mlir::cir::IntAttr::get(uInt32Ty, C)); + } + mlir::cir::ConstantOp getSInt64(uint64_t C, mlir::Location loc) { + auto sInt64Ty = getSInt64Ty(); + return create(loc, sInt64Ty, + mlir::cir::IntAttr::get(sInt64Ty, C)); + } + mlir::cir::ConstantOp getUInt64(uint64_t C, mlir::Location loc) { + auto uInt64Ty = getUInt64Ty(); + return create(loc, uInt64Ty, + mlir::cir::IntAttr::get(uInt64Ty, C)); + } + + mlir::cir::ConstantOp getConstInt(mlir::Location loc, llvm::APSInt intVal); + + mlir::cir::ConstantOp getConstInt(mlir::Location loc, llvm::APInt intVal); + + mlir::cir::ConstantOp getConstInt(mlir::Location loc, mlir::Type t, + uint64_t C); + /// Create constant nullptr for pointer-to-data-member type ty. + mlir::cir::ConstantOp getNullDataMemberPtr(mlir::cir::DataMemberType ty, + mlir::Location loc) { + return create(loc, ty, getNullDataMemberAttr(ty)); + } + + mlir::cir::ConstantOp getNullMethodPtr(mlir::cir::MethodType ty, + mlir::Location loc) { + return create(loc, ty, getNullMethodAttr(ty)); + } + + mlir::cir::ConstantOp getZero(mlir::Location loc, mlir::Type ty) { + // TODO: dispatch creation for primitive types. + assert((mlir::isa(ty) || + mlir::isa(ty)) && + "NYI for other types"); + return create(loc, ty, getZeroAttr(ty)); + } + + // + // Operation creation helpers + // -------------------------- + // + + /// Create a copy with inferred length. + mlir::cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, + bool isVolatile = false) { + return create(dst.getLoc(), dst, src, isVolatile); + } + + /// Create a break operation. + mlir::cir::BreakOp createBreak(mlir::Location loc) { + return create(loc); + } + + /// Create a continue operation. + mlir::cir::ContinueOp createContinue(mlir::Location loc) { + return create(loc); + } + + mlir::cir::MemCpyOp createMemCpy(mlir::Location loc, mlir::Value dst, + mlir::Value src, mlir::Value len) { + return create(loc, dst, src, len); + } + + mlir::Value createNeg(mlir::Value value) { + + if (auto intTy = mlir::dyn_cast(value.getType())) { + // Source is a unsigned integer: first cast it to signed. 
+ if (intTy.isUnsigned()) + value = createIntCast(value, getSIntNTy(intTy.getWidth())); + return create(value.getLoc(), value.getType(), + mlir::cir::UnaryOpKind::Minus, value); + } + + llvm_unreachable("negation for the given type is NYI"); + } + + // TODO: split this to createFPExt/createFPTrunc when we have dedicated cast + // operations. + mlir::Value createFloatingCast(mlir::Value v, mlir::Type destType) { + if (getIsFPConstrained()) + llvm_unreachable("constrainedfp NYI"); + + return create(v.getLoc(), destType, + mlir::cir::CastKind::floating, v); + } + + mlir::Value createFSub(mlir::Value lhs, mlir::Value rhs) { + assert(!MissingFeatures::metaDataNode()); + if (IsFPConstrained) + llvm_unreachable("Constrained FP NYI"); + + assert(!MissingFeatures::foldBinOpFMF()); + return create(lhs.getLoc(), mlir::cir::BinOpKind::Sub, + lhs, rhs); + } + + mlir::Value createFAdd(mlir::Value lhs, mlir::Value rhs) { + assert(!MissingFeatures::metaDataNode()); + if (IsFPConstrained) + llvm_unreachable("Constrained FP NYI"); + + assert(!MissingFeatures::foldBinOpFMF()); + return create(lhs.getLoc(), mlir::cir::BinOpKind::Add, + lhs, rhs); + } + mlir::Value createFMul(mlir::Value lhs, mlir::Value rhs) { + assert(!MissingFeatures::metaDataNode()); + if (IsFPConstrained) + llvm_unreachable("Constrained FP NYI"); + + assert(!MissingFeatures::foldBinOpFMF()); + return create(lhs.getLoc(), mlir::cir::BinOpKind::Mul, + lhs, rhs); + } + + mlir::Value createDynCast(mlir::Location loc, mlir::Value src, + mlir::cir::PointerType destType, bool isRefCast, + mlir::cir::DynamicCastInfoAttr info) { + auto castKind = isRefCast ? mlir::cir::DynamicCastKind::ref + : mlir::cir::DynamicCastKind::ptr; + return create(loc, destType, castKind, src, info, + /*relative_layout=*/false); + } + + mlir::Value createDynCastToVoid(mlir::Location loc, mlir::Value src, + bool vtableUseRelativeLayout) { + // TODO(cir): consider address space here. + assert(!MissingFeatures::addressSpace()); + auto destTy = getVoidPtrTy(); + return create( + loc, destTy, mlir::cir::DynamicCastKind::ptr, src, + mlir::cir::DynamicCastInfoAttr{}, vtableUseRelativeLayout); + } + + cir::Address createBaseClassAddr(mlir::Location loc, cir::Address addr, + mlir::Type destType) { + if (destType == addr.getElementType()) + return addr; + + auto ptrTy = getPointerTo(destType); + auto baseAddr = + create(loc, ptrTy, addr.getPointer()); + + return Address(baseAddr, ptrTy, addr.getAlignment()); + } + + // FIXME(cir): CIRGenBuilder class should have an attribute with a reference + // to the module so that we don't have search for it or pass it around. + // FIXME(cir): Track a list of globals, or at least the last one inserted, so + // that we can insert globals in the same order they are defined by CIRGen. + + [[nodiscard]] mlir::cir::GlobalOp + createGlobal(mlir::ModuleOp module, mlir::Location loc, mlir::StringRef name, + mlir::Type type, bool isConst, + mlir::cir::GlobalLinkageKind linkage, + mlir::cir::AddressSpaceAttr addrSpace = {}) { + mlir::OpBuilder::InsertionGuard guard(*this); + setInsertionPointToStart(module.getBody()); + return create(loc, name, type, isConst, linkage, + addrSpace); + } + + /// Creates a versioned global variable. If the symbol is already taken, an ID + /// will be appended to the symbol. The returned global must always be queried + /// for its name so it can be referenced correctly. 
+ [[nodiscard]] mlir::cir::GlobalOp + createVersionedGlobal(mlir::ModuleOp module, mlir::Location loc, + mlir::StringRef name, mlir::Type type, bool isConst, + mlir::cir::GlobalLinkageKind linkage, + mlir::cir::AddressSpaceAttr addrSpace = {}) { + // Create a unique name if the given name is already taken. + std::string uniqueName; + if (unsigned version = GlobalsVersioning[name.str()]++) + uniqueName = name.str() + "." + std::to_string(version); + else + uniqueName = name.str(); + + return createGlobal(module, loc, uniqueName, type, isConst, linkage, + addrSpace); + } + + mlir::Value createGetGlobal(mlir::cir::GlobalOp global, + bool threadLocal = false) { + return create( + global.getLoc(), + getPointerTo(global.getSymType(), global.getAddrSpaceAttr()), + global.getName(), threadLocal); + } + + mlir::Value createGetBitfield(mlir::Location loc, mlir::Type resultType, + mlir::Value addr, mlir::Type storageType, + const CIRGenBitFieldInfo &info, + bool isLvalueVolatile, bool useVolatile) { + auto offset = useVolatile ? info.VolatileOffset : info.Offset; + return create(loc, resultType, addr, storageType, + info.Name, info.Size, offset, + info.IsSigned, isLvalueVolatile); + } + + mlir::Value createSetBitfield(mlir::Location loc, mlir::Type resultType, + mlir::Value dstAddr, mlir::Type storageType, + mlir::Value src, const CIRGenBitFieldInfo &info, + bool isLvalueVolatile, bool useVolatile) { + auto offset = useVolatile ? info.VolatileOffset : info.Offset; + return create( + loc, resultType, dstAddr, storageType, src, info.Name, info.Size, + offset, info.IsSigned, isLvalueVolatile); + } + + /// Create a pointer to a record member. + mlir::Value createGetMember(mlir::Location loc, mlir::Type result, + mlir::Value base, llvm::StringRef name, + unsigned index) { + return create(loc, result, base, name, index); + } + + /// Create a cir.complex.real_ptr operation that derives a pointer to the real + /// part of the complex value pointed to by the specified pointer value. + mlir::Value createRealPtr(mlir::Location loc, mlir::Value value) { + auto srcPtrTy = mlir::cast(value.getType()); + auto srcComplexTy = + mlir::cast(srcPtrTy.getPointee()); + return create( + loc, getPointerTo(srcComplexTy.getElementTy()), value); + } + + Address createRealPtr(mlir::Location loc, Address addr) { + return Address{createRealPtr(loc, addr.getPointer()), addr.getAlignment()}; + } + + /// Create a cir.complex.imag_ptr operation that derives a pointer to the + /// imaginary part of the complex value pointed to by the specified pointer + /// value. + mlir::Value createImagPtr(mlir::Location loc, mlir::Value value) { + auto srcPtrTy = mlir::cast(value.getType()); + auto srcComplexTy = + mlir::cast(srcPtrTy.getPointee()); + return create( + loc, getPointerTo(srcComplexTy.getElementTy()), value); + } + + Address createImagPtr(mlir::Location loc, Address addr) { + return Address{createImagPtr(loc, addr.getPointer()), addr.getAlignment()}; + } + + /// Cast the element type of the given address to a different type, + /// preserving information like the alignment. 
+ cir::Address createElementBitCast(mlir::Location loc, cir::Address addr, + mlir::Type destType) { + if (destType == addr.getElementType()) + return addr; + + auto ptrTy = getPointerTo(destType); + return Address(createBitcast(loc, addr.getPointer(), ptrTy), destType, + addr.getAlignment()); + } + + mlir::Value createLoad(mlir::Location loc, Address addr, + bool isVolatile = false) { + auto ptrTy = + mlir::dyn_cast(addr.getPointer().getType()); + if (addr.getElementType() != ptrTy.getPointee()) + addr = addr.withPointer( + createPtrBitcast(addr.getPointer(), addr.getElementType())); + + return create( + loc, addr.getElementType(), addr.getPointer(), /*isDeref=*/false, + /*is_volatile=*/isVolatile, /*alignment=*/mlir::IntegerAttr{}, + /*mem_order=*/mlir::cir::MemOrderAttr{}); + } + + mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty, + mlir::Value ptr, llvm::MaybeAlign align, + bool isVolatile) { + if (ty != mlir::cast(ptr.getType()).getPointee()) + ptr = createPtrBitcast(ptr, ty); + uint64_t alignment = align ? align->value() : 0; + return CIRBaseBuilderTy::createLoad(loc, ptr, isVolatile, alignment); + } + + mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty, + mlir::Value ptr, llvm::MaybeAlign align) { + // TODO: make sure callsites shouldn't be really passing volatile. + assert(!MissingFeatures::volatileLoadOrStore()); + return createAlignedLoad(loc, ty, ptr, align, /*isVolatile=*/false); + } + + mlir::Value + createAlignedLoad(mlir::Location loc, mlir::Type ty, mlir::Value addr, + clang::CharUnits align = clang::CharUnits::One()) { + return createAlignedLoad(loc, ty, addr, align.getAsAlign()); + } + + mlir::cir::StoreOp createStore(mlir::Location loc, mlir::Value val, + Address dst, bool _volatile = false, + ::mlir::IntegerAttr align = {}, + ::mlir::cir::MemOrderAttr order = {}) { + return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), _volatile, + align, order); + } + + mlir::cir::StoreOp createFlagStore(mlir::Location loc, bool val, + mlir::Value dst) { + auto flag = getBool(val, loc); + return CIRBaseBuilderTy::createStore(loc, flag, dst); + } + + mlir::cir::VecShuffleOp + createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2, + llvm::ArrayRef maskAttrs) { + auto vecType = mlir::cast(vec1.getType()); + auto resultTy = mlir::cir::VectorType::get( + getContext(), vecType.getEltType(), maskAttrs.size()); + return CIRBaseBuilderTy::create( + loc, resultTy, vec1, vec2, getArrayAttr(maskAttrs)); + } + + mlir::cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, + mlir::Value vec2, + llvm::ArrayRef mask) { + llvm::SmallVector maskAttrs; + for (int32_t idx : mask) { + maskAttrs.push_back(mlir::cir::IntAttr::get(getSInt32Ty(), idx)); + } + + return createVecShuffle(loc, vec1, vec2, maskAttrs); + } + + mlir::cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, + llvm::ArrayRef mask) { + // FIXME(cir): Support use cir.vec.shuffle with single vec + // Workaround: pass Vec as both vec1 and vec2 + return createVecShuffle(loc, vec1, vec1, mask); + } + + mlir::cir::StoreOp + createAlignedStore(mlir::Location loc, mlir::Value val, mlir::Value dst, + clang::CharUnits align = clang::CharUnits::One(), + bool _volatile = false, + ::mlir::cir::MemOrderAttr order = {}) { + llvm::MaybeAlign mayAlign = align.getAsAlign(); + mlir::IntegerAttr alignAttr; + if (mayAlign) { + uint64_t alignment = mayAlign ? 
mayAlign->value() : 0; + alignAttr = mlir::IntegerAttr::get( + mlir::IntegerType::get(dst.getContext(), 64), alignment); + } + return CIRBaseBuilderTy::createStore(loc, val, dst, _volatile, alignAttr, + order); + } + + // Convert byte offset to sequence of high-level indices suitable for + // GlobalViewAttr. Ideally we shouldn't deal with low-level offsets at all + // but currently some parts of Clang AST, which we don't want to touch just + // yet, return them. + void computeGlobalViewIndicesFromFlatOffset( + int64_t Offset, mlir::Type Ty, CIRDataLayout Layout, + llvm::SmallVectorImpl &Indices) { + if (!Offset) + return; + + mlir::Type SubType; + + auto getIndexAndNewOffset = + [](int64_t Offset, int64_t EltSize) -> std::pair { + int64_t DivRet = Offset / EltSize; + if (DivRet < 0) + DivRet -= 1; // make sure offset is positive + int64_t ModRet = Offset - (DivRet * EltSize); + return {DivRet, ModRet}; + }; + + if (auto ArrayTy = mlir::dyn_cast(Ty)) { + int64_t EltSize = Layout.getTypeAllocSize(ArrayTy.getEltType()); + SubType = ArrayTy.getEltType(); + auto const [Index, NewOffset] = getIndexAndNewOffset(Offset, EltSize); + Indices.push_back(Index); + Offset = NewOffset; + } else if (auto StructTy = mlir::dyn_cast(Ty)) { + auto Elts = StructTy.getMembers(); + int64_t Pos = 0; + for (size_t I = 0; I < Elts.size(); ++I) { + int64_t EltSize = + (int64_t)Layout.getTypeAllocSize(Elts[I]).getFixedValue(); + unsigned AlignMask = Layout.getABITypeAlign(Elts[I]).value() - 1; + Pos = (Pos + AlignMask) & ~AlignMask; + assert(Offset >= 0); + if (Offset < Pos + EltSize) { + Indices.push_back(I); + SubType = Elts[I]; + Offset -= Pos; + break; + } + Pos += EltSize; + } + } else { + llvm_unreachable("unexpected type"); + } + + assert(SubType); + computeGlobalViewIndicesFromFlatOffset(Offset, SubType, Layout, Indices); + } + + mlir::cir::StackSaveOp createStackSave(mlir::Location loc, mlir::Type ty) { + return create(loc, ty); + } + + mlir::cir::StackRestoreOp createStackRestore(mlir::Location loc, + mlir::Value v) { + return create(loc, v); + } + + // TODO(cir): Change this to hoist alloca to the parent *scope* instead. + /// Move alloca operation to the parent region. 
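// Aside (not part of the patch): a simplified standalone model of the walk
// performed by computeGlobalViewIndicesFromFlatOffset above, for the common
// case of an array of structs. The real code recurses through CIR types and
// also handles negative offsets; this sketch assumes a non-negative offset
// that does not land in padding, and the layout values are illustrative.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

struct Member { int64_t size; int64_t align; };

std::vector<int64_t> indicesForOffset(int64_t offset, int64_t eltSize,
                                      const std::vector<Member> &members) {
  std::vector<int64_t> indices;
  indices.push_back(offset / eltSize);  // index into the array
  offset %= eltSize;                    // remaining offset inside the element
  int64_t pos = 0;
  for (size_t i = 0; i < members.size(); ++i) {
    int64_t mask = members[i].align - 1;
    pos = (pos + mask) & ~mask;         // align the running struct offset
    if (offset < pos + members[i].size) {
      assert(offset >= pos && "offset falls into padding");
      indices.push_back(static_cast<int64_t>(i));
      return indices;
    }
    pos += members[i].size;
  }
  assert(false && "offset past the end of the element");
  return indices;
}

int main() {
  // struct { int32_t a; int32_t b; } -> element size 8, members at 0 and 4.
  std::vector<Member> pair = {{4, 4}, {4, 4}};
  for (int64_t idx : indicesForOffset(/*offset=*/20, /*eltSize=*/8, pair))
    std::cout << idx << ' ';            // prints "2 1": third element, field b
  std::cout << '\n';
}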
+ void hoistAllocaToParentRegion(mlir::cir::AllocaOp alloca) { + auto &block = alloca->getParentOp()->getParentRegion()->front(); + const auto allocas = block.getOps(); + if (allocas.empty()) { + alloca->moveBefore(&block, block.begin()); + } else { + alloca->moveAfter(*std::prev(allocas.end())); + } + } + + mlir::cir::CmpThreeWayOp + createThreeWayCmpStrong(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, + const llvm::APSInt <Res, const llvm::APSInt &eqRes, + const llvm::APSInt >Res) { + assert(ltRes.getBitWidth() == eqRes.getBitWidth() && + ltRes.getBitWidth() == gtRes.getBitWidth() && + "the three comparison results must have the same bit width"); + auto cmpResultTy = getSIntNTy(ltRes.getBitWidth()); + auto infoAttr = getCmpThreeWayInfoStrongOrdering(ltRes, eqRes, gtRes); + return create(loc, cmpResultTy, lhs, rhs, + infoAttr); + } + + mlir::cir::CmpThreeWayOp + createThreeWayCmpPartial(mlir::Location loc, mlir::Value lhs, mlir::Value rhs, + const llvm::APSInt <Res, const llvm::APSInt &eqRes, + const llvm::APSInt >Res, + const llvm::APSInt &unorderedRes) { + assert(ltRes.getBitWidth() == eqRes.getBitWidth() && + ltRes.getBitWidth() == gtRes.getBitWidth() && + ltRes.getBitWidth() == unorderedRes.getBitWidth() && + "the four comparison results must have the same bit width"); + auto cmpResultTy = getSIntNTy(ltRes.getBitWidth()); + auto infoAttr = + getCmpThreeWayInfoPartialOrdering(ltRes, eqRes, gtRes, unorderedRes); + return create(loc, cmpResultTy, lhs, rhs, + infoAttr); + } + + mlir::cir::GetRuntimeMemberOp createGetIndirectMember(mlir::Location loc, + mlir::Value objectPtr, + mlir::Value memberPtr) { + auto memberPtrTy = + mlir::cast(memberPtr.getType()); + + // TODO(cir): consider address space. + assert(!MissingFeatures::addressSpace()); + auto resultTy = getPointerTo(memberPtrTy.getMemberTy()); + + return create(loc, resultTy, objectPtr, + memberPtr); + } + + /// Create a cir.ptr_stride operation to get access to an array element. + /// idx is the index of the element to access, shouldDecay is true if the + /// result should decay to a pointer to the element type. + mlir::Value getArrayElement(mlir::Location arrayLocBegin, + mlir::Location arrayLocEnd, mlir::Value arrayPtr, + mlir::Type eltTy, mlir::Value idx, + bool shouldDecay); + + /// Returns a decayed pointer to the first element of the array + /// pointed to by arrayPtr. + mlir::Value maybeBuildArrayDecay(mlir::Location loc, mlir::Value arrayPtr, + mlir::Type eltTy); +}; + +} // namespace cir +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp new file mode 100644 index 000000000000..ab4525d38b55 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -0,0 +1,1630 @@ +//===---- CIRGenBuiltin.cpp - Emit CIR for builtins -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Builtin calls as CIR or a function call to be +// later resolved. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenCall.h" +#include "CIRGenCstEmitter.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "TargetInfo.h" +#include "clang/CIR/MissingFeatures.h" + +// TODO(cir): we shouldn't need this but we currently reuse intrinsic IDs for +// convenience. +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/IR/Intrinsics.h" + +#include "clang/AST/GlobalDecl.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetBuiltins.h" +#include "clang/Frontend/FrontendDiagnostic.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Value.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; +using namespace llvm; + +static RValue buildLibraryCall(CIRGenFunction &CGF, const FunctionDecl *FD, + const CallExpr *E, + mlir::Operation *calleeValue) { + auto callee = CIRGenCallee::forDirect(calleeValue, GlobalDecl(FD)); + return CGF.buildCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); +} + +template +static RValue buildUnaryFPBuiltin(CIRGenFunction &CGF, const CallExpr &E) { + auto Arg = CGF.buildScalarExpr(E.getArg(0)); + + CIRGenFunction::CIRGenFPOptionsRAII FPOptsRAII(CGF, &E); + if (CGF.getBuilder().getIsFPConstrained()) + llvm_unreachable("constraint FP operations are NYI"); + + auto Call = + CGF.getBuilder().create(Arg.getLoc(), Arg.getType(), Arg); + return RValue::get(Call->getResult(0)); +} + +template +static RValue buildUnaryMaybeConstrainedFPToIntBuiltin(CIRGenFunction &CGF, + const CallExpr &E) { + auto ResultType = CGF.ConvertType(E.getType()); + auto Src = CGF.buildScalarExpr(E.getArg(0)); + + if (CGF.getBuilder().getIsFPConstrained()) + llvm_unreachable("constraint FP operations are NYI"); + + auto Call = CGF.getBuilder().create(Src.getLoc(), ResultType, Src); + return RValue::get(Call->getResult(0)); +} + +template +static RValue buildBinaryFPBuiltin(CIRGenFunction &CGF, const CallExpr &E) { + auto Arg0 = CGF.buildScalarExpr(E.getArg(0)); + auto Arg1 = CGF.buildScalarExpr(E.getArg(1)); + + auto Loc = CGF.getLoc(E.getExprLoc()); + auto Ty = CGF.ConvertType(E.getType()); + auto Call = CGF.getBuilder().create(Loc, Ty, Arg0, Arg1); + + return RValue::get(Call->getResult(0)); +} + +template +static mlir::Value buildBinaryMaybeConstrainedFPBuiltin(CIRGenFunction &CGF, + const CallExpr &E) { + auto Arg0 = CGF.buildScalarExpr(E.getArg(0)); + auto Arg1 = CGF.buildScalarExpr(E.getArg(1)); + + auto Loc = CGF.getLoc(E.getExprLoc()); + auto Ty = CGF.ConvertType(E.getType()); + + if (CGF.getBuilder().getIsFPConstrained()) { + CIRGenFunction::CIRGenFPOptionsRAII FPOptsRAII(CGF, &E); + llvm_unreachable("constrained FP operations are NYI"); + } else { + auto Call = CGF.getBuilder().create(Loc, Ty, Arg0, Arg1); + return Call->getResult(0); + } +} + +template +static RValue +buildBuiltinBitOp(CIRGenFunction &CGF, const CallExpr *E, + std::optional CK) { + mlir::Value arg; + if (CK.has_value()) + arg = CGF.buildCheckedArgForBuiltin(E->getArg(0), *CK); + else + arg = CGF.buildScalarExpr(E->getArg(0)); + + auto resultTy = CGF.ConvertType(E->getType()); + auto op = + CGF.getBuilder().create(CGF.getLoc(E->getExprLoc()), resultTy, arg); + return RValue::get(op); +} + +// Initialize the alloca with the given size and alignment according to the lang +// opts. Supporting only the trivial non-initialization for now. 
+static void initializeAlloca(CIRGenFunction &CGF, + [[maybe_unused]] mlir::Value AllocaAddr, + [[maybe_unused]] mlir::Value Size, + [[maybe_unused]] CharUnits AlignmentInBytes) { + + switch (CGF.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + return; + case LangOptions::TrivialAutoVarInitKind::Zero: + case LangOptions::TrivialAutoVarInitKind::Pattern: + assert(false && "unexpected trivial auto var init kind NYI"); + return; + } +} + +namespace { +struct WidthAndSignedness { + unsigned Width; + bool Signed; +}; +} // namespace + +static WidthAndSignedness +getIntegerWidthAndSignedness(const clang::ASTContext &context, + const clang::QualType Type) { + assert(Type->isIntegerType() && "Given type is not an integer."); + unsigned Width = Type->isBooleanType() ? 1 + : Type->isBitIntType() ? context.getIntWidth(Type) + : context.getTypeInfo(Type).Width; + bool Signed = Type->isSignedIntegerType(); + return {Width, Signed}; +} + +// Given one or more integer types, this function produces an integer type that +// encompasses them: any value in one of the given types could be expressed in +// the encompassing type. +static struct WidthAndSignedness +EncompassingIntegerType(ArrayRef Types) { + assert(Types.size() > 0 && "Empty list of types."); + + // If any of the given types is signed, we must return a signed type. + bool Signed = false; + for (const auto &Type : Types) { + Signed |= Type.Signed; + } + + // The encompassing type must have a width greater than or equal to the width + // of the specified types. Additionally, if the encompassing type is signed, + // its width must be strictly greater than the width of any unsigned types + // given. + unsigned Width = 0; + for (const auto &Type : Types) { + unsigned MinWidth = Type.Width + (Signed && !Type.Signed); + if (Width < MinWidth) { + Width = MinWidth; + } + } + + return {Width, Signed}; +} + +/// Emit the conversions required to turn the given value into an +/// integer of the given size. +static mlir::Value buildToInt(CIRGenFunction &CGF, mlir::Value v, QualType t, + mlir::cir::IntType intType) { + v = CGF.buildToMemory(v, t); + + if (isa(v.getType())) + return CGF.getBuilder().createPtrToInt(v, intType); + + assert(v.getType() == intType); + return v; +} + +static mlir::Value buildFromInt(CIRGenFunction &CGF, mlir::Value v, QualType t, + mlir::Type resultType) { + v = CGF.buildFromMemory(v, t); + + if (isa(resultType)) + return CGF.getBuilder().createIntToPtr(v, resultType); + + assert(v.getType() == resultType); + return v; +} + +static Address checkAtomicAlignment(CIRGenFunction &CGF, const CallExpr *E) { + ASTContext &ctx = CGF.getContext(); + Address ptr = CGF.buildPointerWithAlignment(E->getArg(0)); + unsigned bytes = + isa(ptr.getElementType()) + ? ctx.getTypeSizeInChars(ctx.VoidPtrTy).getQuantity() + : CGF.CGM.getDataLayout().getTypeSizeInBits(ptr.getElementType()) / 8; + unsigned align = ptr.getAlignment().getQuantity(); + if (align % bytes != 0) { + DiagnosticsEngine &diags = CGF.CGM.getDiags(); + diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned); + // Force address to be at least naturally-aligned. + return ptr.withAlignment(CharUnits::fromQuantity(bytes)); + } + return ptr; +} + +/// Utility to insert an atomic instruction based on Intrinsic::ID +/// and the expression node. 
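// Aside (not part of the patch): a standalone restatement of the rule used by
// EncompassingIntegerType above, with a worked example. Any signed input makes
// the result signed, and an unsigned input then needs one extra bit to fit.
// IntShape here simply mirrors WidthAndSignedness.
#include <iostream>
#include <vector>

struct IntShape { unsigned Width; bool Signed; };

IntShape encompass(const std::vector<IntShape> &types) {
  bool isSigned = false;
  for (const auto &t : types)
    isSigned |= t.Signed;               // any signed operand -> signed result
  unsigned width = 0;
  for (const auto &t : types) {
    unsigned minWidth = t.Width + (isSigned && !t.Signed);
    if (width < minWidth)
      width = minWidth;
  }
  return {width, isSigned};
}

int main() {
  // A 32-bit unsigned and a 16-bit signed operand need a signed 33-bit type.
  IntShape r = encompass({{32, false}, {16, true}});
  std::cout << r.Width << (r.Signed ? " signed\n" : " unsigned\n"); // 33 signed
}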
+static mlir::Value +makeBinaryAtomicValue(CIRGenFunction &cgf, mlir::cir::AtomicFetchKind kind, + const CallExpr *expr, + mlir::cir::MemOrder ordering = + mlir::cir::MemOrder::SequentiallyConsistent) { + + QualType typ = expr->getType(); + + assert(expr->getArg(0)->getType()->isPointerType()); + assert(cgf.getContext().hasSameUnqualifiedType( + typ, expr->getArg(0)->getType()->getPointeeType())); + assert( + cgf.getContext().hasSameUnqualifiedType(typ, expr->getArg(1)->getType())); + + Address destAddr = checkAtomicAlignment(cgf, expr); + auto &builder = cgf.getBuilder(); + auto intType = + expr->getArg(0)->getType()->getPointeeType()->isUnsignedIntegerType() + ? builder.getUIntNTy(cgf.getContext().getTypeSize(typ)) + : builder.getSIntNTy(cgf.getContext().getTypeSize(typ)); + mlir::Value val = cgf.buildScalarExpr(expr->getArg(1)); + mlir::Type valueType = val.getType(); + val = buildToInt(cgf, val, typ, intType); + + auto rmwi = builder.create( + cgf.getLoc(expr->getSourceRange()), destAddr.emitRawPointer(), val, kind, + ordering, false, /* is volatile */ + true); /* fetch first */ + return buildFromInt(cgf, rmwi->getResult(0), typ, valueType); +} + +static RValue buildBinaryAtomic(CIRGenFunction &CGF, + mlir::cir::AtomicFetchKind kind, + const CallExpr *E) { + return RValue::get(makeBinaryAtomicValue(CGF, kind, E)); +} + +static mlir::Value MakeAtomicCmpXchgValue(CIRGenFunction &cgf, + const CallExpr *expr, + bool returnBool) { + QualType typ = returnBool ? expr->getArg(1)->getType() : expr->getType(); + Address destAddr = checkAtomicAlignment(cgf, expr); + auto &builder = cgf.getBuilder(); + + auto intType = builder.getSIntNTy(cgf.getContext().getTypeSize(typ)); + auto cmpVal = cgf.buildScalarExpr(expr->getArg(1)); + cmpVal = buildToInt(cgf, cmpVal, typ, intType); + auto newVal = + buildToInt(cgf, cgf.buildScalarExpr(expr->getArg(2)), typ, intType); + + auto op = builder.create( + cgf.getLoc(expr->getSourceRange()), cmpVal.getType(), builder.getBoolTy(), + destAddr.getPointer(), cmpVal, newVal, + mlir::cir::MemOrder::SequentiallyConsistent, + mlir::cir::MemOrder::SequentiallyConsistent); + + return returnBool ? op.getResult(1) : op.getResult(0); +} + +RValue CIRGenFunction::buildRotate(const CallExpr *E, bool IsRotateRight) { + auto src = buildScalarExpr(E->getArg(0)); + auto shiftAmt = buildScalarExpr(E->getArg(1)); + + // The builtin's shift arg may have a different type than the source arg and + // result, but the CIR ops uses the same type for all values. + auto ty = src.getType(); + shiftAmt = builder.createIntCast(shiftAmt, ty); + auto r = builder.create(getLoc(E->getSourceRange()), src, + shiftAmt); + if (!IsRotateRight) + r->setAttr("left", mlir::UnitAttr::get(src.getContext())); + return RValue::get(r); +} + +RValue CIRGenFunction::buildBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue) { + const FunctionDecl *FD = GD.getDecl()->getAsFunction(); + + // See if we can constant fold this builtin. If so, don't emit it at all. + // TODO: Extend this handling to all builtin calls that we can constant-fold. 
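// Aside (not part of the patch): buildRotate above casts the shift amount to
// the type of the rotated value and emits cir.rotate (with a "left" flag for
// left rotates). A standalone sketch of 32-bit left-rotate semantics follows;
// the shift amount is reduced modulo the width here, which matches the
// funnel-shift lowering Clang's rotate builtins normally get, but treating
// cir.rotate as defining the same out-of-range behaviour is an assumption.
#include <cstdint>
#include <iostream>

uint32_t rotl32(uint32_t value, uint32_t amount) {
  amount &= 31;                  // shift amount taken modulo the bit width
  if (amount == 0)
    return value;                // avoid the undefined shift by 32
  return (value << amount) | (value >> (32 - amount));
}

int main() {
  std::cout << std::hex << rotl32(0x12345678u, 8) << '\n'; // 34567812
}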
+ Expr::EvalResult Result; + if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getASTContext()) && + !Result.hasSideEffects()) { + if (Result.Val.isInt()) { + return RValue::get(builder.getConstInt(getLoc(E->getSourceRange()), + Result.Val.getInt())); + } + if (Result.Val.isFloat()) + llvm_unreachable("NYI"); + } + + // If current long-double semantics is IEEE 128-bit, replace math builtins + // of long-double with f128 equivalent. + // TODO: This mutation should also be applied to other targets other than PPC, + // after backend supports IEEE 128-bit style libcalls. + if (getTarget().getTriple().isPPC64() && + &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) + llvm_unreachable("NYI"); + + // If the builtin has been declared explicitly with an assembler label, + // disable the specialized emitting below. Ideally we should communicate the + // rename in IR, or at least avoid generating the intrinsic calls that are + // likely to get lowered to the renamed library functions. + const unsigned BuiltinIDIfNoAsmLabel = + FD->hasAttr() ? 0 : BuiltinID; + + std::optional ErrnoOverriden; + // ErrnoOverriden is true if math-errno is overriden via the + // '#pragma float_control(precise, on)'. This pragma disables fast-math, + // which implies math-errno. + if (E->hasStoredFPFeatures()) { + llvm_unreachable("NYI"); + } + // True if 'atttibute__((optnone)) is used. This attibute overrides + // fast-math which implies math-errno. + bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr(); + + // True if we are compiling at -O2 and errno has been disabled + // using the '#pragma float_control(precise, off)', and + // attribute opt-none hasn't been seen. + [[maybe_unused]] bool ErrnoOverridenToFalseWithOpt = + ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone && + CGM.getCodeGenOpts().OptimizationLevel != 0; + + // There are LLVM math intrinsics/instructions corresponding to math library + // functions except the LLVM op will never set errno while the math library + // might. Also, math builtins have the same semantics as their math library + // twins. Thus, we can transform math library and builtin calls to their + // LLVM counterparts if the call is marked 'const' (known to never set errno). + // In case FP exceptions are enabled, the experimental versions of the + // intrinsics model those. + [[maybe_unused]] bool ConstAlways = + getContext().BuiltinInfo.isConst(BuiltinID); + + // There's a special case with the fma builtins where they are always const + // if the target environment is GNU or the target is OS is Windows and we're + // targeting the MSVCRT.dll environment. + // FIXME: This list can be become outdated. Need to find a way to get it some + // other way. + switch (BuiltinID) { + case Builtin::BI__builtin_fma: + case Builtin::BI__builtin_fmaf: + case Builtin::BI__builtin_fmal: + case Builtin::BIfma: + case Builtin::BIfmaf: + case Builtin::BIfmal: { + auto &Trip = CGM.getTriple(); + if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT()) + ConstAlways = true; + break; + } + default: + break; + } + + bool ConstWithoutErrnoAndExceptions = + getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID); + bool ConstWithoutExceptions = + getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID); + + // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is + // disabled. + // Math intrinsics are generated only when math-errno is disabled. 
Any pragmas + // or attributes that affect math-errno should prevent or allow math + // intrincs to be generated. Intrinsics are generated: + // 1- In fast math mode, unless math-errno is overriden + // via '#pragma float_control(precise, on)', or via an + // 'attribute__((optnone))'. + // 2- If math-errno was enabled on command line but overriden + // to false via '#pragma float_control(precise, off))' and + // 'attribute__((optnone))' hasn't been used. + // 3- If we are compiling with optimization and errno has been disabled + // via '#pragma float_control(precise, off)', and + // 'attribute__((optnone))' hasn't been used. + + bool ConstWithoutErrnoOrExceptions = + ConstWithoutErrnoAndExceptions || ConstWithoutExceptions; + bool GenerateIntrinsics = + (ConstAlways && !OptNone) || + (!getLangOpts().MathErrno && + !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone); + if (!GenerateIntrinsics) { + GenerateIntrinsics = + ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions; + if (!GenerateIntrinsics) + GenerateIntrinsics = + ConstWithoutErrnoOrExceptions && + (!getLangOpts().MathErrno && + !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone); + if (!GenerateIntrinsics) + GenerateIntrinsics = + ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; + } + + if (GenerateIntrinsics) { + switch (BuiltinIDIfNoAsmLabel) { + case Builtin::BIceil: + case Builtin::BIceilf: + case Builtin::BIceill: + case Builtin::BI__builtin_ceil: + case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceilf16: + case Builtin::BI__builtin_ceill: + case Builtin::BI__builtin_ceilf128: + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIcopysign: + case Builtin::BIcopysignf: + case Builtin::BIcopysignl: + case Builtin::BI__builtin_copysign: + case Builtin::BI__builtin_copysignf: + case Builtin::BI__builtin_copysignl: + return buildBinaryFPBuiltin(*this, *E); + + case Builtin::BI__builtin_copysignf16: + case Builtin::BI__builtin_copysignf128: + llvm_unreachable("NYI"); + + case Builtin::BIcos: + case Builtin::BIcosf: + case Builtin::BIcosl: + case Builtin::BI__builtin_cos: + case Builtin::BI__builtin_cosf: + case Builtin::BI__builtin_cosf16: + case Builtin::BI__builtin_cosl: + case Builtin::BI__builtin_cosf128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIexp: + case Builtin::BIexpf: + case Builtin::BIexpl: + case Builtin::BI__builtin_exp: + case Builtin::BI__builtin_expf: + case Builtin::BI__builtin_expf16: + case Builtin::BI__builtin_expl: + case Builtin::BI__builtin_expf128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIexp2: + case Builtin::BIexp2f: + case Builtin::BIexp2l: + case Builtin::BI__builtin_exp2: + case Builtin::BI__builtin_exp2f: + case Builtin::BI__builtin_exp2f16: + case Builtin::BI__builtin_exp2l: + case Builtin::BI__builtin_exp2f128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIfabs: + case Builtin::BIfabsf: + case Builtin::BIfabsl: + case Builtin::BI__builtin_fabs: + case Builtin::BI__builtin_fabsf: + case Builtin::BI__builtin_fabsf16: + case Builtin::BI__builtin_fabsl: + case Builtin::BI__builtin_fabsf128: + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIfloor: + case Builtin::BIfloorf: + case Builtin::BIfloorl: + case Builtin::BI__builtin_floor: + case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorf16: + case 
Builtin::BI__builtin_floorl: + case Builtin::BI__builtin_floorf128: + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIfma: + case Builtin::BIfmaf: + case Builtin::BIfmal: + case Builtin::BI__builtin_fma: + case Builtin::BI__builtin_fmaf: + case Builtin::BI__builtin_fmaf16: + case Builtin::BI__builtin_fmal: + case Builtin::BI__builtin_fmaf128: + llvm_unreachable("NYI"); + + case Builtin::BIfmax: + case Builtin::BIfmaxf: + case Builtin::BIfmaxl: + case Builtin::BI__builtin_fmax: + case Builtin::BI__builtin_fmaxf: + case Builtin::BI__builtin_fmaxl: + return RValue::get( + buildBinaryMaybeConstrainedFPBuiltin(*this, *E)); + + case Builtin::BI__builtin_fmaxf16: + case Builtin::BI__builtin_fmaxf128: + llvm_unreachable("NYI"); + + case Builtin::BIfmin: + case Builtin::BIfminf: + case Builtin::BIfminl: + case Builtin::BI__builtin_fmin: + case Builtin::BI__builtin_fminf: + case Builtin::BI__builtin_fminl: + return RValue::get( + buildBinaryMaybeConstrainedFPBuiltin(*this, *E)); + + case Builtin::BI__builtin_fminf16: + case Builtin::BI__builtin_fminf128: + llvm_unreachable("NYI"); + + // fmod() is a special-case. It maps to the frem instruction rather than an + // LLVM intrinsic. + case Builtin::BIfmod: + case Builtin::BIfmodf: + case Builtin::BIfmodl: + case Builtin::BI__builtin_fmod: + case Builtin::BI__builtin_fmodf: + case Builtin::BI__builtin_fmodl: + assert(!MissingFeatures::fastMathFlags()); + return buildBinaryFPBuiltin(*this, *E); + + case Builtin::BI__builtin_fmodf16: + case Builtin::BI__builtin_fmodf128: + llvm_unreachable("NYI"); + + case Builtin::BIlog: + case Builtin::BIlogf: + case Builtin::BIlogl: + case Builtin::BI__builtin_log: + case Builtin::BI__builtin_logf: + case Builtin::BI__builtin_logf16: + case Builtin::BI__builtin_logl: + case Builtin::BI__builtin_logf128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIlog10: + case Builtin::BIlog10f: + case Builtin::BIlog10l: + case Builtin::BI__builtin_log10: + case Builtin::BI__builtin_log10f: + case Builtin::BI__builtin_log10f16: + case Builtin::BI__builtin_log10l: + case Builtin::BI__builtin_log10f128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIlog2: + case Builtin::BIlog2f: + case Builtin::BIlog2l: + case Builtin::BI__builtin_log2: + case Builtin::BI__builtin_log2f: + case Builtin::BI__builtin_log2f16: + case Builtin::BI__builtin_log2l: + case Builtin::BI__builtin_log2f128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BInearbyint: + case Builtin::BInearbyintf: + case Builtin::BInearbyintl: + case Builtin::BI__builtin_nearbyint: + case Builtin::BI__builtin_nearbyintf: + case Builtin::BI__builtin_nearbyintl: + case Builtin::BI__builtin_nearbyintf128: + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIpow: + case Builtin::BIpowf: + case Builtin::BIpowl: + case Builtin::BI__builtin_pow: + case Builtin::BI__builtin_powf: + case Builtin::BI__builtin_powl: + assert(!MissingFeatures::fastMathFlags()); + return RValue::get( + buildBinaryMaybeConstrainedFPBuiltin(*this, *E)); + + case Builtin::BI__builtin_powf16: + case Builtin::BI__builtin_powf128: + llvm_unreachable("NYI"); + + case Builtin::BIrint: + case Builtin::BIrintf: + case Builtin::BIrintl: + case Builtin::BI__builtin_rint: + case Builtin::BI__builtin_rintf: + case Builtin::BI__builtin_rintf16: + case Builtin::BI__builtin_rintl: + case Builtin::BI__builtin_rintf128: + return 
buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIround: + case Builtin::BIroundf: + case Builtin::BIroundl: + case Builtin::BI__builtin_round: + case Builtin::BI__builtin_roundf: + case Builtin::BI__builtin_roundf16: + case Builtin::BI__builtin_roundl: + case Builtin::BI__builtin_roundf128: + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIsin: + case Builtin::BIsinf: + case Builtin::BIsinl: + case Builtin::BI__builtin_sin: + case Builtin::BI__builtin_sinf: + case Builtin::BI__builtin_sinf16: + case Builtin::BI__builtin_sinl: + case Builtin::BI__builtin_sinf128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIsqrt: + case Builtin::BIsqrtf: + case Builtin::BIsqrtl: + case Builtin::BI__builtin_sqrt: + case Builtin::BI__builtin_sqrtf: + case Builtin::BI__builtin_sqrtf16: + case Builtin::BI__builtin_sqrtl: + case Builtin::BI__builtin_sqrtf128: + assert(!MissingFeatures::fastMathFlags()); + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BItrunc: + case Builtin::BItruncf: + case Builtin::BItruncl: + case Builtin::BI__builtin_trunc: + case Builtin::BI__builtin_truncf: + case Builtin::BI__builtin_truncf16: + case Builtin::BI__builtin_truncl: + case Builtin::BI__builtin_truncf128: + return buildUnaryFPBuiltin(*this, *E); + + case Builtin::BIlround: + case Builtin::BIlroundf: + case Builtin::BIlroundl: + case Builtin::BI__builtin_lround: + case Builtin::BI__builtin_lroundf: + case Builtin::BI__builtin_lroundl: + return buildUnaryMaybeConstrainedFPToIntBuiltin( + *this, *E); + + case Builtin::BI__builtin_lroundf128: + llvm_unreachable("NYI"); + + case Builtin::BIllround: + case Builtin::BIllroundf: + case Builtin::BIllroundl: + case Builtin::BI__builtin_llround: + case Builtin::BI__builtin_llroundf: + case Builtin::BI__builtin_llroundl: + return buildUnaryMaybeConstrainedFPToIntBuiltin( + *this, *E); + + case Builtin::BI__builtin_llroundf128: + llvm_unreachable("NYI"); + + case Builtin::BIlrint: + case Builtin::BIlrintf: + case Builtin::BIlrintl: + case Builtin::BI__builtin_lrint: + case Builtin::BI__builtin_lrintf: + case Builtin::BI__builtin_lrintl: + return buildUnaryMaybeConstrainedFPToIntBuiltin(*this, + *E); + + case Builtin::BI__builtin_lrintf128: + llvm_unreachable("NYI"); + + case Builtin::BIllrint: + case Builtin::BIllrintf: + case Builtin::BIllrintl: + case Builtin::BI__builtin_llrint: + case Builtin::BI__builtin_llrintf: + case Builtin::BI__builtin_llrintl: + return buildUnaryMaybeConstrainedFPToIntBuiltin( + *this, *E); + + case Builtin::BI__builtin_llrintf128: + llvm_unreachable("NYI"); + + default: + break; + } + } + + switch (BuiltinIDIfNoAsmLabel) { + default: + break; + + case Builtin::BI__builtin_complex: { + mlir::Value Real = buildScalarExpr(E->getArg(0)); + mlir::Value Imag = buildScalarExpr(E->getArg(1)); + mlir::Value Complex = + builder.createComplexCreate(getLoc(E->getExprLoc()), Real, Imag); + return RValue::getComplex(Complex); + } + + case Builtin::BI__builtin_creal: + case Builtin::BI__builtin_crealf: + case Builtin::BI__builtin_creall: + case Builtin::BIcreal: + case Builtin::BIcrealf: + case Builtin::BIcreall: { + mlir::Value ComplexVal = buildComplexExpr(E->getArg(0)); + mlir::Value Real = + builder.createComplexReal(getLoc(E->getExprLoc()), ComplexVal); + return RValue::get(Real); + } + + case Builtin::BI__builtin_cimag: + case Builtin::BI__builtin_cimagf: + case Builtin::BI__builtin_cimagl: + case Builtin::BIcimag: + case Builtin::BIcimagf: + case Builtin::BIcimagl: { + mlir::Value 
ComplexVal = buildComplexExpr(E->getArg(0)); + mlir::Value Real = + builder.createComplexImag(getLoc(E->getExprLoc()), ComplexVal); + return RValue::get(Real); + } + + case Builtin::BI__builtin_conj: + case Builtin::BI__builtin_conjf: + case Builtin::BI__builtin_conjl: + case Builtin::BIconj: + case Builtin::BIconjf: + case Builtin::BIconjl: { + mlir::Value ComplexVal = buildComplexExpr(E->getArg(0)); + mlir::Value Conj = builder.createUnaryOp( + getLoc(E->getExprLoc()), mlir::cir::UnaryOpKind::Not, ComplexVal); + return RValue::getComplex(Conj); + } + + case Builtin::BI__builtin___CFStringMakeConstantString: + case Builtin::BI__builtin___NSStringMakeConstantString: + llvm_unreachable("NYI"); + + case Builtin::BIprintf: + if (getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()) { + llvm_unreachable("NYI"); + } + break; + + // C stdarg builtins. + case Builtin::BI__builtin_stdarg_start: + case Builtin::BI__builtin_va_start: + case Builtin::BI__va_start: + case Builtin::BI__builtin_va_end: { + buildVAStartEnd(BuiltinID == Builtin::BI__va_start + ? buildScalarExpr(E->getArg(0)) + : buildVAListRef(E->getArg(0)).getPointer(), + BuiltinID != Builtin::BI__builtin_va_end); + return {}; + } + case Builtin::BI__builtin_va_copy: { + auto dstPtr = buildVAListRef(E->getArg(0)).getPointer(); + auto srcPtr = buildVAListRef(E->getArg(1)).getPointer(); + builder.create(dstPtr.getLoc(), dstPtr, srcPtr); + return {}; + } + + case Builtin::BI__builtin_expect: + case Builtin::BI__builtin_expect_with_probability: { + auto ArgValue = buildScalarExpr(E->getArg(0)); + auto ExpectedValue = buildScalarExpr(E->getArg(1)); + + // Don't generate cir.expect on -O0 as the backend won't use it for + // anything. Note, we still IRGen ExpectedValue because it could have + // side-effects. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return RValue::get(ArgValue); + + mlir::FloatAttr ProbAttr = {}; + if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_expect_with_probability) { + llvm::APFloat Probability(0.0); + const Expr *ProbArg = E->getArg(2); + bool EvalSucceed = + ProbArg->EvaluateAsFloat(Probability, CGM.getASTContext()); + assert(EvalSucceed && "probability should be able to evaluate as float"); + (void)EvalSucceed; + bool LoseInfo = false; + Probability.convert(llvm::APFloat::IEEEdouble(), + llvm::RoundingMode::Dynamic, &LoseInfo); + ProbAttr = mlir::FloatAttr::get( + mlir::FloatType::getF64(builder.getContext()), Probability); + } + + auto result = builder.create( + getLoc(E->getSourceRange()), ArgValue.getType(), ArgValue, + ExpectedValue, ProbAttr); + + return RValue::get(result); + } + case Builtin::BI__builtin_unpredictable: { + if (CGM.getCodeGenOpts().OptimizationLevel != 0) + assert(!MissingFeatures::insertBuiltinUnpredictable()); + return RValue::get(buildScalarExpr(E->getArg(0))); + } + + case Builtin::BI__builtin_assume_aligned: { + const Expr *ptr = E->getArg(0); + mlir::Value ptrValue = buildScalarExpr(ptr); + mlir::Value offsetValue = + (E->getNumArgs() > 2) ? 
buildScalarExpr(E->getArg(2)) : nullptr; + + mlir::Attribute alignmentAttr = ConstantEmitter(*this).emitAbstract( + E->getArg(1), E->getArg(1)->getType()); + std::int64_t alignment = cast(alignmentAttr).getSInt(); + + ptrValue = buildAlignmentAssumption(ptrValue, ptr, ptr->getExprLoc(), + builder.getI64IntegerAttr(alignment), + offsetValue); + return RValue::get(ptrValue); + } + + case Builtin::BI__assume: + case Builtin::BI__builtin_assume: { + if (E->getArg(0)->HasSideEffects(getContext())) + return RValue::get(nullptr); + + mlir::Value argValue = buildScalarExpr(E->getArg(0)); + builder.create(getLoc(E->getExprLoc()), argValue); + return RValue::get(nullptr); + } + + case Builtin::BI__builtin_assume_separate_storage: { + const Expr *arg0 = E->getArg(0); + const Expr *arg1 = E->getArg(1); + + mlir::Value value0 = buildScalarExpr(arg0); + mlir::Value value1 = buildScalarExpr(arg1); + + builder.create(getLoc(E->getExprLoc()), + value0, value1); + return RValue::get(nullptr); + } + + case Builtin::BI__builtin_prefetch: { + auto evaluateOperandAsInt = [&](const Expr *Arg) { + Expr::EvalResult Res; + [[maybe_unused]] bool EvalSucceed = + Arg->EvaluateAsInt(Res, CGM.getASTContext()); + assert(EvalSucceed && "expression should be able to evaluate as int"); + return Res.Val.getInt().getZExtValue(); + }; + + bool IsWrite = false; + if (E->getNumArgs() > 1) + IsWrite = evaluateOperandAsInt(E->getArg(1)); + + int Locality = 0; + if (E->getNumArgs() > 2) + Locality = evaluateOperandAsInt(E->getArg(2)); + + mlir::Value Address = buildScalarExpr(E->getArg(0)); + builder.create(getLoc(E->getSourceRange()), Address, + Locality, IsWrite); + return RValue::get(nullptr); + } + + case Builtin::BI__builtin___clear_cache: { + mlir::Type voidTy = mlir::cir::VoidType::get(builder.getContext()); + mlir::Value begin = + builder.createPtrBitcast(buildScalarExpr(E->getArg(0)), voidTy); + mlir::Value end = + builder.createPtrBitcast(buildScalarExpr(E->getArg(1)), voidTy); + builder.create(getLoc(E->getSourceRange()), begin, + end); + return RValue::get(nullptr); + } + + // C++ std:: builtins. 
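// Editorial note (not part of the patch): these std:: library builtins are
// no-ops at the IR level, so no call or CIR operation is emitted. For
// example
//
//   auto &&r = std::move(x);
//
// lowers to nothing more than re-using the lvalue address of `x`, which is
// exactly what the cases below do by returning
// buildLValue(E->getArg(0)).getPointer().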
+ case Builtin::BImove: + case Builtin::BImove_if_noexcept: + case Builtin::BIforward: + case Builtin::BIas_const: + return RValue::get(buildLValue(E->getArg(0)).getPointer()); + case Builtin::BI__GetExceptionInfo: { + llvm_unreachable("NYI"); + } + + case Builtin::BI__fastfail: + llvm_unreachable("NYI"); + + case Builtin::BI__builtin_coro_id: + case Builtin::BI__builtin_coro_promise: + case Builtin::BI__builtin_coro_resume: + case Builtin::BI__builtin_coro_noop: + case Builtin::BI__builtin_coro_destroy: + case Builtin::BI__builtin_coro_done: + case Builtin::BI__builtin_coro_alloc: + case Builtin::BI__builtin_coro_begin: + case Builtin::BI__builtin_coro_end: + case Builtin::BI__builtin_coro_suspend: + case Builtin::BI__builtin_coro_align: + llvm_unreachable("NYI"); + + case Builtin::BI__builtin_coro_frame: { + return buildCoroutineFrame(); + } + case Builtin::BI__builtin_coro_free: + case Builtin::BI__builtin_coro_size: { + GlobalDecl gd{FD}; + mlir::Type ty = CGM.getTypes().GetFunctionType( + CGM.getTypes().arrangeGlobalDeclaration(GD)); + const auto *ND = cast(GD.getDecl()); + auto fnOp = + CGM.GetOrCreateCIRFunction(ND->getName(), ty, gd, /*ForVTable=*/false, + /*DontDefer=*/false); + fnOp.setBuiltinAttr(mlir::UnitAttr::get(builder.getContext())); + return buildCall(E->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), + E, ReturnValue); + } + case Builtin::BI__builtin_dynamic_object_size: { + // Fallthrough below, assert until we have a testcase. + llvm_unreachable("NYI"); + } + case Builtin::BI__builtin_object_size: { + unsigned Type = + E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); + auto ResType = + mlir::dyn_cast(ConvertType(E->getType())); + assert(ResType && "not sure what to do?"); + + // We pass this builtin onto the optimizer so that it can figure out the + // object size in more complex cases. + bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; + return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, + /*EmittedE=*/nullptr, IsDynamic)); + } + case Builtin::BI__builtin_unreachable: { + buildUnreachable(E->getExprLoc()); + + // We do need to preserve an insertion point. + builder.createBlock(builder.getBlock()->getParent()); + + return RValue::get(nullptr); + } + case Builtin::BI__builtin_trap: { + builder.create(getLoc(E->getExprLoc())); + + // Note that cir.trap is a terminator so we need to start a new block to + // preserve the insertion point. 
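// Editorial sketch (not part of the patch): after the trap the current block
// is terminated, so emission continues in a fresh, unreachable block,
// roughly (block syntax assumed):
//
//   cir.trap
// ^bb1:  // no predecessors; gives later emission a valid insertion point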
+ builder.createBlock(builder.getBlock()->getParent()); + + return RValue::get(nullptr); + } + case Builtin::BImemcpy: + case Builtin::BI__builtin_memcpy: + case Builtin::BImempcpy: + case Builtin::BI__builtin_mempcpy: { + Address Dest = buildPointerWithAlignment(E->getArg(0)); + Address Src = buildPointerWithAlignment(E->getArg(1)); + mlir::Value SizeVal = buildScalarExpr(E->getArg(2)); + buildNonNullArgCheck(RValue::get(Dest.getPointer()), + E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), + FD, 0); + buildNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), + E->getArg(1)->getExprLoc(), FD, 1); + builder.createMemCpy(getLoc(E->getSourceRange()), Dest.getPointer(), + Src.getPointer(), SizeVal); + if (BuiltinID == Builtin::BImempcpy || + BuiltinID == Builtin::BI__builtin_mempcpy) + llvm_unreachable("mempcpy is NYI"); + else + return RValue::get(Dest.getPointer()); + } + + case Builtin::BI__builtin_clrsb: + case Builtin::BI__builtin_clrsbl: + case Builtin::BI__builtin_clrsbll: + return buildBuiltinBitOp(*this, E, std::nullopt); + + case Builtin::BI__builtin_ctzs: + case Builtin::BI__builtin_ctz: + case Builtin::BI__builtin_ctzl: + case Builtin::BI__builtin_ctzll: + case Builtin::BI__builtin_ctzg: + return buildBuiltinBitOp(*this, E, BCK_CTZPassedZero); + + case Builtin::BI__builtin_clzs: + case Builtin::BI__builtin_clz: + case Builtin::BI__builtin_clzl: + case Builtin::BI__builtin_clzll: + case Builtin::BI__builtin_clzg: + return buildBuiltinBitOp(*this, E, BCK_CLZPassedZero); + + case Builtin::BI__builtin_ffs: + case Builtin::BI__builtin_ffsl: + case Builtin::BI__builtin_ffsll: + return buildBuiltinBitOp(*this, E, std::nullopt); + + case Builtin::BI__builtin_parity: + case Builtin::BI__builtin_parityl: + case Builtin::BI__builtin_parityll: + return buildBuiltinBitOp(*this, E, std::nullopt); + + case Builtin::BI__popcnt16: + case Builtin::BI__popcnt: + case Builtin::BI__popcnt64: + case Builtin::BI__builtin_popcount: + case Builtin::BI__builtin_popcountl: + case Builtin::BI__builtin_popcountll: + case Builtin::BI__builtin_popcountg: + return buildBuiltinBitOp(*this, E, std::nullopt); + + case Builtin::BI__builtin_bswap16: + case Builtin::BI__builtin_bswap32: + case Builtin::BI__builtin_bswap64: + case Builtin::BI_byteswap_ushort: + case Builtin::BI_byteswap_ulong: + case Builtin::BI_byteswap_uint64: { + auto arg = buildScalarExpr(E->getArg(0)); + return RValue::get(builder.create( + getLoc(E->getSourceRange()), arg)); + } + + case Builtin::BI__builtin_rotateleft8: + case Builtin::BI__builtin_rotateleft16: + case Builtin::BI__builtin_rotateleft32: + case Builtin::BI__builtin_rotateleft64: + case Builtin::BI_rotl8: // Microsoft variants of rotate left + case Builtin::BI_rotl16: + case Builtin::BI_rotl: + case Builtin::BI_lrotl: + case Builtin::BI_rotl64: + return buildRotate(E, false); + + case Builtin::BI__builtin_rotateright8: + case Builtin::BI__builtin_rotateright16: + case Builtin::BI__builtin_rotateright32: + case Builtin::BI__builtin_rotateright64: + case Builtin::BI_rotr8: // Microsoft variants of rotate right + case Builtin::BI_rotr16: + case Builtin::BI_rotr: + case Builtin::BI_lrotr: + case Builtin::BI_rotr64: + return buildRotate(E, true); + + case Builtin::BI__builtin_constant_p: { + mlir::Type ResultType = ConvertType(E->getType()); + + const Expr *Arg = E->getArg(0); + QualType ArgType = Arg->getType(); + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. 
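// Editorial example (not part of the patch): an argument whose type is
// neither integral/enum, floating, an Obj-C object pointer, nor a block
// pointer -- e.g. a struct value or a plain `int *` -- is folded to a
// constant 0 right below, matching the GCC documentation that only numeric
// constants are recognized after inlining; the remaining types fall through
// to the side-effect check.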
+ if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && + !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get( + builder.getConstInt(getLoc(E->getSourceRange()), + mlir::cast(ResultType), 0)); + + if (Arg->HasSideEffects(getContext())) + // The argument is unevaluated, so be conservative if it might have + // side-effects. + return RValue::get( + builder.getConstInt(getLoc(E->getSourceRange()), + mlir::cast(ResultType), 0)); + + mlir::Value ArgValue = buildScalarExpr(Arg); + if (ArgType->isObjCObjectPointerType()) + // Convert Objective-C objects to id because we cannot distinguish between + // LLVM types for Obj-C classes as they are opaque. + ArgType = CGM.getASTContext().getObjCIdType(); + ArgValue = builder.createBitcast(ArgValue, ConvertType(ArgType)); + + mlir::Value Result = builder.create( + getLoc(E->getSourceRange()), ArgValue); + if (Result.getType() != ResultType) + Result = builder.createBoolToInt(Result, ResultType); + return RValue::get(Result); + } + + case Builtin::BIalloca: + case Builtin::BI_alloca: + case Builtin::BI__builtin_alloca_uninitialized: + case Builtin::BI__builtin_alloca: { + // Get alloca size input + mlir::Value Size = buildScalarExpr(E->getArg(0)); + + // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. + const TargetInfo &TI = getContext().getTargetInfo(); + const CharUnits SuitableAlignmentInBytes = + getContext().toCharUnitsFromBits(TI.getSuitableAlign()); + + // Emit the alloca op with type `u8 *` to match the semantics of + // `llvm.alloca`. We later bitcast the type to `void *` to match the + // semantics of C/C++ + // FIXME(cir): It may make sense to allow AllocaOp of type `u8` to return a + // pointer of type `void *`. This will require a change to the allocaOp + // verifier. + auto AllocaAddr = builder.createAlloca( + getLoc(E->getSourceRange()), builder.getUInt8PtrTy(), + builder.getUInt8Ty(), "bi_alloca", SuitableAlignmentInBytes, Size); + + // Initialize the allocated buffer if required. + if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized) + initializeAlloca(*this, AllocaAddr, Size, SuitableAlignmentInBytes); + + // An alloca will always return a pointer to the alloca (stack) address + // space. This address space need not be the same as the AST / Language + // default (e.g. in C / C++ auto vars are in the generic address space). At + // the AST level this is handled within CreateTempAlloca et al., but for the + // builtin / dynamic alloca we have to handle it here. + assert(!MissingFeatures::addressSpace()); + auto AAS = getCIRAllocaAddressSpace(); + auto EAS = builder.getAddrSpaceAttr( + E->getType()->getPointeeType().getAddressSpace()); + if (EAS != AAS) { + assert(false && "Non-default address space for alloca NYI"); + } + + // Bitcast the alloca to the expected type. 
+ return RValue::get( + builder.createBitcast(AllocaAddr, builder.getVoidPtrTy())); + } + + case Builtin::BI__sync_fetch_and_add: + llvm_unreachable("Shouldn't make it through sema"); + case Builtin::BI__sync_fetch_and_add_1: + case Builtin::BI__sync_fetch_and_add_2: + case Builtin::BI__sync_fetch_and_add_4: + case Builtin::BI__sync_fetch_and_add_8: + case Builtin::BI__sync_fetch_and_add_16: { + return buildBinaryAtomic(*this, mlir::cir::AtomicFetchKind::Add, E); + } + + case Builtin::BI__sync_val_compare_and_swap_1: + case Builtin::BI__sync_val_compare_and_swap_2: + case Builtin::BI__sync_val_compare_and_swap_4: + case Builtin::BI__sync_val_compare_and_swap_8: + case Builtin::BI__sync_val_compare_and_swap_16: + return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); + + case Builtin::BI__sync_bool_compare_and_swap_1: + case Builtin::BI__sync_bool_compare_and_swap_2: + case Builtin::BI__sync_bool_compare_and_swap_4: + case Builtin::BI__sync_bool_compare_and_swap_8: + case Builtin::BI__sync_bool_compare_and_swap_16: + return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); + + case Builtin::BI__builtin_add_overflow: + case Builtin::BI__builtin_sub_overflow: + case Builtin::BI__builtin_mul_overflow: { + const clang::Expr *LeftArg = E->getArg(0); + const clang::Expr *RightArg = E->getArg(1); + const clang::Expr *ResultArg = E->getArg(2); + + clang::QualType ResultQTy = + ResultArg->getType()->castAs()->getPointeeType(); + + WidthAndSignedness LeftInfo = + getIntegerWidthAndSignedness(CGM.getASTContext(), LeftArg->getType()); + WidthAndSignedness RightInfo = + getIntegerWidthAndSignedness(CGM.getASTContext(), RightArg->getType()); + WidthAndSignedness ResultInfo = + getIntegerWidthAndSignedness(CGM.getASTContext(), ResultQTy); + + // Note we compute the encompassing type with the consideration to the + // result type, so later in LLVM lowering we don't get redundant integral + // extension casts. + WidthAndSignedness EncompassingInfo = + EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); + + auto EncompassingCIRTy = mlir::cir::IntType::get( + builder.getContext(), EncompassingInfo.Width, EncompassingInfo.Signed); + auto ResultCIRTy = + mlir::cast(CGM.getTypes().ConvertType(ResultQTy)); + + mlir::Value Left = buildScalarExpr(LeftArg); + mlir::Value Right = buildScalarExpr(RightArg); + Address ResultPtr = buildPointerWithAlignment(ResultArg); + + // Extend each operand to the encompassing type, if necessary. + if (Left.getType() != EncompassingCIRTy) + Left = builder.createCast(mlir::cir::CastKind::integral, Left, + EncompassingCIRTy); + if (Right.getType() != EncompassingCIRTy) + Right = builder.createCast(mlir::cir::CastKind::integral, Right, + EncompassingCIRTy); + + // Perform the operation on the extended values. 
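// Editorial worked example (not part of the patch), assuming an LP64 target:
//
//   long a, b; int r;
//   bool ovf = __builtin_add_overflow(a, b, &r);
//
// LeftInfo and RightInfo are 64-bit signed and ResultInfo is 32-bit signed,
// so the encompassing type is 64-bit signed, no operand casts are needed,
// and the overflow flag produced below is true exactly when the
// mathematically exact sum does not fit in `int`.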
+ mlir::cir::BinOpOverflowKind OpKind; + switch (BuiltinID) { + default: + llvm_unreachable("Unknown overflow builtin id."); + case Builtin::BI__builtin_add_overflow: + OpKind = mlir::cir::BinOpOverflowKind::Add; + break; + case Builtin::BI__builtin_sub_overflow: + OpKind = mlir::cir::BinOpOverflowKind::Sub; + break; + case Builtin::BI__builtin_mul_overflow: + OpKind = mlir::cir::BinOpOverflowKind::Mul; + break; + } + + auto Loc = getLoc(E->getSourceRange()); + auto ArithResult = + builder.createBinOpOverflowOp(Loc, ResultCIRTy, OpKind, Left, Right); + + // Here is a slight difference from the original clang CodeGen: + // - In the original clang CodeGen, the checked arithmetic result is + // first computed as a value of the encompassing type, and then it is + // truncated to the actual result type with a second overflow checking. + // - In CIRGen, the checked arithmetic operation directly produce the + // checked arithmetic result in its expected type. + // + // So we don't need a truncation and a second overflow checking here. + + // Finally, store the result using the pointer. + bool isVolatile = + ResultArg->getType()->getPointeeType().isVolatileQualified(); + builder.createStore(Loc, buildToMemory(ArithResult.result, ResultQTy), + ResultPtr, isVolatile); + + return RValue::get(ArithResult.overflow); + } + + case Builtin::BI__builtin_uadd_overflow: + case Builtin::BI__builtin_uaddl_overflow: + case Builtin::BI__builtin_uaddll_overflow: + case Builtin::BI__builtin_usub_overflow: + case Builtin::BI__builtin_usubl_overflow: + case Builtin::BI__builtin_usubll_overflow: + case Builtin::BI__builtin_umul_overflow: + case Builtin::BI__builtin_umull_overflow: + case Builtin::BI__builtin_umulll_overflow: + case Builtin::BI__builtin_sadd_overflow: + case Builtin::BI__builtin_saddl_overflow: + case Builtin::BI__builtin_saddll_overflow: + case Builtin::BI__builtin_ssub_overflow: + case Builtin::BI__builtin_ssubl_overflow: + case Builtin::BI__builtin_ssubll_overflow: + case Builtin::BI__builtin_smul_overflow: + case Builtin::BI__builtin_smull_overflow: + case Builtin::BI__builtin_smulll_overflow: { + // Scalarize our inputs. 
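// Editorial note (not part of the patch): unlike the generic builtins above,
// these fixed-signature variants already agree on a single type, e.g.
//
//   unsigned r;
//   bool ovf = __builtin_uadd_overflow(x, y, &r);
//
// maps directly to an unsigned 32-bit add with no widening, so only the
// operation kind has to be selected below.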
+ mlir::Value X = buildScalarExpr(E->getArg(0)); + mlir::Value Y = buildScalarExpr(E->getArg(1)); + + const clang::Expr *ResultArg = E->getArg(2); + Address ResultPtr = buildPointerWithAlignment(ResultArg); + + // Decide which of the arithmetic operation we are lowering to: + mlir::cir::BinOpOverflowKind ArithKind; + switch (BuiltinID) { + default: + llvm_unreachable("Unknown overflow builtin id."); + case Builtin::BI__builtin_uadd_overflow: + case Builtin::BI__builtin_uaddl_overflow: + case Builtin::BI__builtin_uaddll_overflow: + case Builtin::BI__builtin_sadd_overflow: + case Builtin::BI__builtin_saddl_overflow: + case Builtin::BI__builtin_saddll_overflow: + ArithKind = mlir::cir::BinOpOverflowKind::Add; + break; + case Builtin::BI__builtin_usub_overflow: + case Builtin::BI__builtin_usubl_overflow: + case Builtin::BI__builtin_usubll_overflow: + case Builtin::BI__builtin_ssub_overflow: + case Builtin::BI__builtin_ssubl_overflow: + case Builtin::BI__builtin_ssubll_overflow: + ArithKind = mlir::cir::BinOpOverflowKind::Sub; + break; + case Builtin::BI__builtin_umul_overflow: + case Builtin::BI__builtin_umull_overflow: + case Builtin::BI__builtin_umulll_overflow: + case Builtin::BI__builtin_smul_overflow: + case Builtin::BI__builtin_smull_overflow: + case Builtin::BI__builtin_smulll_overflow: + ArithKind = mlir::cir::BinOpOverflowKind::Mul; + break; + } + + clang::QualType ResultQTy = + ResultArg->getType()->castAs()->getPointeeType(); + auto ResultCIRTy = + mlir::cast(CGM.getTypes().ConvertType(ResultQTy)); + + auto Loc = getLoc(E->getSourceRange()); + auto ArithResult = + builder.createBinOpOverflowOp(Loc, ResultCIRTy, ArithKind, X, Y); + + bool isVolatile = + ResultArg->getType()->getPointeeType().isVolatileQualified(); + builder.createStore(Loc, buildToMemory(ArithResult.result, ResultQTy), + ResultPtr, isVolatile); + + return RValue::get(ArithResult.overflow); + } + } + + // If this is an alias for a lib function (e.g. __builtin_sin), emit + // the call using the normal call path, but using the unmangled + // version of the function name. + if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) + return buildLibraryCall(*this, FD, E, + CGM.getBuiltinLibFunction(FD, BuiltinID)); + + // If this is a predefined lib function (e.g. malloc), emit the call + // using exactly the normal call path. + if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) + return buildLibraryCall(*this, FD, E, + buildScalarExpr(E->getCallee()).getDefiningOp()); + + // Check that a call to a target specific builtin has the correct target + // features. + // This is down here to avoid non-target specific builtins, however, if + // generic builtins start to require generic target features then we + // can move this up to the beginning of the function. + // checkTargetFeatures(E, FD); + + if (unsigned VectorWidth = + getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) + llvm_unreachable("NYI"); + + // See if we have a target specific intrinsic. + auto Name = getContext().BuiltinInfo.getName(BuiltinID).str(); + Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; + StringRef Prefix = + llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); + if (!Prefix.empty()) { + IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name); + // NOTE we don't need to perform a compatibility flag check here since the + // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the + // MS builtins via ALL_MS_LANGUAGES and are filtered earlier. 
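// Editorial note (not part of the patch): the lookup is keyed on the target
// architecture prefix; for x86 and x86_64, for instance, Prefix is "x86", so
// a target-specific builtin name resolves through
// Intrinsic::getIntrinsicForClangBuiltin("x86", Name), and anything still
// unresolved is retried against the MS-builtin table below.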
+ if (IntrinsicID == Intrinsic::not_intrinsic) + IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name); + } + + if (IntrinsicID != Intrinsic::not_intrinsic) { + llvm_unreachable("NYI"); + } + + // Some target-specific builtins can have aggregate return values, e.g. + // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force + // ReturnValue to be non-null, so that the target-specific emission code can + // always just emit into it. + TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); + if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) { + llvm_unreachable("NYI"); + } + + // Now see if we can emit a target-specific builtin. + if (auto V = buildTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { + switch (EvalKind) { + case TEK_Scalar: + if (mlir::isa(V.getType())) + return RValue::get(nullptr); + return RValue::get(V); + case TEK_Aggregate: + llvm_unreachable("NYI"); + case TEK_Complex: + llvm_unreachable("No current target builtin returns complex"); + } + llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + } + + CGM.ErrorUnsupported(E, "builtin function"); + + // Unknown builtin, for now just dump it out and return undef. + return GetUndefRValue(E->getType()); +} + +mlir::Value CIRGenFunction::buildCheckedArgForBuiltin(const Expr *E, + BuiltinCheckKind Kind) { + assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) && + "Unsupported builtin check kind"); + + auto value = buildScalarExpr(E); + if (!SanOpts.has(SanitizerKind::Builtin)) + return value; + + assert(!MissingFeatures::sanitizerBuiltin()); + llvm_unreachable("NYI"); +} + +static mlir::Value buildTargetArchBuiltinExpr(CIRGenFunction *CGF, + unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch) { + // When compiling in HipStdPar mode we have to be conservative in rejecting + // target specific features in the FE, and defer the possible error to the + // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is + // referenced by an accelerator executable function, we emit an error. + // Returning nullptr here leads to the builtin being handled in + // EmitStdParUnsupportedBuiltin. 
+ if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice && + Arch != CGF->getTarget().getTriple().getArch()) + return nullptr; + + switch (Arch) { + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + llvm_unreachable("NYI"); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: + case llvm::Triple::aarch64_be: + return CGF->buildAArch64BuiltinExpr(BuiltinID, E, ReturnValue, Arch); + case llvm::Triple::bpfeb: + case llvm::Triple::bpfel: + llvm_unreachable("NYI"); + case llvm::Triple::x86: + case llvm::Triple::x86_64: + return CGF->buildX86BuiltinExpr(BuiltinID, E); + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + llvm_unreachable("NYI"); + case llvm::Triple::r600: + case llvm::Triple::amdgcn: + llvm_unreachable("NYI"); + case llvm::Triple::systemz: + llvm_unreachable("NYI"); + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + llvm_unreachable("NYI"); + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + llvm_unreachable("NYI"); + case llvm::Triple::hexagon: + llvm_unreachable("NYI"); + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + llvm_unreachable("NYI"); + default: + return {}; + } +} + +mlir::Value +CIRGenFunction::buildTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue) { + if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { + assert(getContext().getAuxTargetInfo() && "Missing aux target info"); + return buildTargetArchBuiltinExpr( + this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, + ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); + } + + return buildTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue, + getTarget().getTriple().getArch()); +} + +void CIRGenFunction::buildVAStartEnd(mlir::Value ArgValue, bool IsStart) { + // LLVM codegen casts to *i8, no real gain on doing this for CIRGen this + // early, defer to LLVM lowering. + if (IsStart) + builder.create(ArgValue.getLoc(), ArgValue); + else + builder.create(ArgValue.getLoc(), ArgValue); +} + +/// Checks if using the result of __builtin_object_size(p, @p From) in place of +/// __builtin_object_size(p, @p To) is correct +static bool areBOSTypesCompatible(int From, int To) { + // Note: Our __builtin_object_size implementation currently treats Type=0 and + // Type=2 identically. Encoding this implementation detail here may make + // improving __builtin_object_size difficult in the future, so it's omitted. + return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); +} + +/// Returns a Value corresponding to the size of the given expression. +/// This Value may be either of the following: +/// +/// - Reference an argument if `pass_object_size` is used. +/// - A call to a `cir.objsize`. +/// +/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null +/// and we wouldn't otherwise try to reference a pass_object_size parameter, +/// we'll call `cir.objsize` on EmittedE, rather than emitting E. +mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, + mlir::cir::IntType ResType, + mlir::Value EmittedE, + bool IsDynamic) { + // We need to reference an argument if the pointer is a parameter with the + // pass_object_size attribute. 
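// Editorial example (not part of the patch): this covers parameters such as
//
//   void fill(void *const buf __attribute__((pass_object_size(0))), int c);
//
// where callers pass the object size as a hidden argument. Inside `fill`,
// __builtin_object_size(buf, 0) must load that implicit parameter (looked up
// via SizeArguments/LocalDeclMap below) rather than emit a cir.objsize.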
+ if (auto *D = dyn_cast(E->IgnoreParenImpCasts())) { + auto *Param = dyn_cast(D->getDecl()); + auto *PS = D->getDecl()->getAttr(); + if (Param != nullptr && PS != nullptr && + areBOSTypesCompatible(PS->getType(), Type)) { + auto Iter = SizeArguments.find(Param); + assert(Iter != SizeArguments.end()); + + const ImplicitParamDecl *D = Iter->second; + auto DIter = LocalDeclMap.find(D); + assert(DIter != LocalDeclMap.end()); + + return buildLoadOfScalar(DIter->second, /*Volatile=*/false, + getContext().getSizeType(), E->getBeginLoc()); + } + } + + // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't + // evaluate E for side-effects. In either case, just like original LLVM + // lowering, we shouldn't lower to `cir.objsize`. + if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) + llvm_unreachable("NYI"); + + auto Ptr = EmittedE ? EmittedE : buildScalarExpr(E); + assert(mlir::isa(Ptr.getType()) && + "Non-pointer passed to __builtin_object_size?"); + + // LLVM intrinsics (which CIR lowers to at some point, only supports 0 + // and 2, account for that right now. + mlir::cir::SizeInfoType sizeInfoTy = ((Type & 2) != 0) + ? mlir::cir::SizeInfoType::min + : mlir::cir::SizeInfoType::max; + // TODO(cir): Heads up for LLVM lowering, For GCC compatibility, + // __builtin_object_size treat NULL as unknown size. + return builder.create( + getLoc(E->getSourceRange()), ResType, Ptr, sizeInfoTy, IsDynamic); +} + +mlir::Value CIRGenFunction::evaluateOrEmitBuiltinObjectSize( + const Expr *E, unsigned Type, mlir::cir::IntType ResType, + mlir::Value EmittedE, bool IsDynamic) { + uint64_t ObjectSize; + if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) + return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); + return builder.getConstInt(getLoc(E->getSourceRange()), ResType, ObjectSize); +} + +/// Given a builtin id for a function like "__builtin_fabsf", return a Function* +/// for "fabsf". +mlir::cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *FD, + unsigned BuiltinID) { + assert(astCtx.BuiltinInfo.isLibFunction(BuiltinID)); + + // Get the name, skip over the __builtin_ prefix (if necessary). + StringRef Name; + GlobalDecl D(FD); + + // TODO: This list should be expanded or refactored after all GCC-compatible + // std libcall builtins are implemented. 
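// Editorial note (not part of the patch): in the common case the library
// name is simply the builtin name with the 10-character "__builtin_" prefix
// dropped, e.g. "__builtin_fabsf" becomes "fabsf" via the substr(10) call
// below; the two maps that follow only handle the PPC IEEE-128 and AIX
// 64-bit long-double renames.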
+ static SmallDenseMap F128Builtins{ + {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"}, + {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"}, + {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"}, + {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"}, + {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"}, + {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"}, + {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"}, + {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"}, + {Builtin::BI__builtin_fprintf, "__fprintfieee128"}, + {Builtin::BI__builtin_printf, "__printfieee128"}, + {Builtin::BI__builtin_snprintf, "__snprintfieee128"}, + {Builtin::BI__builtin_sprintf, "__sprintfieee128"}, + {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"}, + {Builtin::BI__builtin_vprintf, "__vprintfieee128"}, + {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"}, + {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"}, + {Builtin::BI__builtin_fscanf, "__fscanfieee128"}, + {Builtin::BI__builtin_scanf, "__scanfieee128"}, + {Builtin::BI__builtin_sscanf, "__sscanfieee128"}, + {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"}, + {Builtin::BI__builtin_vscanf, "__vscanfieee128"}, + {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"}, + {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"}, + }; + + // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit + // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions + // if it is 64-bit 'long double' mode. + static SmallDenseMap AIXLongDouble64Builtins{ + {Builtin::BI__builtin_frexpl, "frexp"}, + {Builtin::BI__builtin_ldexpl, "ldexp"}, + {Builtin::BI__builtin_modfl, "modf"}, + }; + + // If the builtin has been declared explicitly with an assembler label, + // use the mangled name. This differs from the plain label on platforms + // that prefix labels. + if (FD->hasAttr()) + Name = getMangledName(D); + else { + // TODO: This mutation should also be applied to other targets other than + // PPC, after backend supports IEEE 128-bit style libcalls. + if (getTriple().isPPC64() && + &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() && + F128Builtins.find(BuiltinID) != F128Builtins.end()) + Name = F128Builtins[BuiltinID]; + else if (getTriple().isOSAIX() && + &getTarget().getLongDoubleFormat() == + &llvm::APFloat::IEEEdouble() && + AIXLongDouble64Builtins.find(BuiltinID) != + AIXLongDouble64Builtins.end()) + Name = AIXLongDouble64Builtins[BuiltinID]; + else + Name = astCtx.BuiltinInfo.getName(BuiltinID).substr(10); + } + + auto Ty = getTypes().ConvertType(FD->getType()); + return GetOrCreateCIRFunction(Name, Ty, D, /*ForVTable=*/false); +} \ No newline at end of file diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp new file mode 100644 index 000000000000..5b74321d36f0 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -0,0 +1,2913 @@ +//===---- CIRGenBuiltinAArch64.cpp - Emit CIR for AArch64 builtins --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit ARM64 Builtin calls as CIR or a function call +// to be later resolved. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenCall.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "TargetInfo.h" +#include "clang/CIR/MissingFeatures.h" + +// TODO(cir): once all builtins are covered, decide whether we still +// need to use LLVM intrinsics or if there's a better approach to follow. Right +// now the intrinsics are reused to make it convenient to encode all thousands +// of them and passing down to LLVM lowering. +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Value.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; +using namespace llvm; + +enum { + AddRetType = (1 << 0), + Add1ArgType = (1 << 1), + Add2ArgTypes = (1 << 2), + + VectorizeRetType = (1 << 3), + VectorizeArgTypes = (1 << 4), + + InventFloatType = (1 << 5), + UnsignedAlts = (1 << 6), + + Use64BitVectors = (1 << 7), + Use128BitVectors = (1 << 8), + + Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, + VectorRet = AddRetType | VectorizeRetType, + VectorRetGetArgs01 = + AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, + FpCmpzModifiers = + AddRetType | VectorizeRetType | Add1ArgType | InventFloatType +}; + +namespace { +struct ARMVectorIntrinsicInfo { + const char *NameHint; + unsigned BuiltinID; + unsigned LLVMIntrinsic; + unsigned AltLLVMIntrinsic; + uint64_t TypeModifier; + + bool operator<(unsigned RHSBuiltinID) const { + return BuiltinID < RHSBuiltinID; + } + bool operator<(const ARMVectorIntrinsicInfo &TE) const { + return BuiltinID < TE.BuiltinID; + } +}; +} // end anonymous namespace + +#define NEONMAP0(NameBase) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0} + +#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier} + +#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ + {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \ + Intrinsic::AltLLVMIntrinsic, TypeModifier} + +static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { + NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0), + NEONMAP0(splat_lane_v), + NEONMAP0(splat_laneq_v), + NEONMAP0(splatq_lane_v), + NEONMAP0(splatq_laneq_v), + NEONMAP1(vabs_v, aarch64_neon_abs, 0), + NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), + NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), + NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), + NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), + NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), + NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), + NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u32, 
aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, + Add1ArgType | UnsignedAlts), + NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), + NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcage_v, aarch64_neon_facge, 0), + NEONMAP1(vcageq_v, aarch64_neon_facge, 0), + NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), + NEONMAP1(vcale_v, aarch64_neon_facge, 0), + NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), + NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), + NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), + NEONMAP0(vceqz_v), + NEONMAP0(vceqzq_v), + NEONMAP0(vcgez_v), + NEONMAP0(vcgezq_v), + NEONMAP0(vcgtz_v), + NEONMAP0(vcgtzq_v), + NEONMAP0(vclez_v), + NEONMAP0(vclezq_v), + NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), + NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), + NEONMAP0(vcltz_v), + NEONMAP0(vcltzq_v), + NEONMAP1(vclz_v, ctlz, Add1ArgType), + NEONMAP1(vclzq_v, ctlz, Add1ArgType), + NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), + NEONMAP1(vcnt_v, ctpop, Add1ArgType), + NEONMAP1(vcntq_v, ctpop, Add1ArgType), + NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), + NEONMAP0(vcvt_f16_s16), + NEONMAP0(vcvt_f16_u16), + NEONMAP1(vcvt_f32_f16, 
aarch64_neon_vcvthf2fp, 0), + NEONMAP0(vcvt_f32_v), + NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), + NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP0(vcvtq_f16_s16), + NEONMAP0(vcvtq_f16_u16), + NEONMAP0(vcvtq_f32_v), + NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0), + NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), + NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), + NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, + 0), + NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, + 0), + NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), + NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), + NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), + NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdot_u32, aarch64_neon_udot, 0), + NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), + NEONMAP1(vdotq_u32, aarch64_neon_udot, 0), + NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, + Add1ArgType | UnsignedAlts), + NEONMAP0(vext_v), + NEONMAP0(vextq_v), + NEONMAP0(vfma_v), + NEONMAP0(vfmaq_v), + NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), + NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, + Add1ArgType | UnsignedAlts), + NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, + Add1ArgType | UnsignedAlts), + NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), + NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), + NEONMAP1(vld1q_x4_v, 
aarch64_neon_ld1x4, 0), + NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), + NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), + NEONMAP0(vmovl_v), + NEONMAP0(vmovn_v), + NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), + NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), + NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), + NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), + NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), + NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), + NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), + NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), + NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), + NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), + NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, + Add1ArgType | UnsignedAlts), + NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), + NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), + NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), + NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), + NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), + NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, + Add1ArgType | UnsignedAlts), + NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), + NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), + NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, + Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, + Add1ArgType | UnsignedAlts), + NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), + NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), + NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), + 
NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), + NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, + Add1ArgType | UnsignedAlts), + NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, + Add1ArgType | UnsignedAlts), + NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), + NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), + NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), + NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), + NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), + NEONMAP0(vrndi_v), + NEONMAP0(vrndiq_v), + NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, + Add1ArgType | UnsignedAlts), + NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), + NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), + NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), + NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), + NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), + NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), + NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), + NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), + NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), + NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), + NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), + NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), + NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), + NEONMAP0(vshl_n_v), + NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, + Add1ArgType | UnsignedAlts), + NEONMAP0(vshll_n_v), + NEONMAP0(vshlq_n_v), + NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, + Add1ArgType | UnsignedAlts), + NEONMAP0(vshr_n_v), + NEONMAP0(vshrn_n_v), + NEONMAP0(vshrq_n_v), + NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), + NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), + NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), + NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), + NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), + NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), + NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), + NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), + NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), + NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), + NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), + NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), + NEONMAP1(vst1q_x3_v, 
aarch64_neon_st1x3, 0), + NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), + NEONMAP0(vsubhn_v), + NEONMAP0(vtst_v), + NEONMAP0(vtstq_v), + NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), + NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), + NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), +}; + +static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { + NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), + NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), + NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), + NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, + AddRetType | Add1ArgType), + NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, + AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), + NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + 
NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, + AddRetType | Add1ArgType), + NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, + AddRetType | Add1ArgType), + NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), + NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), + NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), + NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), + NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), + NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), + NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), + NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), + NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), + NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), + NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), + NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), + 
NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), + NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), + NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), + NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), + NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), + NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), + NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), + NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), + NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, + AddRetType | Add1ArgType), + NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), + NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), + NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType), + NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType), + NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), + NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), + NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), + NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), + NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), + NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), + NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), + NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, + VectorRet | Use64BitVectors), + NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, + VectorRet | Use64BitVectors), + NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, + Vectorize1ArgType | Use64BitVectors), + 
NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), + NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), + NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), + NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), + NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), + NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), + NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), + NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), + NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), + NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), + NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), + NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), + NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), + NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), + NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), + NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), + NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), + NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), + NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), + NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), + NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), + NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), + NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), + NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), + NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), + NEONMAP1(vsrid_n_s64, 
aarch64_neon_vsri, Vectorize1ArgType), + NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), + NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), + NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, + Vectorize1ArgType | Use64BitVectors), + NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), + // FP16 scalar intrinisics go here. + NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), + NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, + AddRetType | Add1ArgType), + NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), + NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), + NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), + NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), + NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), + NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), +}; + +// Some intrinsics are equivalent for codegen. 
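
The NEONMAP tables above are kept sorted by builtin ID and are consulted by binary search (see the `findARMVectorIntrinsicInMap` helper later in this patch), while the `NEONEquivalentIntrinsicMap` that follows pairs each listed builtin with a canonical builtin whose codegen is identical. A minimal, self-contained sketch of that sorted-table lookup pattern; the struct layout, field names, and table contents here are hypothetical stand-ins, not the real CIRGen/LLVM types:

```cpp
// Illustrative sketch only -- a trimmed-down version of the sorted-table
// lookup used by findARMVectorIntrinsicInMap further below. Field names and
// table contents are hypothetical.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

struct VectorIntrinsicInfo {
  const char *NameHint;      // e.g. "vqadd", used when naming the call
  unsigned BuiltinID;        // the NEON::BI__builtin_neon_* value (sort key)
  unsigned LLVMIntrinsic;    // intrinsic to emit (0 when handled specially)
  unsigned AltLLVMIntrinsic; // alternate intrinsic (e.g. unsigned variant)
  unsigned TypeModifier;     // Add1ArgType, UnsignedAlts, ...

  // Enables std::lower_bound over a table keyed by BuiltinID.
  bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; }
};

static bool orderedByID(const VectorIntrinsicInfo &A, const VectorIntrinsicInfo &B) {
  return A.BuiltinID < B.BuiltinID;
}

// The table must stay sorted by BuiltinID for the binary search to be valid.
static const VectorIntrinsicInfo Map[] = {
    {"vqadd", 100, 11, 12, 0},
    {"vqsub", 120, 13, 14, 0},
    {"vrshl", 140, 15, 16, 0},
};

static const VectorIntrinsicInfo *findInMap(unsigned BuiltinID) {
  assert(std::is_sorted(std::begin(Map), std::end(Map), orderedByID));
  const VectorIntrinsicInfo *It =
      std::lower_bound(std::begin(Map), std::end(Map), BuiltinID);
  return (It != std::end(Map) && It->BuiltinID == BuiltinID) ? It : nullptr;
}

int main() {
  if (const VectorIntrinsicInfo *Info = findInMap(120))
    std::printf("%s lowers to intrinsic %u\n", Info->NameHint, Info->LLVMIntrinsic);
  return 0;
}
```

Keeping the tables sorted is what makes the debug-only `is_sorted` assertion and the `lower_bound` lookup cheap; the equivalence table below then only needs to redirect a builtin to its canonical twin before that lookup applies.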
+static const std::pair NEONEquivalentIntrinsicMap[] = { + { + NEON::BI__builtin_neon_splat_lane_bf16, + NEON::BI__builtin_neon_splat_lane_v, + }, + { + NEON::BI__builtin_neon_splat_laneq_bf16, + NEON::BI__builtin_neon_splat_laneq_v, + }, + { + NEON::BI__builtin_neon_splatq_lane_bf16, + NEON::BI__builtin_neon_splatq_lane_v, + }, + { + NEON::BI__builtin_neon_splatq_laneq_bf16, + NEON::BI__builtin_neon_splatq_laneq_v, + }, + { + NEON::BI__builtin_neon_vabd_f16, + NEON::BI__builtin_neon_vabd_v, + }, + { + NEON::BI__builtin_neon_vabdq_f16, + NEON::BI__builtin_neon_vabdq_v, + }, + { + NEON::BI__builtin_neon_vabs_f16, + NEON::BI__builtin_neon_vabs_v, + }, + { + NEON::BI__builtin_neon_vabsq_f16, + NEON::BI__builtin_neon_vabsq_v, + }, + { + NEON::BI__builtin_neon_vcage_f16, + NEON::BI__builtin_neon_vcage_v, + }, + { + NEON::BI__builtin_neon_vcageq_f16, + NEON::BI__builtin_neon_vcageq_v, + }, + { + NEON::BI__builtin_neon_vcagt_f16, + NEON::BI__builtin_neon_vcagt_v, + }, + { + NEON::BI__builtin_neon_vcagtq_f16, + NEON::BI__builtin_neon_vcagtq_v, + }, + { + NEON::BI__builtin_neon_vcale_f16, + NEON::BI__builtin_neon_vcale_v, + }, + { + NEON::BI__builtin_neon_vcaleq_f16, + NEON::BI__builtin_neon_vcaleq_v, + }, + { + NEON::BI__builtin_neon_vcalt_f16, + NEON::BI__builtin_neon_vcalt_v, + }, + { + NEON::BI__builtin_neon_vcaltq_f16, + NEON::BI__builtin_neon_vcaltq_v, + }, + { + NEON::BI__builtin_neon_vceqz_f16, + NEON::BI__builtin_neon_vceqz_v, + }, + { + NEON::BI__builtin_neon_vceqzq_f16, + NEON::BI__builtin_neon_vceqzq_v, + }, + { + NEON::BI__builtin_neon_vcgez_f16, + NEON::BI__builtin_neon_vcgez_v, + }, + { + NEON::BI__builtin_neon_vcgezq_f16, + NEON::BI__builtin_neon_vcgezq_v, + }, + { + NEON::BI__builtin_neon_vcgtz_f16, + NEON::BI__builtin_neon_vcgtz_v, + }, + { + NEON::BI__builtin_neon_vcgtzq_f16, + NEON::BI__builtin_neon_vcgtzq_v, + }, + { + NEON::BI__builtin_neon_vclez_f16, + NEON::BI__builtin_neon_vclez_v, + }, + { + NEON::BI__builtin_neon_vclezq_f16, + NEON::BI__builtin_neon_vclezq_v, + }, + { + NEON::BI__builtin_neon_vcltz_f16, + NEON::BI__builtin_neon_vcltz_v, + }, + { + NEON::BI__builtin_neon_vcltzq_f16, + NEON::BI__builtin_neon_vcltzq_v, + }, + { + NEON::BI__builtin_neon_vfma_f16, + NEON::BI__builtin_neon_vfma_v, + }, + { + NEON::BI__builtin_neon_vfma_lane_f16, + NEON::BI__builtin_neon_vfma_lane_v, + }, + { + NEON::BI__builtin_neon_vfma_laneq_f16, + NEON::BI__builtin_neon_vfma_laneq_v, + }, + { + NEON::BI__builtin_neon_vfmaq_f16, + NEON::BI__builtin_neon_vfmaq_v, + }, + { + NEON::BI__builtin_neon_vfmaq_lane_f16, + NEON::BI__builtin_neon_vfmaq_lane_v, + }, + { + NEON::BI__builtin_neon_vfmaq_laneq_f16, + NEON::BI__builtin_neon_vfmaq_laneq_v, + }, + {NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v}, + {NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v}, + {NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v}, + {NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v}, + {NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v}, + {NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v}, + {NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v}, + {NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v}, + {NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v}, + {NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v}, + {NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v}, + 
{NEON::BI__builtin_neon_vld1q_lane_bf16, + NEON::BI__builtin_neon_vld1q_lane_v}, + {NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v}, + {NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v}, + {NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v}, + {NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v}, + {NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v}, + {NEON::BI__builtin_neon_vld2q_lane_bf16, + NEON::BI__builtin_neon_vld2q_lane_v}, + {NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v}, + {NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v}, + {NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v}, + {NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v}, + {NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v}, + {NEON::BI__builtin_neon_vld3q_lane_bf16, + NEON::BI__builtin_neon_vld3q_lane_v}, + {NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v}, + {NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v}, + {NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v}, + {NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v}, + {NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v}, + {NEON::BI__builtin_neon_vld4q_lane_bf16, + NEON::BI__builtin_neon_vld4q_lane_v}, + { + NEON::BI__builtin_neon_vmax_f16, + NEON::BI__builtin_neon_vmax_v, + }, + { + NEON::BI__builtin_neon_vmaxnm_f16, + NEON::BI__builtin_neon_vmaxnm_v, + }, + { + NEON::BI__builtin_neon_vmaxnmq_f16, + NEON::BI__builtin_neon_vmaxnmq_v, + }, + { + NEON::BI__builtin_neon_vmaxq_f16, + NEON::BI__builtin_neon_vmaxq_v, + }, + { + NEON::BI__builtin_neon_vmin_f16, + NEON::BI__builtin_neon_vmin_v, + }, + { + NEON::BI__builtin_neon_vminnm_f16, + NEON::BI__builtin_neon_vminnm_v, + }, + { + NEON::BI__builtin_neon_vminnmq_f16, + NEON::BI__builtin_neon_vminnmq_v, + }, + { + NEON::BI__builtin_neon_vminq_f16, + NEON::BI__builtin_neon_vminq_v, + }, + { + NEON::BI__builtin_neon_vmulx_f16, + NEON::BI__builtin_neon_vmulx_v, + }, + { + NEON::BI__builtin_neon_vmulxq_f16, + NEON::BI__builtin_neon_vmulxq_v, + }, + { + NEON::BI__builtin_neon_vpadd_f16, + NEON::BI__builtin_neon_vpadd_v, + }, + { + NEON::BI__builtin_neon_vpaddq_f16, + NEON::BI__builtin_neon_vpaddq_v, + }, + { + NEON::BI__builtin_neon_vpmax_f16, + NEON::BI__builtin_neon_vpmax_v, + }, + { + NEON::BI__builtin_neon_vpmaxnm_f16, + NEON::BI__builtin_neon_vpmaxnm_v, + }, + { + NEON::BI__builtin_neon_vpmaxnmq_f16, + NEON::BI__builtin_neon_vpmaxnmq_v, + }, + { + NEON::BI__builtin_neon_vpmaxq_f16, + NEON::BI__builtin_neon_vpmaxq_v, + }, + { + NEON::BI__builtin_neon_vpmin_f16, + NEON::BI__builtin_neon_vpmin_v, + }, + { + NEON::BI__builtin_neon_vpminnm_f16, + NEON::BI__builtin_neon_vpminnm_v, + }, + { + NEON::BI__builtin_neon_vpminnmq_f16, + NEON::BI__builtin_neon_vpminnmq_v, + }, + { + NEON::BI__builtin_neon_vpminq_f16, + NEON::BI__builtin_neon_vpminq_v, + }, + { + NEON::BI__builtin_neon_vrecpe_f16, + NEON::BI__builtin_neon_vrecpe_v, + }, + { + NEON::BI__builtin_neon_vrecpeq_f16, + NEON::BI__builtin_neon_vrecpeq_v, + }, + { + NEON::BI__builtin_neon_vrecps_f16, + NEON::BI__builtin_neon_vrecps_v, + }, + { + NEON::BI__builtin_neon_vrecpsq_f16, + NEON::BI__builtin_neon_vrecpsq_v, + }, + { + NEON::BI__builtin_neon_vrnd_f16, + NEON::BI__builtin_neon_vrnd_v, + }, + { + NEON::BI__builtin_neon_vrnda_f16, + NEON::BI__builtin_neon_vrnda_v, + }, 
+ { + NEON::BI__builtin_neon_vrndaq_f16, + NEON::BI__builtin_neon_vrndaq_v, + }, + { + NEON::BI__builtin_neon_vrndi_f16, + NEON::BI__builtin_neon_vrndi_v, + }, + { + NEON::BI__builtin_neon_vrndiq_f16, + NEON::BI__builtin_neon_vrndiq_v, + }, + { + NEON::BI__builtin_neon_vrndm_f16, + NEON::BI__builtin_neon_vrndm_v, + }, + { + NEON::BI__builtin_neon_vrndmq_f16, + NEON::BI__builtin_neon_vrndmq_v, + }, + { + NEON::BI__builtin_neon_vrndn_f16, + NEON::BI__builtin_neon_vrndn_v, + }, + { + NEON::BI__builtin_neon_vrndnq_f16, + NEON::BI__builtin_neon_vrndnq_v, + }, + { + NEON::BI__builtin_neon_vrndp_f16, + NEON::BI__builtin_neon_vrndp_v, + }, + { + NEON::BI__builtin_neon_vrndpq_f16, + NEON::BI__builtin_neon_vrndpq_v, + }, + { + NEON::BI__builtin_neon_vrndq_f16, + NEON::BI__builtin_neon_vrndq_v, + }, + { + NEON::BI__builtin_neon_vrndx_f16, + NEON::BI__builtin_neon_vrndx_v, + }, + { + NEON::BI__builtin_neon_vrndxq_f16, + NEON::BI__builtin_neon_vrndxq_v, + }, + { + NEON::BI__builtin_neon_vrsqrte_f16, + NEON::BI__builtin_neon_vrsqrte_v, + }, + { + NEON::BI__builtin_neon_vrsqrteq_f16, + NEON::BI__builtin_neon_vrsqrteq_v, + }, + { + NEON::BI__builtin_neon_vrsqrts_f16, + NEON::BI__builtin_neon_vrsqrts_v, + }, + { + NEON::BI__builtin_neon_vrsqrtsq_f16, + NEON::BI__builtin_neon_vrsqrtsq_v, + }, + { + NEON::BI__builtin_neon_vsqrt_f16, + NEON::BI__builtin_neon_vsqrt_v, + }, + { + NEON::BI__builtin_neon_vsqrtq_f16, + NEON::BI__builtin_neon_vsqrtq_v, + }, + {NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v}, + {NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v}, + {NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v}, + {NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v}, + {NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v}, + {NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v}, + {NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v}, + {NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v}, + {NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v}, + {NEON::BI__builtin_neon_vst1q_lane_bf16, + NEON::BI__builtin_neon_vst1q_lane_v}, + {NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v}, + {NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v}, + {NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v}, + {NEON::BI__builtin_neon_vst2q_lane_bf16, + NEON::BI__builtin_neon_vst2q_lane_v}, + {NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v}, + {NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v}, + {NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v}, + {NEON::BI__builtin_neon_vst3q_lane_bf16, + NEON::BI__builtin_neon_vst3q_lane_v}, + {NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v}, + {NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v}, + {NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v}, + {NEON::BI__builtin_neon_vst4q_lane_bf16, + NEON::BI__builtin_neon_vst4q_lane_v}, + // The mangling rules cause us to have one ID for each type for + // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of + // them. Choose an arbitrary one to be handled as tha canonical variation. 
+ {NEON::BI__builtin_neon_vldap1_lane_u64, + NEON::BI__builtin_neon_vldap1_lane_s64}, + {NEON::BI__builtin_neon_vldap1_lane_f64, + NEON::BI__builtin_neon_vldap1_lane_s64}, + {NEON::BI__builtin_neon_vldap1_lane_p64, + NEON::BI__builtin_neon_vldap1_lane_s64}, + {NEON::BI__builtin_neon_vldap1q_lane_u64, + NEON::BI__builtin_neon_vldap1q_lane_s64}, + {NEON::BI__builtin_neon_vldap1q_lane_f64, + NEON::BI__builtin_neon_vldap1q_lane_s64}, + {NEON::BI__builtin_neon_vldap1q_lane_p64, + NEON::BI__builtin_neon_vldap1q_lane_s64}, + {NEON::BI__builtin_neon_vstl1_lane_u64, + NEON::BI__builtin_neon_vstl1_lane_s64}, + {NEON::BI__builtin_neon_vstl1_lane_f64, + NEON::BI__builtin_neon_vstl1_lane_s64}, + {NEON::BI__builtin_neon_vstl1_lane_p64, + NEON::BI__builtin_neon_vstl1_lane_s64}, + {NEON::BI__builtin_neon_vstl1q_lane_u64, + NEON::BI__builtin_neon_vstl1q_lane_s64}, + {NEON::BI__builtin_neon_vstl1q_lane_f64, + NEON::BI__builtin_neon_vstl1q_lane_s64}, + {NEON::BI__builtin_neon_vstl1q_lane_p64, + NEON::BI__builtin_neon_vstl1q_lane_s64}, +}; + +#undef NEONMAP0 +#undef NEONMAP1 +#undef NEONMAP2 + +#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier} + +#define SVEMAP2(NameBase, TypeModifier) \ + {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier} +static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { +#define GET_SVE_LLVM_INTRINSIC_MAP +#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def" +#include "clang/Basic/arm_sve_builtin_cg.inc" +#undef GET_SVE_LLVM_INTRINSIC_MAP +}; + +#undef SVEMAP1 +#undef SVEMAP2 + +#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + {#NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier} + +#define SMEMAP2(NameBase, TypeModifier) \ + {#NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier} +static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { +#define GET_SME_LLVM_INTRINSIC_MAP +#include "clang/Basic/arm_sme_builtin_cg.inc" +#undef GET_SME_LLVM_INTRINSIC_MAP +}; + +#undef SMEMAP1 +#undef SMEMAP2 + +// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, +// we handle them here. 
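
The comment above sums up the approach taken next: each family of width- and memory-order-specific MSVC builtin IDs is collapsed onto a single `MSVCIntrin` enumerator, and codegen is written once against that enumerator; the real mapping is the `translateAarch64ToMsvcIntrin` switch that follows. A toy sketch of the same collapsing idea, using made-up builtin IDs rather than the actual `clang::AArch64` ones:

```cpp
// Illustration only: several target-specific builtin IDs collapse onto one
// portable enumerator, in the spirit of translateAarch64ToMsvcIntrin below.
// The builtin IDs and enum here are made up, not real clang builtin IDs.
#include <cstdio>
#include <optional>

enum class FakeBuiltinID {
  InterlockedExchangeAdd8_acq,
  InterlockedExchangeAdd16_acq,
  InterlockedExchangeAdd_acq,
  InterlockedExchangeAdd64_acq,
  SomethingUnrelated,
};

enum class MSVCIntrin { InterlockedExchangeAdd_acq };

static std::optional<MSVCIntrin> translateToMsvcIntrin(FakeBuiltinID ID) {
  switch (ID) {
  // Every width of the "acquire" exchange-add shares one codegen path.
  case FakeBuiltinID::InterlockedExchangeAdd8_acq:
  case FakeBuiltinID::InterlockedExchangeAdd16_acq:
  case FakeBuiltinID::InterlockedExchangeAdd_acq:
  case FakeBuiltinID::InterlockedExchangeAdd64_acq:
    return MSVCIntrin::InterlockedExchangeAdd_acq;
  default:
    return std::nullopt; // not an MSVC-style builtin; handled elsewhere
  }
}

int main() {
  bool Mapped =
      translateToMsvcIntrin(FakeBuiltinID::InterlockedExchangeAdd16_acq).has_value();
  std::printf("mapped: %s\n", Mapped ? "yes" : "no");
  return 0;
}
```

In upstream clang the operand width is recovered from the call's argument types, which is what lets one enumerator cover the 8/16/32/64-bit variants; the same presumably applies here once the corresponding CIR lowering is filled in.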
+enum class CIRGenFunction::MSVCIntrin { + _BitScanForward, + _BitScanReverse, + _InterlockedAnd, + _InterlockedDecrement, + _InterlockedExchange, + _InterlockedExchangeAdd, + _InterlockedExchangeSub, + _InterlockedIncrement, + _InterlockedOr, + _InterlockedXor, + _InterlockedExchangeAdd_acq, + _InterlockedExchangeAdd_rel, + _InterlockedExchangeAdd_nf, + _InterlockedExchange_acq, + _InterlockedExchange_rel, + _InterlockedExchange_nf, + _InterlockedCompareExchange_acq, + _InterlockedCompareExchange_rel, + _InterlockedCompareExchange_nf, + _InterlockedCompareExchange128, + _InterlockedCompareExchange128_acq, + _InterlockedCompareExchange128_rel, + _InterlockedCompareExchange128_nf, + _InterlockedOr_acq, + _InterlockedOr_rel, + _InterlockedOr_nf, + _InterlockedXor_acq, + _InterlockedXor_rel, + _InterlockedXor_nf, + _InterlockedAnd_acq, + _InterlockedAnd_rel, + _InterlockedAnd_nf, + _InterlockedIncrement_acq, + _InterlockedIncrement_rel, + _InterlockedIncrement_nf, + _InterlockedDecrement_acq, + _InterlockedDecrement_rel, + _InterlockedDecrement_nf, + __fastfail, +}; + +static std::optional +translateAarch64ToMsvcIntrin(unsigned BuiltinID) { + using MSVCIntrin = CIRGenFunction::MSVCIntrin; + switch (BuiltinID) { + default: + return std::nullopt; + case clang::AArch64::BI_BitScanForward: + case clang::AArch64::BI_BitScanForward64: + return MSVCIntrin::_BitScanForward; + case clang::AArch64::BI_BitScanReverse: + case clang::AArch64::BI_BitScanReverse64: + return MSVCIntrin::_BitScanReverse; + case clang::AArch64::BI_InterlockedAnd64: + return MSVCIntrin::_InterlockedAnd; + case clang::AArch64::BI_InterlockedExchange64: + return MSVCIntrin::_InterlockedExchange; + case clang::AArch64::BI_InterlockedExchangeAdd64: + return MSVCIntrin::_InterlockedExchangeAdd; + case clang::AArch64::BI_InterlockedExchangeSub64: + return MSVCIntrin::_InterlockedExchangeSub; + case clang::AArch64::BI_InterlockedOr64: + return MSVCIntrin::_InterlockedOr; + case clang::AArch64::BI_InterlockedXor64: + return MSVCIntrin::_InterlockedXor; + case clang::AArch64::BI_InterlockedDecrement64: + return MSVCIntrin::_InterlockedDecrement; + case clang::AArch64::BI_InterlockedIncrement64: + return MSVCIntrin::_InterlockedIncrement; + case clang::AArch64::BI_InterlockedExchangeAdd8_acq: + case clang::AArch64::BI_InterlockedExchangeAdd16_acq: + case clang::AArch64::BI_InterlockedExchangeAdd_acq: + case clang::AArch64::BI_InterlockedExchangeAdd64_acq: + return MSVCIntrin::_InterlockedExchangeAdd_acq; + case clang::AArch64::BI_InterlockedExchangeAdd8_rel: + case clang::AArch64::BI_InterlockedExchangeAdd16_rel: + case clang::AArch64::BI_InterlockedExchangeAdd_rel: + case clang::AArch64::BI_InterlockedExchangeAdd64_rel: + return MSVCIntrin::_InterlockedExchangeAdd_rel; + case clang::AArch64::BI_InterlockedExchangeAdd8_nf: + case clang::AArch64::BI_InterlockedExchangeAdd16_nf: + case clang::AArch64::BI_InterlockedExchangeAdd_nf: + case clang::AArch64::BI_InterlockedExchangeAdd64_nf: + return MSVCIntrin::_InterlockedExchangeAdd_nf; + case clang::AArch64::BI_InterlockedExchange8_acq: + case clang::AArch64::BI_InterlockedExchange16_acq: + case clang::AArch64::BI_InterlockedExchange_acq: + case clang::AArch64::BI_InterlockedExchange64_acq: + return MSVCIntrin::_InterlockedExchange_acq; + case clang::AArch64::BI_InterlockedExchange8_rel: + case clang::AArch64::BI_InterlockedExchange16_rel: + case clang::AArch64::BI_InterlockedExchange_rel: + case clang::AArch64::BI_InterlockedExchange64_rel: + return MSVCIntrin::_InterlockedExchange_rel; + 
case clang::AArch64::BI_InterlockedExchange8_nf: + case clang::AArch64::BI_InterlockedExchange16_nf: + case clang::AArch64::BI_InterlockedExchange_nf: + case clang::AArch64::BI_InterlockedExchange64_nf: + return MSVCIntrin::_InterlockedExchange_nf; + case clang::AArch64::BI_InterlockedCompareExchange8_acq: + case clang::AArch64::BI_InterlockedCompareExchange16_acq: + case clang::AArch64::BI_InterlockedCompareExchange_acq: + case clang::AArch64::BI_InterlockedCompareExchange64_acq: + return MSVCIntrin::_InterlockedCompareExchange_acq; + case clang::AArch64::BI_InterlockedCompareExchange8_rel: + case clang::AArch64::BI_InterlockedCompareExchange16_rel: + case clang::AArch64::BI_InterlockedCompareExchange_rel: + case clang::AArch64::BI_InterlockedCompareExchange64_rel: + return MSVCIntrin::_InterlockedCompareExchange_rel; + case clang::AArch64::BI_InterlockedCompareExchange8_nf: + case clang::AArch64::BI_InterlockedCompareExchange16_nf: + case clang::AArch64::BI_InterlockedCompareExchange_nf: + case clang::AArch64::BI_InterlockedCompareExchange64_nf: + return MSVCIntrin::_InterlockedCompareExchange_nf; + case clang::AArch64::BI_InterlockedCompareExchange128: + return MSVCIntrin::_InterlockedCompareExchange128; + case clang::AArch64::BI_InterlockedCompareExchange128_acq: + return MSVCIntrin::_InterlockedCompareExchange128_acq; + case clang::AArch64::BI_InterlockedCompareExchange128_nf: + return MSVCIntrin::_InterlockedCompareExchange128_nf; + case clang::AArch64::BI_InterlockedCompareExchange128_rel: + return MSVCIntrin::_InterlockedCompareExchange128_rel; + case clang::AArch64::BI_InterlockedOr8_acq: + case clang::AArch64::BI_InterlockedOr16_acq: + case clang::AArch64::BI_InterlockedOr_acq: + case clang::AArch64::BI_InterlockedOr64_acq: + return MSVCIntrin::_InterlockedOr_acq; + case clang::AArch64::BI_InterlockedOr8_rel: + case clang::AArch64::BI_InterlockedOr16_rel: + case clang::AArch64::BI_InterlockedOr_rel: + case clang::AArch64::BI_InterlockedOr64_rel: + return MSVCIntrin::_InterlockedOr_rel; + case clang::AArch64::BI_InterlockedOr8_nf: + case clang::AArch64::BI_InterlockedOr16_nf: + case clang::AArch64::BI_InterlockedOr_nf: + case clang::AArch64::BI_InterlockedOr64_nf: + return MSVCIntrin::_InterlockedOr_nf; + case clang::AArch64::BI_InterlockedXor8_acq: + case clang::AArch64::BI_InterlockedXor16_acq: + case clang::AArch64::BI_InterlockedXor_acq: + case clang::AArch64::BI_InterlockedXor64_acq: + return MSVCIntrin::_InterlockedXor_acq; + case clang::AArch64::BI_InterlockedXor8_rel: + case clang::AArch64::BI_InterlockedXor16_rel: + case clang::AArch64::BI_InterlockedXor_rel: + case clang::AArch64::BI_InterlockedXor64_rel: + return MSVCIntrin::_InterlockedXor_rel; + case clang::AArch64::BI_InterlockedXor8_nf: + case clang::AArch64::BI_InterlockedXor16_nf: + case clang::AArch64::BI_InterlockedXor_nf: + case clang::AArch64::BI_InterlockedXor64_nf: + return MSVCIntrin::_InterlockedXor_nf; + case clang::AArch64::BI_InterlockedAnd8_acq: + case clang::AArch64::BI_InterlockedAnd16_acq: + case clang::AArch64::BI_InterlockedAnd_acq: + case clang::AArch64::BI_InterlockedAnd64_acq: + return MSVCIntrin::_InterlockedAnd_acq; + case clang::AArch64::BI_InterlockedAnd8_rel: + case clang::AArch64::BI_InterlockedAnd16_rel: + case clang::AArch64::BI_InterlockedAnd_rel: + case clang::AArch64::BI_InterlockedAnd64_rel: + return MSVCIntrin::_InterlockedAnd_rel; + case clang::AArch64::BI_InterlockedAnd8_nf: + case clang::AArch64::BI_InterlockedAnd16_nf: + case clang::AArch64::BI_InterlockedAnd_nf: + case 
clang::AArch64::BI_InterlockedAnd64_nf: + return MSVCIntrin::_InterlockedAnd_nf; + case clang::AArch64::BI_InterlockedIncrement16_acq: + case clang::AArch64::BI_InterlockedIncrement_acq: + case clang::AArch64::BI_InterlockedIncrement64_acq: + return MSVCIntrin::_InterlockedIncrement_acq; + case clang::AArch64::BI_InterlockedIncrement16_rel: + case clang::AArch64::BI_InterlockedIncrement_rel: + case clang::AArch64::BI_InterlockedIncrement64_rel: + return MSVCIntrin::_InterlockedIncrement_rel; + case clang::AArch64::BI_InterlockedIncrement16_nf: + case clang::AArch64::BI_InterlockedIncrement_nf: + case clang::AArch64::BI_InterlockedIncrement64_nf: + return MSVCIntrin::_InterlockedIncrement_nf; + case clang::AArch64::BI_InterlockedDecrement16_acq: + case clang::AArch64::BI_InterlockedDecrement_acq: + case clang::AArch64::BI_InterlockedDecrement64_acq: + return MSVCIntrin::_InterlockedDecrement_acq; + case clang::AArch64::BI_InterlockedDecrement16_rel: + case clang::AArch64::BI_InterlockedDecrement_rel: + case clang::AArch64::BI_InterlockedDecrement64_rel: + return MSVCIntrin::_InterlockedDecrement_rel; + case clang::AArch64::BI_InterlockedDecrement16_nf: + case clang::AArch64::BI_InterlockedDecrement_nf: + case clang::AArch64::BI_InterlockedDecrement64_nf: + return MSVCIntrin::_InterlockedDecrement_nf; + } + llvm_unreachable("must return from switch"); +} + +static bool AArch64SIMDIntrinsicsProvenSorted = false; +static bool AArch64SISDIntrinsicsProvenSorted = false; +static bool AArch64SVEIntrinsicsProvenSorted = false; +static bool AArch64SMEIntrinsicsProvenSorted = false; + +static const ARMVectorIntrinsicInfo * +findARMVectorIntrinsicInMap(ArrayRef IntrinsicMap, + unsigned BuiltinID, bool &MapProvenSorted) { + +#ifndef NDEBUG + if (!MapProvenSorted) { + assert(llvm::is_sorted(IntrinsicMap)); + MapProvenSorted = true; + } +#endif + + const ARMVectorIntrinsicInfo *Builtin = + llvm::lower_bound(IntrinsicMap, BuiltinID); + + if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) + return Builtin; + + return nullptr; +} + +static mlir::Type GetNeonType(CIRGenFunction *CGF, NeonTypeFlags TypeFlags, + bool HasLegalHalfType = true, bool V1Ty = false, + bool AllowBFloatArgsAndRet = true) { + int IsQuad = TypeFlags.isQuad(); + switch (TypeFlags.getEltType()) { + case NeonTypeFlags::Int8: + case NeonTypeFlags::Poly8: + return mlir::cir::VectorType::get(CGF->getBuilder().getContext(), + TypeFlags.isUnsigned() ? CGF->UInt8Ty + : CGF->SInt8Ty, + V1Ty ? 1 : (8 << IsQuad)); + case NeonTypeFlags::Int16: + case NeonTypeFlags::Poly16: + return mlir::cir::VectorType::get(CGF->getBuilder().getContext(), + TypeFlags.isUnsigned() ? CGF->UInt16Ty + : CGF->SInt16Ty, + V1Ty ? 1 : (4 << IsQuad)); + case NeonTypeFlags::BFloat16: + if (AllowBFloatArgsAndRet) + llvm_unreachable("NYI"); + else + llvm_unreachable("NYI"); + case NeonTypeFlags::Float16: + if (HasLegalHalfType) + llvm_unreachable("NYI"); + else + llvm_unreachable("NYI"); + case NeonTypeFlags::Int32: + return mlir::cir::VectorType::get(CGF->getBuilder().getContext(), + TypeFlags.isUnsigned() ? CGF->UInt32Ty + : CGF->SInt32Ty, + V1Ty ? 1 : (2 << IsQuad)); + case NeonTypeFlags::Int64: + case NeonTypeFlags::Poly64: + return mlir::cir::VectorType::get(CGF->getBuilder().getContext(), + TypeFlags.isUnsigned() ? CGF->UInt64Ty + : CGF->SInt64Ty, + V1Ty ? 1 : (1 << IsQuad)); + case NeonTypeFlags::Poly128: + // FIXME: i128 and f128 doesn't get fully support in Clang and llvm. + // There is a lot of i128 and f128 API missing. 
+ // so we use v16i8 to represent poly128 and get pattern matched. + llvm_unreachable("NYI"); + case NeonTypeFlags::Float32: + return mlir::cir::VectorType::get(CGF->getBuilder().getContext(), + CGF->getCIRGenModule().FloatTy, + V1Ty ? 1 : (2 << IsQuad)); + case NeonTypeFlags::Float64: + llvm_unreachable("NYI"); + } + llvm_unreachable("Unknown vector element type!"); +} + +static mlir::Value buildAArch64TblBuiltinExpr(CIRGenFunction &CGF, + unsigned BuiltinID, + const CallExpr *E, + SmallVectorImpl &Ops, + llvm::Triple::ArchType Arch) { + unsigned int Int = 0; + [[maybe_unused]] const char *s = nullptr; + + switch (BuiltinID) { + default: + return {}; + case NEON::BI__builtin_neon_vtbl1_v: + case NEON::BI__builtin_neon_vqtbl1_v: + case NEON::BI__builtin_neon_vqtbl1q_v: + case NEON::BI__builtin_neon_vtbl2_v: + case NEON::BI__builtin_neon_vqtbl2_v: + case NEON::BI__builtin_neon_vqtbl2q_v: + case NEON::BI__builtin_neon_vtbl3_v: + case NEON::BI__builtin_neon_vqtbl3_v: + case NEON::BI__builtin_neon_vqtbl3q_v: + case NEON::BI__builtin_neon_vtbl4_v: + case NEON::BI__builtin_neon_vqtbl4_v: + case NEON::BI__builtin_neon_vqtbl4q_v: + break; + case NEON::BI__builtin_neon_vtbx1_v: + case NEON::BI__builtin_neon_vqtbx1_v: + case NEON::BI__builtin_neon_vqtbx1q_v: + case NEON::BI__builtin_neon_vtbx2_v: + case NEON::BI__builtin_neon_vqtbx2_v: + case NEON::BI__builtin_neon_vqtbx2q_v: + case NEON::BI__builtin_neon_vtbx3_v: + case NEON::BI__builtin_neon_vqtbx3_v: + case NEON::BI__builtin_neon_vqtbx3q_v: + case NEON::BI__builtin_neon_vtbx4_v: + case NEON::BI__builtin_neon_vqtbx4_v: + case NEON::BI__builtin_neon_vqtbx4q_v: + break; + } + + assert(E->getNumArgs() >= 3); + + // Get the last argument, which specifies the vector type. + const Expr *Arg = E->getArg(E->getNumArgs() - 1); + std::optional Result = + Arg->getIntegerConstantExpr(CGF.getContext()); + if (!Result) + return nullptr; + + // Determine the type of this overloaded NEON intrinsic. + NeonTypeFlags Type = Result->getZExtValue(); + auto Ty = GetNeonType(&CGF, Type); + if (!Ty) + return nullptr; + + // AArch64 scalar builtins are not overloaded, they do not have an extra + // argument that specifies the vector type, need to handle each case. 
+ switch (BuiltinID) { + case NEON::BI__builtin_neon_vtbl1_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtbl2_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtbl3_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtbl4_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtbx1_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtbx2_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtbx3_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbl1_v: + case NEON::BI__builtin_neon_vqtbl1q_v: + Int = Intrinsic::aarch64_neon_tbl1; + s = "vtbl1"; + break; + case NEON::BI__builtin_neon_vqtbl2_v: + case NEON::BI__builtin_neon_vqtbl2q_v: { + Int = Intrinsic::aarch64_neon_tbl2; + s = "vtbl2"; + break; + case NEON::BI__builtin_neon_vqtbl3_v: + case NEON::BI__builtin_neon_vqtbl3q_v: + Int = Intrinsic::aarch64_neon_tbl3; + s = "vtbl3"; + break; + case NEON::BI__builtin_neon_vqtbl4_v: + case NEON::BI__builtin_neon_vqtbl4q_v: + Int = Intrinsic::aarch64_neon_tbl4; + s = "vtbl4"; + break; + case NEON::BI__builtin_neon_vqtbx1_v: + case NEON::BI__builtin_neon_vqtbx1q_v: + Int = Intrinsic::aarch64_neon_tbx1; + s = "vtbx1"; + break; + case NEON::BI__builtin_neon_vqtbx2_v: + case NEON::BI__builtin_neon_vqtbx2q_v: + Int = Intrinsic::aarch64_neon_tbx2; + s = "vtbx2"; + break; + case NEON::BI__builtin_neon_vqtbx3_v: + case NEON::BI__builtin_neon_vqtbx3q_v: + Int = Intrinsic::aarch64_neon_tbx3; + s = "vtbx3"; + break; + case NEON::BI__builtin_neon_vqtbx4_v: + case NEON::BI__builtin_neon_vqtbx4q_v: + Int = Intrinsic::aarch64_neon_tbx4; + s = "vtbx4"; + break; + } + } + + if (!Int) + return nullptr; + + llvm_unreachable("NYI"); +} + +mlir::Value CIRGenFunction::buildAArch64SMEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, + AArch64SMEIntrinsicsProvenSorted); + (void)Builtin; + llvm_unreachable("NYI"); +} + +mlir::Value CIRGenFunction::buildAArch64SVEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && + BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) { + llvm_unreachable("NYI"); + } + auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, + AArch64SVEIntrinsicsProvenSorted); + (void)Builtin; + llvm_unreachable("NYI"); +} + +mlir::Value CIRGenFunction::buildScalarOrConstFoldImmArg(unsigned ICEArguments, + unsigned Idx, + const CallExpr *E) { + mlir::Value Arg = {}; + if ((ICEArguments & (1 << Idx)) == 0) { + Arg = buildScalarExpr(E->getArg(Idx)); + } else { + // If this is required to be a constant, constant fold it so that we + // know that the generated intrinsic gets a ConstantInt. 
+ std::optional Result = + E->getArg(Idx)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); + Arg = builder.getConstInt(getLoc(E->getSourceRange()), *Result); + } + return Arg; +} + +static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID, + const CallExpr *clangCallExpr, + CIRGenFunction &cgf) { + StringRef intrinsicName; + if (builtinID == clang::AArch64::BI__builtin_arm_ldrex) { + intrinsicName = "llvm.aarch64.ldxr"; + } else { + llvm_unreachable("Unknown builtinID"); + } + // Argument + mlir::Value loadAddr = cgf.buildScalarExpr(clangCallExpr->getArg(0)); + // Get Instrinc call + CIRGenBuilderTy &builder = cgf.getBuilder(); + QualType clangResTy = clangCallExpr->getType(); + mlir::Type realResTy = cgf.ConvertType(clangResTy); + // Return type of LLVM intrinsic is defined in Intrinsic.td, + // which can be found under LLVM IR directory. + mlir::Type funcResTy = builder.getSInt64Ty(); + mlir::Location loc = cgf.getLoc(clangCallExpr->getExprLoc()); + mlir::cir::IntrinsicCallOp op = builder.create( + loc, builder.getStringAttr(intrinsicName), funcResTy, loadAddr); + mlir::Value res = op.getResult(); + + // Convert result type to the expected type. + if (mlir::isa(realResTy)) { + return builder.createIntToPtr(res, realResTy); + } + mlir::cir::IntType intResTy = + builder.getSIntNTy(cgf.CGM.getDataLayout().getTypeSizeInBits(realResTy)); + mlir::Value intCastRes = builder.createIntCast(res, intResTy); + if (mlir::isa(realResTy)) { + return builder.createIntCast(intCastRes, realResTy); + } else { + // Above cases should cover most situations and we have test coverage. + llvm_unreachable("Unsupported return type for now"); + } +} + +mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf, + llvm::SmallVector argTypes, + llvm::SmallVector args, + llvm::StringRef intrinsicName, mlir::Type funcResTy, + mlir::Location loc, + bool isConstrainedFPIntrinsic = false, + unsigned shift = 0, bool rightshift = false) { + // TODO: Consider removing the following unreachable when we have + // buildConstrainedFPCall feature implemented + assert(!MissingFeatures::buildConstrainedFPCall()); + if (isConstrainedFPIntrinsic) + llvm_unreachable("isConstrainedFPIntrinsic NYI"); + // TODO: Remove the following unreachable and call it in the loop once + // there is an implementation of buildNeonShiftVector + if (shift > 0) + llvm_unreachable("Argument shift NYI"); + + CIRGenBuilderTy &builder = cgf.getBuilder(); + for (unsigned j = 0; j < argTypes.size(); ++j) { + if (isConstrainedFPIntrinsic) { + assert(!MissingFeatures::buildConstrainedFPCall()); + } + if (shift > 0 && shift == j) { + assert(!MissingFeatures::buildNeonShiftVector()); + } else { + args[j] = builder.createBitcast(args[j], argTypes[j]); + } + } + if (isConstrainedFPIntrinsic) { + assert(!MissingFeatures::buildConstrainedFPCall()); + return nullptr; + } else { + return builder + .create( + loc, builder.getStringAttr(intrinsicName), funcResTy, args) + .getResult(); + } +} + +mlir::Value +CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch) { + if (BuiltinID >= clang::AArch64::FirstSVEBuiltin && + BuiltinID <= clang::AArch64::LastSVEBuiltin) + return buildAArch64SVEBuiltinExpr(BuiltinID, E); + + if (BuiltinID >= clang::AArch64::FirstSMEBuiltin && + BuiltinID <= clang::AArch64::LastSMEBuiltin) + return buildAArch64SMEBuiltinExpr(BuiltinID, E); + + if (BuiltinID == 
Builtin::BI__builtin_cpu_supports) + llvm_unreachable("NYI"); + + unsigned HintID = static_cast(-1); + switch (BuiltinID) { + default: + break; + case clang::AArch64::BI__builtin_arm_nop: + HintID = 0; + break; + case clang::AArch64::BI__builtin_arm_yield: + case clang::AArch64::BI__yield: + HintID = 1; + break; + case clang::AArch64::BI__builtin_arm_wfe: + case clang::AArch64::BI__wfe: + HintID = 2; + break; + case clang::AArch64::BI__builtin_arm_wfi: + case clang::AArch64::BI__wfi: + HintID = 3; + break; + case clang::AArch64::BI__builtin_arm_sev: + case clang::AArch64::BI__sev: + HintID = 4; + break; + case clang::AArch64::BI__builtin_arm_sevl: + case clang::AArch64::BI__sevl: + HintID = 5; + break; + } + + if (HintID != static_cast(-1)) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) { + // Create call to __arm_sme_state and store the results to the two pointers. + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "rbit of unusual size!"); + llvm_unreachable("NYI"); + } + if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) { + assert((getContext().getTypeSize(E->getType()) == 64) && + "rbit of unusual size!"); + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_clz || + BuiltinID == clang::AArch64::BI__builtin_arm_clz64) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) { + llvm_unreachable("NYI"); + } + if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf || + BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "__jcvt of unusual size!"); + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b || + BuiltinID == clang::AArch64::BI__builtin_arm_st64b || + BuiltinID == clang::AArch64::BI__builtin_arm_st64bv || + BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) { + llvm_unreachable("NYI"); + + if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) { + // Load from the address via an LLVM intrinsic, receiving a + // tuple of 8 i64 words, and store each one to ValPtr. + llvm_unreachable("NYI"); + } else { + // Load 8 i64 words from ValPtr, and store them to the address + // via an LLVM intrinsic. 
+ llvm_unreachable("NYI"); + } + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr || + BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__clear_cache) { + assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); + llvm_unreachable("NYI"); + } + + if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || + BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) && + getContext().getTypeSize(E->getType()) == 128) { + llvm_unreachable("NYI"); + } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || + BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) { + return buildArmLdrexNon128Intrinsic(BuiltinID, E, *this); + } + + if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex || + BuiltinID == clang::AArch64::BI__builtin_arm_stlex) && + getContext().getTypeSize(E->getArg(0)->getType()) == 128) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_strex || + BuiltinID == clang::AArch64::BI__builtin_arm_stlex) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__getReg) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__break) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier) + llvm_unreachable("NYI"); + + // CRC32 + // FIXME(cir): get rid of LLVM when this gets implemented. + llvm::Intrinsic::ID CRCIntrinsicID = llvm::Intrinsic::not_intrinsic; + switch (BuiltinID) { + case clang::AArch64::BI__builtin_arm_crc32b: + case clang::AArch64::BI__builtin_arm_crc32cb: + case clang::AArch64::BI__builtin_arm_crc32h: + case clang::AArch64::BI__builtin_arm_crc32ch: + case clang::AArch64::BI__builtin_arm_crc32w: + case clang::AArch64::BI__builtin_arm_crc32cw: + case clang::AArch64::BI__builtin_arm_crc32d: + case clang::AArch64::BI__builtin_arm_crc32cd: + llvm_unreachable("NYI"); + } + + if (CRCIntrinsicID != llvm::Intrinsic::not_intrinsic) { + llvm_unreachable("NYI"); + } + + // Memory Operations (MOPS) + if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) { + llvm_unreachable("NYI"); + } + + // Memory Tagging Extensions (MTE) Intrinsics + // FIXME(cir): get rid of LLVM when this gets implemented. 
+ llvm::Intrinsic::ID MTEIntrinsicID = llvm::Intrinsic::not_intrinsic; + switch (BuiltinID) { + case clang::AArch64::BI__builtin_arm_irg: + case clang::AArch64::BI__builtin_arm_addg: + case clang::AArch64::BI__builtin_arm_gmi: + case clang::AArch64::BI__builtin_arm_ldg: + case clang::AArch64::BI__builtin_arm_stg: + case clang::AArch64::BI__builtin_arm_subp: + llvm_unreachable("NYI"); + } + + if (MTEIntrinsicID != llvm::Intrinsic::not_intrinsic) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 || + BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) { + + llvm_unreachable("NYI"); + if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || + BuiltinID == clang::AArch64::BI__builtin_arm_rsrp) + llvm_unreachable("NYI"); + + bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || + BuiltinID == clang::AArch64::BI__builtin_arm_wsrp; + + bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr; + + bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || + BuiltinID == clang::AArch64::BI__builtin_arm_wsr128; + + if (Is32Bit) { + llvm_unreachable("NYI"); + } else if (Is128Bit) { + llvm_unreachable("NYI"); + } else if (IsPointerBuiltin) { + llvm_unreachable("NYI"); + } else { + llvm_unreachable("NYI"); + }; + + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__builtin_sponentry) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI_ReadStatusReg || + BuiltinID == clang::AArch64::BI_WriteStatusReg) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == clang::AArch64::BI__mulh || + BuiltinID == clang::AArch64::BI__umulh) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI__writex18byte || + BuiltinID == AArch64::BI__writex18word || + BuiltinID == AArch64::BI__writex18dword || + BuiltinID == AArch64::BI__writex18qword) { + // Read x18 as i8* + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI__readx18byte || + BuiltinID == AArch64::BI__readx18word || + BuiltinID == AArch64::BI__readx18dword || + BuiltinID == AArch64::BI__readx18qword) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || + BuiltinID == AArch64::BI_CopyFloatFromInt32 || + BuiltinID == AArch64::BI_CopyInt32FromFloat || + BuiltinID == AArch64::BI_CopyInt64FromDouble) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI_CountLeadingOnes || + BuiltinID == AArch64::BI_CountLeadingOnes64 || + BuiltinID == AArch64::BI_CountLeadingZeros || + BuiltinID == AArch64::BI_CountLeadingZeros64) { + llvm_unreachable("NYI"); + + if (BuiltinID == AArch64::BI_CountLeadingOnes || + BuiltinID == AArch64::BI_CountLeadingOnes64) + llvm_unreachable("NYI"); + + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI_CountLeadingSigns || + BuiltinID == AArch64::BI_CountLeadingSigns64) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI_CountOneBits || + BuiltinID == 
AArch64::BI_CountOneBits64) { + llvm_unreachable("NYI"); + } + + if (BuiltinID == AArch64::BI__prefetch) { + llvm_unreachable("NYI"); + } + + // Handle MSVC intrinsics before argument evaluation to prevent double + // evaluation. + if (std::optional MsvcIntId = + translateAarch64ToMsvcIntrin(BuiltinID)) + llvm_unreachable("NYI"); + + // Some intrinsics are equivalent - if they are use the base intrinsic ID. + auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { + return P.first == BuiltinID; + }); + if (It != end(NEONEquivalentIntrinsicMap)) + BuiltinID = It->second; + + // Find out if any arguments are required to be integer constant + // expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + + llvm::SmallVector Ops; + Address PtrOp0 = Address::invalid(); + for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { + if (i == 0) { + switch (BuiltinID) { + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: + case NEON::BI__builtin_neon_vld1_lane_v: + case NEON::BI__builtin_neon_vld1q_lane_v: + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + case NEON::BI__builtin_neon_vst1_lane_v: + case NEON::BI__builtin_neon_vst1q_lane_v: + case NEON::BI__builtin_neon_vldap1_lane_s64: + case NEON::BI__builtin_neon_vldap1q_lane_s64: + case NEON::BI__builtin_neon_vstl1_lane_s64: + case NEON::BI__builtin_neon_vstl1q_lane_s64: + // Get the alignment for the argument in addition to the value; + // we'll use it later. + PtrOp0 = buildPointerWithAlignment(E->getArg(0)); + Ops.push_back(PtrOp0.emitRawPointer()); + continue; + } + } + Ops.push_back(buildScalarOrConstFoldImmArg(ICEArguments, i, E)); + } + + auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); + const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( + SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); + + if (Builtin) { + llvm_unreachable("NYI"); + } + + const Expr *Arg = E->getArg(E->getNumArgs() - 1); + NeonTypeFlags Type(0); + if (std::optional Result = + Arg->getIntegerConstantExpr(getContext())) + // Determine the type of this overloaded NEON intrinsic. + Type = NeonTypeFlags(Result->getZExtValue()); + + bool usgn = Type.isUnsigned(); + + // Handle non-overloaded intrinsics first. 
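// Editorial sketch, not part of the patch: what the argument-collection loop
// above is dealing with. GetBuiltinType fills the ICEArguments bitmask with
// the operands that must be integer constant expressions (lane numbers, shift
// immediates and similar), which buildScalarOrConstFoldImmArg then
// constant-folds instead of evaluating as ordinary scalars; the loop also
// stops one argument short of getNumArgs() because the trailing argument of an
// overloaded NEON builtin is the hidden type code decoded just above into
// NeonTypeFlags. For example, the shift amount here must be a compile-time
// constant:
#include <arm_neon.h>
int32x4_t shift_right_by_3(int32x4_t v) {
  return vshrq_n_s32(v, 3); // '3' is an ICE-constrained immediate operand
}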
+ switch (BuiltinID) { + default: + break; + case NEON::BI__builtin_neon_vabsh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vaddq_p128: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vldrq_p128: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vstrq_p128: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvts_f32_u32: + case NEON::BI__builtin_neon_vcvtd_f64_u64: + usgn = true; + [[fallthrough]]; + case NEON::BI__builtin_neon_vcvts_f32_s32: + case NEON::BI__builtin_neon_vcvtd_f64_s64: { + if (usgn) + llvm_unreachable("NYI"); + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvth_f16_u16: + case NEON::BI__builtin_neon_vcvth_f16_u32: + case NEON::BI__builtin_neon_vcvth_f16_u64: + usgn = true; + [[fallthrough]]; + case NEON::BI__builtin_neon_vcvth_f16_s16: + case NEON::BI__builtin_neon_vcvth_f16_s32: + case NEON::BI__builtin_neon_vcvth_f16_s64: { + if (usgn) + llvm_unreachable("NYI"); + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvtah_u16_f16: + case NEON::BI__builtin_neon_vcvtmh_u16_f16: + case NEON::BI__builtin_neon_vcvtnh_u16_f16: + case NEON::BI__builtin_neon_vcvtph_u16_f16: + case NEON::BI__builtin_neon_vcvth_u16_f16: + case NEON::BI__builtin_neon_vcvtah_s16_f16: + case NEON::BI__builtin_neon_vcvtmh_s16_f16: + case NEON::BI__builtin_neon_vcvtnh_s16_f16: + case NEON::BI__builtin_neon_vcvtph_s16_f16: + case NEON::BI__builtin_neon_vcvth_s16_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcaleh_f16: + case NEON::BI__builtin_neon_vcalth_f16: + case NEON::BI__builtin_neon_vcageh_f16: + case NEON::BI__builtin_neon_vcagth_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvth_n_s16_f16: + case NEON::BI__builtin_neon_vcvth_n_u16_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvth_n_f16_s16: + case NEON::BI__builtin_neon_vcvth_n_f16_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vpaddd_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vpaddd_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vpadds_f32: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vceqzd_s64: + case NEON::BI__builtin_neon_vceqzd_f64: + case NEON::BI__builtin_neon_vceqzs_f32: + case NEON::BI__builtin_neon_vceqzh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vcgezd_s64: + case NEON::BI__builtin_neon_vcgezd_f64: + case NEON::BI__builtin_neon_vcgezs_f32: + case NEON::BI__builtin_neon_vcgezh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vclezd_s64: + case NEON::BI__builtin_neon_vclezd_f64: + case NEON::BI__builtin_neon_vclezs_f32: + case NEON::BI__builtin_neon_vclezh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vcgtzd_s64: + case NEON::BI__builtin_neon_vcgtzd_f64: + case NEON::BI__builtin_neon_vcgtzs_f32: + case NEON::BI__builtin_neon_vcgtzh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vcltzd_s64: + case NEON::BI__builtin_neon_vcltzd_f64: + case NEON::BI__builtin_neon_vcltzs_f32: + case NEON::BI__builtin_neon_vcltzh_f16: + llvm_unreachable("NYI"); + + case NEON::BI__builtin_neon_vceqzd_u64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vceqd_f64: + case NEON::BI__builtin_neon_vcled_f64: + case NEON::BI__builtin_neon_vcltd_f64: + case NEON::BI__builtin_neon_vcged_f64: + case NEON::BI__builtin_neon_vcgtd_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vceqs_f32: + case NEON::BI__builtin_neon_vcles_f32: + case 
NEON::BI__builtin_neon_vclts_f32: + case NEON::BI__builtin_neon_vcges_f32: + case NEON::BI__builtin_neon_vcgts_f32: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vceqh_f16: + case NEON::BI__builtin_neon_vcleh_f16: + case NEON::BI__builtin_neon_vclth_f16: + case NEON::BI__builtin_neon_vcgeh_f16: + case NEON::BI__builtin_neon_vcgth_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vceqd_s64: + case NEON::BI__builtin_neon_vceqd_u64: + case NEON::BI__builtin_neon_vcgtd_s64: + case NEON::BI__builtin_neon_vcgtd_u64: + case NEON::BI__builtin_neon_vcltd_s64: + case NEON::BI__builtin_neon_vcltd_u64: + case NEON::BI__builtin_neon_vcged_u64: + case NEON::BI__builtin_neon_vcged_s64: + case NEON::BI__builtin_neon_vcled_u64: + case NEON::BI__builtin_neon_vcled_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtstd_s64: + case NEON::BI__builtin_neon_vtstd_u64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vset_lane_i8: + case NEON::BI__builtin_neon_vset_lane_i16: + case NEON::BI__builtin_neon_vset_lane_i32: + case NEON::BI__builtin_neon_vset_lane_i64: + case NEON::BI__builtin_neon_vset_lane_f32: + case NEON::BI__builtin_neon_vsetq_lane_i8: + case NEON::BI__builtin_neon_vsetq_lane_i16: + case NEON::BI__builtin_neon_vsetq_lane_i32: + case NEON::BI__builtin_neon_vsetq_lane_i64: + case NEON::BI__builtin_neon_vsetq_lane_f32: + Ops.push_back(buildScalarExpr(E->getArg(2))); + return builder.create(getLoc(E->getExprLoc()), + Ops[1], Ops[0], Ops[2]); + case NEON::BI__builtin_neon_vset_lane_bf16: + case NEON::BI__builtin_neon_vsetq_lane_bf16: + // No support for now as no real/test case for them + // at the moment, the implementation should be the same as above + // vset_lane or vsetq_lane intrinsics + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vset_lane_f64: + // The vector type needs a cast for the v1f64 variant. + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vsetq_lane_f64: + // The vector type needs a cast for the v2f64 variant. 
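// Editorial sketch, not part of the patch: the vset_lane/vsetq_lane cases a
// few lines above are one of the handful of implemented paths. The
// builder.create call there presumably builds CIR's vector-insert operation,
// taking the vector (Ops[1]), the replacement scalar (Ops[0]) and the constant
// lane index (Ops[2]). At the source level:
#include <arm_neon.h>
int32x4_t set_lane_2(int32x4_t v, int32_t x) {
  return vsetq_lane_s32(x, v, 2); // insert x into lane 2 of v
}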
+ llvm_unreachable("NYI"); + + case NEON::BI__builtin_neon_vget_lane_i8: + case NEON::BI__builtin_neon_vdupb_lane_i8: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt8Ty, 8)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vgetq_lane_i8: + case NEON::BI__builtin_neon_vdupb_laneq_i8: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt8Ty, 16)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vget_lane_i16: + case NEON::BI__builtin_neon_vduph_lane_i16: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt16Ty, 4)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vgetq_lane_i16: + case NEON::BI__builtin_neon_vduph_laneq_i16: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt16Ty, 8)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vget_lane_i32: + case NEON::BI__builtin_neon_vdups_lane_i32: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt32Ty, 2)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vget_lane_f32: + case NEON::BI__builtin_neon_vdups_lane_f32: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), FloatTy, 2)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vgetq_lane_i32: + case NEON::BI__builtin_neon_vdups_laneq_i32: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt32Ty, 4)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vget_lane_i64: + case NEON::BI__builtin_neon_vdupd_lane_i64: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt64Ty, 1)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vdupd_lane_f64: + case NEON::BI__builtin_neon_vget_lane_f64: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), DoubleTy, 1)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vgetq_lane_i64: + case NEON::BI__builtin_neon_vdupd_laneq_i64: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt64Ty, 2)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vgetq_lane_f32: + case NEON::BI__builtin_neon_vdups_laneq_f32: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), FloatTy, 4)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case NEON::BI__builtin_neon_vgetq_lane_f64: + case NEON::BI__builtin_neon_vdupd_laneq_f64: + Ops[0] = builder.createBitcast( + Ops[0], mlir::cir::VectorType::get(builder.getContext(), DoubleTy, 2)); + return builder.create( + getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1))); + case 
NEON::BI__builtin_neon_vaddh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vsubh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vmulh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vdivh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vfmah_f16: + // NEON intrinsic puts accumulator first, unlike the LLVM fma. + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vfmsh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddd_s64: + case NEON::BI__builtin_neon_vaddd_u64: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vsubd_s64: + case NEON::BI__builtin_neon_vsubd_u64: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vqdmlalh_s16: + case NEON::BI__builtin_neon_vqdmlslh_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqshlud_n_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqshld_n_u64: + case NEON::BI__builtin_neon_vqshld_n_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrshrd_n_u64: + case NEON::BI__builtin_neon_vrshrd_n_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrsrad_n_u64: + case NEON::BI__builtin_neon_vrsrad_n_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vshld_n_s64: + case NEON::BI__builtin_neon_vshld_n_u64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vshrd_n_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vshrd_n_u64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsrad_n_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsrad_n_u64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqdmlalh_lane_s16: + case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: + case NEON::BI__builtin_neon_vqdmlslh_lane_s16: + case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqdmlals_s32: + case NEON::BI__builtin_neon_vqdmlsls_s32: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqdmlals_lane_s32: + case NEON::BI__builtin_neon_vqdmlals_laneq_s32: + case NEON::BI__builtin_neon_vqdmlsls_lane_s32: + case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vget_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vgetq_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: + case NEON::BI__builtin_neon_vduph_laneq_f16: { + llvm_unreachable("NYI"); + } + + case clang::AArch64::BI_InterlockedAdd: + case clang::AArch64::BI_InterlockedAdd64: { + llvm_unreachable("NYI"); + } + } + + auto Ty = GetNeonType(this, Type); + if (!Ty) + return nullptr; + + // Not all intrinsics handled by the common case work for AArch64 yet, so only + // defer to common code if it's been added to our special map. 
+ Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, + AArch64SIMDIntrinsicsProvenSorted); + if (Builtin) { + llvm_unreachable("NYI"); + } + + if (mlir::Value V = + buildAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) + return V; + + mlir::Type VTy = Ty; + llvm::SmallVector args; + switch (BuiltinID) { + default: + return nullptr; + case NEON::BI__builtin_neon_vbsl_v: + case NEON::BI__builtin_neon_vbslq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vfma_lane_v: + case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types + // The ARM builtins (and instructions) have the addend as the first + // operand, but the 'fma' intrinsics have it last. Swap it around here. + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vfma_laneq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vfmaq_laneq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vfmah_lane_f16: + case NEON::BI__builtin_neon_vfmas_lane_f32: + case NEON::BI__builtin_neon_vfmah_laneq_f16: + case NEON::BI__builtin_neon_vfmas_laneq_f32: + case NEON::BI__builtin_neon_vfmad_lane_f64: + case NEON::BI__builtin_neon_vfmad_laneq_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmull_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vmax_v: + case NEON::BI__builtin_neon_vmaxq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vmaxh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmin_v: + case NEON::BI__builtin_neon_vminq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vminh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vabd_v: + case NEON::BI__builtin_neon_vabdq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vpadal_v: + case NEON::BI__builtin_neon_vpadalq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vpmin_v: + case NEON::BI__builtin_neon_vpminq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vpmax_v: + case NEON::BI__builtin_neon_vpmaxq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vminnm_v: + case NEON::BI__builtin_neon_vminnmq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vminnmh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vmaxnm_v: + case NEON::BI__builtin_neon_vmaxnmq_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vmaxnmh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vrecpss_f32: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrecpsd_f64: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vrecpsh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vqshrun_n_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vqrshrun_n_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vqshrn_n_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vrshrn_n_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vqrshrn_n_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vrndah_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrnda_v: + case NEON::BI__builtin_neon_vrndaq_v: { + assert(!MissingFeatures::buildConstrainedFPCall()); + return buildNeonCall(BuiltinID, *this, {Ty}, Ops, "llvm.round", Ty, + getLoc(E->getExprLoc())); + } + case NEON::BI__builtin_neon_vrndih_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndmh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndm_v: + case NEON::BI__builtin_neon_vrndmq_v: { + 
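// Editorial sketch, not part of the patch: the vrnda/vrndaq case above is
// representative of how the implemented NEON paths delegate to buildNeonCall,
// passing the operand types, the collected Ops, the name of the LLVM
// intrinsic to call ("llvm.round", i.e. round to nearest with ties away from
// zero) and the result type. At the source level:
#include <arm_neon.h>
float32x4_t round_away(float32x4_t v) {
  return vrndaq_f32(v); // lowered via buildNeonCall to an llvm.round call
}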
llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndnh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndn_v: + case NEON::BI__builtin_neon_vrndnq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndns_f32: { + mlir::Value arg0 = buildScalarExpr(E->getArg(0)); + args.push_back(arg0); + return buildNeonCall(NEON::BI__builtin_neon_vrndns_f32, *this, + {arg0.getType()}, args, "llvm.roundeven.f32", + getCIRGenModule().FloatTy, getLoc(E->getExprLoc())); + } + case NEON::BI__builtin_neon_vrndph_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndp_v: + case NEON::BI__builtin_neon_vrndpq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndxh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndx_v: + case NEON::BI__builtin_neon_vrndxq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrndh_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrnd32x_f32: + case NEON::BI__builtin_neon_vrnd32xq_f32: + case NEON::BI__builtin_neon_vrnd32x_f64: + case NEON::BI__builtin_neon_vrnd32xq_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrnd32z_f32: + case NEON::BI__builtin_neon_vrnd32zq_f32: + case NEON::BI__builtin_neon_vrnd32z_f64: + case NEON::BI__builtin_neon_vrnd32zq_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrnd64x_f32: + case NEON::BI__builtin_neon_vrnd64xq_f32: + case NEON::BI__builtin_neon_vrnd64x_f64: + case NEON::BI__builtin_neon_vrnd64xq_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrnd64z_f32: + case NEON::BI__builtin_neon_vrnd64zq_f32: + case NEON::BI__builtin_neon_vrnd64z_f64: + case NEON::BI__builtin_neon_vrnd64zq_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrnd_v: + case NEON::BI__builtin_neon_vrndq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvt_f64_v: + case NEON::BI__builtin_neon_vcvtq_f64_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vcvt_f64_f32: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvt_f32_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvt_s32_v: + case NEON::BI__builtin_neon_vcvt_u32_v: + case NEON::BI__builtin_neon_vcvt_s64_v: + case NEON::BI__builtin_neon_vcvt_u64_v: + case NEON::BI__builtin_neon_vcvt_s16_f16: + case NEON::BI__builtin_neon_vcvt_u16_f16: + case NEON::BI__builtin_neon_vcvtq_s32_v: + case NEON::BI__builtin_neon_vcvtq_u32_v: + case NEON::BI__builtin_neon_vcvtq_s64_v: + case NEON::BI__builtin_neon_vcvtq_u64_v: + case NEON::BI__builtin_neon_vcvtq_s16_f16: + case NEON::BI__builtin_neon_vcvtq_u16_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvta_s16_f16: + case NEON::BI__builtin_neon_vcvta_u16_f16: + case NEON::BI__builtin_neon_vcvta_s32_v: + case NEON::BI__builtin_neon_vcvtaq_s16_f16: + case NEON::BI__builtin_neon_vcvtaq_s32_v: + case NEON::BI__builtin_neon_vcvta_u32_v: + case NEON::BI__builtin_neon_vcvtaq_u16_f16: + case NEON::BI__builtin_neon_vcvtaq_u32_v: + case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvtaq_s64_v: + case NEON::BI__builtin_neon_vcvta_u64_v: + case NEON::BI__builtin_neon_vcvtaq_u64_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvtm_s16_f16: + case NEON::BI__builtin_neon_vcvtm_s32_v: + case NEON::BI__builtin_neon_vcvtmq_s16_f16: + case NEON::BI__builtin_neon_vcvtmq_s32_v: + case NEON::BI__builtin_neon_vcvtm_u16_f16: + case NEON::BI__builtin_neon_vcvtm_u32_v: + case 
NEON::BI__builtin_neon_vcvtmq_u16_f16: + case NEON::BI__builtin_neon_vcvtmq_u32_v: + case NEON::BI__builtin_neon_vcvtm_s64_v: + case NEON::BI__builtin_neon_vcvtmq_s64_v: + case NEON::BI__builtin_neon_vcvtm_u64_v: + case NEON::BI__builtin_neon_vcvtmq_u64_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvtn_s16_f16: + case NEON::BI__builtin_neon_vcvtn_s32_v: + case NEON::BI__builtin_neon_vcvtnq_s16_f16: + case NEON::BI__builtin_neon_vcvtnq_s32_v: + case NEON::BI__builtin_neon_vcvtn_u16_f16: + case NEON::BI__builtin_neon_vcvtn_u32_v: + case NEON::BI__builtin_neon_vcvtnq_u16_f16: + case NEON::BI__builtin_neon_vcvtnq_u32_v: + case NEON::BI__builtin_neon_vcvtn_s64_v: + case NEON::BI__builtin_neon_vcvtnq_s64_v: + case NEON::BI__builtin_neon_vcvtn_u64_v: + case NEON::BI__builtin_neon_vcvtnq_u64_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vcvtp_s16_f16: + case NEON::BI__builtin_neon_vcvtp_s32_v: + case NEON::BI__builtin_neon_vcvtpq_s16_f16: + case NEON::BI__builtin_neon_vcvtpq_s32_v: + case NEON::BI__builtin_neon_vcvtp_u16_f16: + case NEON::BI__builtin_neon_vcvtp_u32_v: + case NEON::BI__builtin_neon_vcvtpq_u16_f16: + case NEON::BI__builtin_neon_vcvtpq_u32_v: + case NEON::BI__builtin_neon_vcvtp_s64_v: + case NEON::BI__builtin_neon_vcvtpq_s64_v: + case NEON::BI__builtin_neon_vcvtp_u64_v: + case NEON::BI__builtin_neon_vcvtpq_u64_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmulx_v: + case NEON::BI__builtin_neon_vmulxq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmulxh_lane_f16: + case NEON::BI__builtin_neon_vmulxh_laneq_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmul_lane_v: + case NEON::BI__builtin_neon_vmul_laneq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vnegd_s64: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vnegh_f16: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vpmaxnm_v: + case NEON::BI__builtin_neon_vpmaxnmq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vpminnm_v: + case NEON::BI__builtin_neon_vpminnmq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsqrth_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsqrt_v: + case NEON::BI__builtin_neon_vsqrtq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vrbit_v: + case NEON::BI__builtin_neon_vrbitq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddv_u8: + // FIXME: These are handled by the AArch64 scalar code. 
+ llvm_unreachable("NYI"); + [[fallthrough]]; + case NEON::BI__builtin_neon_vaddv_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddv_u16: + llvm_unreachable("NYI"); + [[fallthrough]]; + case NEON::BI__builtin_neon_vaddv_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddvq_u8: + llvm_unreachable("NYI"); + [[fallthrough]]; + case NEON::BI__builtin_neon_vaddvq_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddvq_u16: + llvm_unreachable("NYI"); + [[fallthrough]]; + case NEON::BI__builtin_neon_vaddvq_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxv_u8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxv_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxvq_u8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxvq_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxv_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxv_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxvq_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxvq_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxv_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxvq_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminv_u8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminv_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminvq_u8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminvq_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminv_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminv_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminvq_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminvq_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminv_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminvq_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxnmv_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmaxnmvq_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminnmv_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vminnmvq_f16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vmul_n_f64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlv_u8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlv_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlvq_u8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlvq_u16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlv_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlv_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlvq_s8: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vaddlvq_s16: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsri_n_v: + case NEON::BI__builtin_neon_vsriq_n_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsli_n_v: + case NEON::BI__builtin_neon_vsliq_n_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsra_n_v: + case NEON::BI__builtin_neon_vsraq_n_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vrsra_n_v: + case NEON::BI__builtin_neon_vrsraq_n_v: { + llvm_unreachable("NYI"); + } + case 
NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: { + return builder.createAlignedLoad(Ops[0].getLoc(), VTy, Ops[0], + PtrOp0.getAlignment()); + } + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: { + Ops[1] = builder.createBitcast(Ops[1], VTy); + (void)builder.createAlignedStore(Ops[1].getLoc(), Ops[1], Ops[0], + PtrOp0.getAlignment()); + return Ops[1]; + } + case NEON::BI__builtin_neon_vld1_lane_v: + case NEON::BI__builtin_neon_vld1q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vldap1_lane_s64: + case NEON::BI__builtin_neon_vldap1q_lane_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst1_lane_v: + case NEON::BI__builtin_neon_vst1q_lane_v: + llvm_unreachable("NYI"); + case NEON::BI__builtin_neon_vstl1_lane_s64: + case NEON::BI__builtin_neon_vstl1q_lane_s64: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld2_v: + case NEON::BI__builtin_neon_vld2q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld3_v: + case NEON::BI__builtin_neon_vld3q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld4_v: + case NEON::BI__builtin_neon_vld4q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld2_dup_v: + case NEON::BI__builtin_neon_vld2q_dup_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld3_dup_v: + case NEON::BI__builtin_neon_vld3q_dup_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld4_dup_v: + case NEON::BI__builtin_neon_vld4q_dup_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld2_lane_v: + case NEON::BI__builtin_neon_vld2q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld3_lane_v: + case NEON::BI__builtin_neon_vld3q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vld4_lane_v: + case NEON::BI__builtin_neon_vld4q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst2_v: + case NEON::BI__builtin_neon_vst2q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst2_lane_v: + case NEON::BI__builtin_neon_vst2q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst3_v: + case NEON::BI__builtin_neon_vst3q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst3_lane_v: + case NEON::BI__builtin_neon_vst3q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst4_v: + case NEON::BI__builtin_neon_vst4q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vst4_lane_v: + case NEON::BI__builtin_neon_vst4q_lane_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vtrn_v: + case NEON::BI__builtin_neon_vtrnq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vuzp_v: + case NEON::BI__builtin_neon_vuzpq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vzip_v: + case NEON::BI__builtin_neon_vzipq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbl1q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbl2q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbl3q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbl4q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbx1q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbx2q_v: { + llvm_unreachable("NYI"); + } + case 
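// Editorial sketch, not part of the patch: the vld1/vst1 cases near the top of
// this block reuse the pointer and alignment captured earlier into PtrOp0 and
// lower to a plain aligned vector load or store of the overloaded vector type
// VTy, e.g.:
#include <arm_neon.h>
int32x4_t load_4(const int32_t *p) {
  return vld1q_s32(p); // aligned load of a 4 x s32 vector
}
void store_4(int32_t *p, int32x4_t v) {
  vst1q_s32(p, v); // bitcast to the vector type, then aligned store
}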
NEON::BI__builtin_neon_vqtbx3q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vqtbx4q_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vsqadd_v: + case NEON::BI__builtin_neon_vsqaddq_v: { + llvm_unreachable("NYI"); + } + case NEON::BI__builtin_neon_vuqadd_v: + case NEON::BI__builtin_neon_vuqaddq_v: { + llvm_unreachable("NYI"); + } + } +} diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp new file mode 100644 index 000000000000..d6e23a9f0a25 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -0,0 +1,37 @@ +//===---- CIRGenBuiltinX86.cpp - Emit CIR for X86 builtins ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit x86/x86_64 Builtin calls as CIR or a function +// call to be later resolved. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenCall.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "TargetInfo.h" +#include "clang/CIR/MissingFeatures.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Value.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; + +mlir::Value CIRGenFunction::buildX86BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + llvm_unreachable("NYI"); +} \ No newline at end of file diff --git a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp new file mode 100644 index 000000000000..179e128ac2f8 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp @@ -0,0 +1,357 @@ +//===--- CGCXX.cpp - Emit LLVM Code for declarations ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ code generation. +// +//===----------------------------------------------------------------------===// + +// We might split this into multiple files if it gets too unwieldy + +#include "CIRGenCXXABI.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" + +#include "clang/AST/GlobalDecl.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using namespace clang; +using namespace cir; + +/// Try to emit a base destructor as an alias to its primary +/// base-class destructor. +bool CIRGenModule::tryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { + if (!getCodeGenOpts().CXXCtorDtorAliases) + return true; + + // Producing an alias to a base class ctor/dtor can degrade debug quality + // as the debugger cannot tell them apart. + if (getCodeGenOpts().OptimizationLevel == 0) + return true; + + // If sanitizing memory to check for use-after-dtor, do not emit as + // an alias, unless this class owns no members. 
+ if (getCodeGenOpts().SanitizeMemoryUseAfterDtor && + !D->getParent()->field_empty()) + assert(!MissingFeatures::sanitizeDtor()); + + // If the destructor doesn't have a trivial body, we have to emit it + // separately. + if (!D->hasTrivialBody()) + return true; + + const CXXRecordDecl *Class = D->getParent(); + + // We are going to instrument this destructor, so give up even if it is + // currently empty. + if (Class->mayInsertExtraPadding()) + return true; + + // If we need to manipulate a VTT parameter, give up. + if (Class->getNumVBases()) { + // Extra Credit: passing extra parameters is perfectly safe + // in many calling conventions, so only bail out if the ctor's + // calling convention is nonstandard. + return true; + } + + // If any field has a non-trivial destructor, we have to emit the + // destructor separately. + for (const auto *I : Class->fields()) + if (I->getType().isDestructedType()) + return true; + + // Try to find a unique base class with a non-trivial destructor. + const CXXRecordDecl *UniqueBase = nullptr; + for (const auto &I : Class->bases()) { + + // We're in the base destructor, so skip virtual bases. + if (I.isVirtual()) + continue; + + // Skip base classes with trivial destructors. + const auto *Base = + cast(I.getType()->castAs()->getDecl()); + if (Base->hasTrivialDestructor()) + continue; + + // If we've already found a base class with a non-trivial + // destructor, give up. + if (UniqueBase) + return true; + UniqueBase = Base; + } + + // If we didn't find any bases with a non-trivial destructor, then + // the base destructor is actually effectively trivial, which can + // happen if it was needlessly user-defined or if there are virtual + // bases with non-trivial destructors. + if (!UniqueBase) + return true; + + // If the base is at a non-zero offset, give up. + const ASTRecordLayout &ClassLayout = astCtx.getASTRecordLayout(Class); + if (!ClassLayout.getBaseClassOffset(UniqueBase).isZero()) + return true; + + // Give up if the calling conventions don't match. We could update the call, + // but it is probably not worth it. + const CXXDestructorDecl *BaseD = UniqueBase->getDestructor(); + if (BaseD->getType()->castAs()->getCallConv() != + D->getType()->castAs()->getCallConv()) + return true; + + GlobalDecl AliasDecl(D, Dtor_Base); + GlobalDecl TargetDecl(BaseD, Dtor_Base); + + // The alias will use the linkage of the referent. If we can't + // support aliases with that linkage, fail. + auto Linkage = getFunctionLinkage(AliasDecl); + + // We can't use an alias if the linkage is not valid for one. + if (!mlir::cir::isValidLinkage(Linkage)) + return true; + + auto TargetLinkage = getFunctionLinkage(TargetDecl); + + // Check if we have it already. + StringRef MangledName = getMangledName(AliasDecl); + auto Entry = getGlobalValue(MangledName); + auto globalValue = dyn_cast(Entry); + if (Entry && globalValue && !globalValue.isDeclaration()) + return false; + if (Replacements.count(MangledName)) + return false; + + assert(globalValue && "only knows how to handle GlobalValue"); + [[maybe_unused]] auto AliasValueType = getTypes().GetFunctionType(AliasDecl); + + // Find the referent. + auto Aliasee = cast(GetAddrOfGlobal(TargetDecl)); + auto AliaseeGV = dyn_cast_or_null( + GetAddrOfGlobal(TargetDecl)); + // Instead of creating as alias to a linkonce_odr, replace all of the uses + // of the aliasee. 
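// Editorial sketch, not part of the patch: the kind of hierarchy the checks in
// tryEmitBaseDestructorAsAlias are looking for. With -mconstructor-aliases and
// optimization enabled, Derived's base-object destructor can be emitted as an
// alias to Base's, because Base is the unique base with a non-trivial
// destructor, it sits at offset zero, Derived's own destructor body is
// trivial, and the calling conventions match. Whether the alias is actually
// formed still depends on the linkage checks that follow.
struct Base {
  ~Base(); // non-trivial, defined in another TU
  int x;
};
struct Derived : Base {
  // implicit ~Derived() has an empty body of its own: alias candidate
};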
+ if (mlir::cir::isDiscardableIfUnused(Linkage) && + !(TargetLinkage == + mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage && + TargetDecl.getDecl()->hasAttr())) { + // FIXME: An extern template instantiation will create functions with + // linkage "AvailableExternally". In libc++, some classes also define + // members with attribute "AlwaysInline" and expect no reference to + // be generated. It is desirable to reenable this optimisation after + // corresponding LLVM changes. + llvm_unreachable("NYI"); + } + + // If we have a weak, non-discardable alias (weak, weak_odr), like an + // extern template instantiation or a dllexported class, avoid forming it on + // COFF. A COFF weak external alias cannot satisfy a normal undefined + // symbol reference from another TU. The other TU must also mark the + // referenced symbol as weak, which we cannot rely on. + if (mlir::cir::isWeakForLinker(Linkage) && getTriple().isOSBinFormatCOFF()) { + llvm_unreachable("NYI"); + } + + // If we don't have a definition for the destructor yet or the definition + // is + // avaialable_externally, don't emit an alias. We can't emit aliases to + // declarations; that's just not how aliases work. + if (AliaseeGV && AliaseeGV.isDeclarationForLinker()) + return true; + + // Don't create an alias to a linker weak symbol. This avoids producing + // different COMDATs in different TUs. Another option would be to + // output the alias both for weak_odr and linkonce_odr, but that + // requires explicit comdat support in the IL. + if (mlir::cir::isWeakForLinker(TargetLinkage)) + llvm_unreachable("NYI"); + + // Create the alias with no name. + buildAliasForGlobal("", Entry, AliasDecl, Aliasee, Linkage); + return false; +} + +static void buildDeclInit(CIRGenFunction &CGF, const VarDecl *D, + Address DeclPtr) { + assert((D->hasGlobalStorage() || + (D->hasLocalStorage() && + CGF.getContext().getLangOpts().OpenCLCPlusPlus)) && + "VarDecl must have global or local (in the case of OpenCL) storage!"); + assert(!D->getType()->isReferenceType() && + "Should not call buildDeclInit on a reference!"); + + QualType type = D->getType(); + LValue lv = CGF.makeAddrLValue(DeclPtr, type); + + const Expr *Init = D->getInit(); + switch (CIRGenFunction::getEvaluationKind(type)) { + case TEK_Aggregate: + CGF.buildAggExpr( + Init, AggValueSlot::forLValue(lv, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); + return; + case TEK_Scalar: + CGF.buildScalarInit(Init, CGF.getLoc(D->getLocation()), lv, false); + return; + case TEK_Complex: + llvm_unreachable("complext evaluation NYI"); + } +} + +static void buildDeclDestroy(CIRGenFunction &CGF, const VarDecl *D) { + // Honor __attribute__((no_destroy)) and bail instead of attempting + // to emit a reference to a possibly nonexistent destructor, which + // in turn can cause a crash. This will result in a global constructor + // that isn't balanced out by a destructor call as intended by the + // attribute. This also checks for -fno-c++-static-destructors and + // bails even if the attribute is not present. + QualType::DestructionKind DtorKind = D->needsDestruction(CGF.getContext()); + + // FIXME: __attribute__((cleanup)) ? + + switch (DtorKind) { + case QualType::DK_none: + return; + + case QualType::DK_cxx_destructor: + break; + + case QualType::DK_objc_strong_lifetime: + case QualType::DK_objc_weak_lifetime: + case QualType::DK_nontrivial_c_struct: + // We don't care about releasing objects during process teardown. 
+ assert(!D->getTLSKind() && "should have rejected this"); + return; + } + + auto &CGM = CGF.CGM; + QualType type = D->getType(); + + // Special-case non-array C++ destructors, if they have the right signature. + // Under some ABIs, destructors return this instead of void, and cannot be + // passed directly to __cxa_atexit if the target does not allow this + // mismatch. + const CXXRecordDecl *Record = type->getAsCXXRecordDecl(); + bool CanRegisterDestructor = + Record && (!CGM.getCXXABI().HasThisReturn( + GlobalDecl(Record->getDestructor(), Dtor_Complete)) || + CGM.getCXXABI().canCallMismatchedFunctionType()); + + // If __cxa_atexit is disabled via a flag, a different helper function is + // generated elsewhere which uses atexit instead, and it takes the destructor + // directly. + auto UsingExternalHelper = CGM.getCodeGenOpts().CXAAtExit; + mlir::cir::FuncOp fnOp; + if (Record && (CanRegisterDestructor || UsingExternalHelper)) { + assert(!D->getTLSKind() && "TLS NYI"); + assert(!Record->hasTrivialDestructor()); + assert(!MissingFeatures::openCL()); + CXXDestructorDecl *Dtor = Record->getDestructor(); + // In LLVM OG codegen this is done in registerGlobalDtor, but CIRGen + // relies on LoweringPrepare for further decoupling, so build the + // call right here. + auto GD = GlobalDecl(Dtor, Dtor_Complete); + auto structorInfo = CGM.getAddrAndTypeOfCXXStructor(GD); + fnOp = structorInfo.second; + CGF.getBuilder().createCallOp( + CGF.getLoc(D->getSourceRange()), + mlir::FlatSymbolRefAttr::get(fnOp.getSymNameAttr()), + mlir::ValueRange{CGF.CGM.getAddrOfGlobalVar(D)}); + } else { + llvm_unreachable("array destructors not yet supported!"); + } + assert(fnOp && "expected cir.func"); + CGM.getCXXABI().registerGlobalDtor(CGF, D, fnOp, nullptr); +} + +mlir::cir::FuncOp CIRGenModule::codegenCXXStructor(GlobalDecl GD) { + const auto &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD); + auto Fn = getAddrOfCXXStructor(GD, &FnInfo, /*FnType=*/nullptr, + /*DontDefer=*/true, ForDefinition); + + setFunctionLinkage(GD, Fn); + CIRGenFunction CGF{*this, builder}; + CurCGF = &CGF; + { + mlir::OpBuilder::InsertionGuard guard(builder); + CGF.generateCode(GD, Fn, FnInfo); + } + CurCGF = nullptr; + + setNonAliasAttributes(GD, Fn); + setCIRFunctionAttributesForDefinition(cast(GD.getDecl()), Fn); + return Fn; +} + +/// Emit code to cause the variable at the given address to be considered as +/// constant from this point onwards. +static void buildDeclInvariant(CIRGenFunction &CGF, const VarDecl *D) { + return CGF.buildInvariantStart( + CGF.getContext().getTypeSizeInChars(D->getType())); +} + +void CIRGenFunction::buildInvariantStart([[maybe_unused]] CharUnits Size) { + // Do not emit the intrinsic if we're not optimizing. 
+ if (!CGM.getCodeGenOpts().OptimizationLevel) + return; + + assert(!MissingFeatures::createInvariantIntrinsic()); +} + +void CIRGenModule::codegenGlobalInitCxxStructor(const VarDecl *D, + mlir::cir::GlobalOp Addr, + bool NeedsCtor, bool NeedsDtor, + bool isCstStorage) { + assert(D && " Expected a global declaration!"); + CIRGenFunction CGF{*this, builder, true}; + CurCGF = &CGF; + CurCGF->CurFn = Addr; + Addr.setAstAttr(mlir::cir::ASTVarDeclAttr::get(builder.getContext(), D)); + + if (NeedsCtor) { + mlir::OpBuilder::InsertionGuard guard(builder); + auto block = builder.createBlock(&Addr.getCtorRegion()); + CIRGenFunction::LexicalScope lexScope{*CurCGF, Addr.getLoc(), + builder.getInsertionBlock()}; + lexScope.setAsGlobalInit(); + + builder.setInsertionPointToStart(block); + Address DeclAddr(getAddrOfGlobalVar(D), getASTContext().getDeclAlign(D)); + buildDeclInit(CGF, D, DeclAddr); + builder.setInsertionPointToEnd(block); + builder.create(Addr->getLoc()); + } + + if (isCstStorage) { + // TODO: this leads to a missing feature in the moment, probably also need a + // LexicalScope to be inserted here. + buildDeclInvariant(CGF, D); + } else { + // If not constant storage we'll emit this regardless of NeedsDtor value. + mlir::OpBuilder::InsertionGuard guard(builder); + auto block = builder.createBlock(&Addr.getDtorRegion()); + CIRGenFunction::LexicalScope lexScope{*CurCGF, Addr.getLoc(), + builder.getInsertionBlock()}; + lexScope.setAsGlobalInit(); + + builder.setInsertionPointToStart(block); + buildDeclDestroy(CGF, D); + builder.setInsertionPointToEnd(block); + if (block->empty()) { + block->erase(); + // Don't confuse lexical cleanup. + builder.clearInsertionPoint(); + } else + builder.create(Addr->getLoc()); + } + + CurCGF = nullptr; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp new file mode 100644 index 000000000000..b17206772c3f --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp @@ -0,0 +1,78 @@ +//===----- CirGenCXXABI.cpp - Interface to C++ ABIs -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides an abstract class for C++ code generation. Concrete subclasses +// of this implement code generation for specific C++ ABIs. 
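// Editorial sketch, not part of the patch, summarizing the global-initializer
// support provided by codegenGlobalInitCxxStructor above: for a dynamically
// initialized global with a non-trivial destructor,
struct Foo { Foo(); ~Foo(); };
Foo f; // ctor region: Foo::Foo(&f); dtor region: Foo::~Foo(&f)
// the cir.global for 'f' receives a ctor region that runs the constructor on
// the global's address (buildDeclInit) and a dtor region that calls the
// destructor (buildDeclDestroy); per the comments above, LoweringPrepare is
// expected to later turn these regions into the usual __cxa_atexit / atexit
// based registration.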
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" + +#include "clang/AST/Decl.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/Mangle.h" +#include "clang/AST/RecordLayout.h" + +using namespace cir; +using namespace clang; + +CIRGenCXXABI::~CIRGenCXXABI() {} + +CIRGenCXXABI::AddedStructorArgCounts CIRGenCXXABI::addImplicitConstructorArgs( + CIRGenFunction &CGF, const clang::CXXConstructorDecl *D, + clang::CXXCtorType Type, bool ForVirtualBase, bool Delegating, + CallArgList &Args) { + auto AddedArgs = + getImplicitConstructorArgs(CGF, D, Type, ForVirtualBase, Delegating); + for (size_t i = 0; i < AddedArgs.Prefix.size(); ++i) + Args.insert(Args.begin() + 1 + i, + CallArg(RValue::get(AddedArgs.Prefix[i].Value), + AddedArgs.Prefix[i].Type)); + for (const auto &arg : AddedArgs.Suffix) + Args.add(RValue::get(arg.Value), arg.Type); + return AddedStructorArgCounts(AddedArgs.Prefix.size(), + AddedArgs.Suffix.size()); +} + +CatchTypeInfo CIRGenCXXABI::getCatchAllTypeInfo() { + return CatchTypeInfo{nullptr, 0}; +} + +bool CIRGenCXXABI::NeedsVTTParameter(GlobalDecl GD) { return false; } + +void CIRGenCXXABI::buildThisParam(CIRGenFunction &CGF, + FunctionArgList ¶ms) { + const auto *MD = cast(CGF.CurGD.getDecl()); + + // FIXME: I'm not entirely sure I like using a fake decl just for code + // generation. Maybe we can come up with a better way? + auto *ThisDecl = + ImplicitParamDecl::Create(CGM.getASTContext(), nullptr, MD->getLocation(), + &CGM.getASTContext().Idents.get("this"), + MD->getThisType(), ImplicitParamKind::CXXThis); + params.push_back(ThisDecl); + CGF.CXXABIThisDecl = ThisDecl; + + // Compute the presumed alignment of 'this', which basically comes down to + // whether we know it's a complete object or not. + auto &Layout = CGF.getContext().getASTRecordLayout(MD->getParent()); + if (MD->getParent()->getNumVBases() == 0 || + MD->getParent()->isEffectivelyFinal() || + isThisCompleteObject(CGF.CurGD)) { + CGF.CXXABIThisAlignment = Layout.getAlignment(); + } else { + llvm_unreachable("NYI"); + } +} + +mlir::cir::GlobalLinkageKind CIRGenCXXABI::getCXXDestructorLinkage( + GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { + // Delegate back to CGM by default. + return CGM.getCIRLinkageForDeclarator(Dtor, Linkage, + /*IsConstantVariable=*/false); +} \ No newline at end of file diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h new file mode 100644 index 000000000000..6c67e849a4c4 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h @@ -0,0 +1,339 @@ +//===----- CIRGenCXXABI.h - Interface to C++ ABIs ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides an abstract class for C++ code generation. Concrete subclasses +// of this implement code generation for specific C++ ABIs. 
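// Editorial sketch, not part of the patch: the argument order produced by
// CIRGenCXXABI::addImplicitConstructorArgs above. 'this' always stays first;
// ABI-specific Prefix arguments are inserted immediately after it and Suffix
// arguments are appended, i.e. [ this, Prefix..., original args..., Suffix... ].
// Under the Itanium ABI the typical Prefix argument is the VTT passed to the
// base-object constructor of a class with virtual bases:
struct V { int v; };
struct A : virtual V {
  A(int x); // base-object variant conceptually becomes A::A(A *this, void **vtt, int x)
};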
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CIRGENCXXABI_H +#define LLVM_CLANG_LIB_CIR_CIRGENCXXABI_H + +#include "CIRGenCall.h" +#include "CIRGenCleanup.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" + +#include "mlir/IR/Attributes.h" +#include "clang/AST/Mangle.h" + +namespace cir { + +class CIRGenFunction; +class CIRGenFunctionInfo; + +/// Implements C++ ABI-specific code generation functions. +class CIRGenCXXABI { +protected: + cir::CIRGenModule &CGM; + std::unique_ptr MangleCtx; + + CIRGenCXXABI(CIRGenModule &CGM) + : CGM{CGM}, MangleCtx(CGM.getASTContext().createMangleContext()) {} + + clang::ASTContext &getContext() const { return CGM.getASTContext(); } + +public: + /// Similar to AddedStructorArgs, but only notes the number of additional + /// arguments. + struct AddedStructorArgCounts { + unsigned Prefix = 0; + unsigned Suffix = 0; + AddedStructorArgCounts() = default; + AddedStructorArgCounts(unsigned P, unsigned S) : Prefix(P), Suffix(S) {} + static AddedStructorArgCounts prefix(unsigned N) { return {N, 0}; } + static AddedStructorArgCounts suffix(unsigned N) { return {0, N}; } + }; + + /// Additional implicit arguments to add to the beginning (Prefix) and end + /// (Suffix) of a constructor / destructor arg list. + /// + /// Note that Prefix should actually be inserted *after* the first existing + /// arg; `this` arguments always come first. + struct AddedStructorArgs { + struct Arg { + mlir::Value Value; + clang::QualType Type; + }; + llvm::SmallVector Prefix; + llvm::SmallVector Suffix; + AddedStructorArgs() = default; + AddedStructorArgs(llvm::SmallVector P, llvm::SmallVector S) + : Prefix(std::move(P)), Suffix(std::move(S)) {} + static AddedStructorArgs prefix(llvm::SmallVector Args) { + return {std::move(Args), {}}; + } + static AddedStructorArgs suffix(llvm::SmallVector Args) { + return {{}, std::move(Args)}; + } + }; + + /// Build the signature of the given constructor or destructor vairant by + /// adding any required parameters. For convenience, ArgTys has been + /// initialized with the type of 'this'. + virtual AddedStructorArgCounts + buildStructorSignature(clang::GlobalDecl GD, + llvm::SmallVectorImpl &ArgTys) = 0; + + AddedStructorArgCounts + addImplicitConstructorArgs(CIRGenFunction &CGF, + const clang::CXXConstructorDecl *D, + clang::CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args); + + clang::ImplicitParamDecl *getThisDecl(CIRGenFunction &CGF) { + return CGF.CXXABIThisDecl; + } + + virtual AddedStructorArgs getImplicitConstructorArgs( + CIRGenFunction &CGF, const clang::CXXConstructorDecl *D, + clang::CXXCtorType Type, bool ForVirtualBase, bool Delegating) = 0; + + /// Emit the ABI-specific prolog for the function + virtual void buildInstanceFunctionProlog(CIRGenFunction &CGF) = 0; + + /// Get the type of the implicit "this" parameter used by a method. May return + /// zero if no specific type is applicable, e.g. if the ABI expects the "this" + /// parameter to point to some artificial offset in a complete object due to + /// vbases being reordered. + virtual const clang::CXXRecordDecl * + getThisArgumentTypeForMethod(const clang::CXXMethodDecl *MD) { + return MD->getParent(); + } + + /// Return whether the given global decl needs a VTT parameter. + virtual bool NeedsVTTParameter(clang::GlobalDecl GD); + + /// If the C++ ABI requires the given type be returned in a particular way, + /// this method sets RetAI and returns true. 
+ virtual bool classifyReturnType(CIRGenFunctionInfo &FI) const = 0; + + /// Gets the mangle context. + clang::MangleContext &getMangleContext() { return *MangleCtx; } + + clang::ImplicitParamDecl *&getStructorImplicitParamDecl(CIRGenFunction &CGF) { + return CGF.CXXStructorImplicitParamDecl; + } + + /// Perform ABI-specific "this" argument adjustment required prior to + /// a call of a virtual function. + /// The "VirtualCall" argument is true iff the call itself is virtual. + virtual Address adjustThisArgumentForVirtualFunctionCall(CIRGenFunction &CGF, + GlobalDecl GD, + Address This, + bool VirtualCall) { + return This; + } + + /// Build a parameter variable suitable for 'this'. + void buildThisParam(CIRGenFunction &CGF, FunctionArgList &Params); + + /// Loads the incoming C++ this pointer as it was passed by the caller. + mlir::Value loadIncomingCXXThis(CIRGenFunction &CGF); + + virtual CatchTypeInfo getCatchAllTypeInfo(); + + /// Determine whether there's something special about the rules of the ABI + /// tell us that 'this' is a complete object within the given function. + /// Obvious common logic like being defined on a final class will have been + /// taken care of by the caller. + virtual bool isThisCompleteObject(clang::GlobalDecl GD) const = 0; + + /// Get the implicit (second) parameter that comes after the "this" pointer, + /// or nullptr if there is isn't one. + virtual mlir::Value getCXXDestructorImplicitParam(CIRGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, + bool ForVirtualBase, + bool Delegating) = 0; + + /// Emit constructor variants required by this ABI. + virtual void buildCXXConstructors(const clang::CXXConstructorDecl *D) = 0; + /// Emit dtor variants required by this ABI. + virtual void buildCXXDestructors(const clang::CXXDestructorDecl *D) = 0; + + /// Emit the destructor call. + virtual void buildDestructorCall(CIRGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, bool ForVirtualBase, + bool Delegating, Address This, + QualType ThisTy) = 0; + + /// Emit code to force the execution of a destructor during global + /// teardown. The default implementation of this uses atexit. + /// + /// \param Dtor - a function taking a single pointer argument + /// \param Addr - a pointer to pass to the destructor function. + virtual void registerGlobalDtor(CIRGenFunction &CGF, const VarDecl *D, + mlir::cir::FuncOp dtor, + mlir::Attribute Addr) = 0; + + virtual size_t getSrcArgforCopyCtor(const CXXConstructorDecl *, + FunctionArgList &Args) const = 0; + + virtual void emitBeginCatch(CIRGenFunction &CGF, const CXXCatchStmt *C) = 0; + + /// Get the address of the vtable for the given record decl which should be + /// used for the vptr at the given offset in RD. + virtual mlir::cir::GlobalOp getAddrOfVTable(const CXXRecordDecl *RD, + CharUnits VPtrOffset) = 0; + + /// Build a virtual function pointer in the ABI-specific way. + virtual CIRGenCallee getVirtualFunctionPointer(CIRGenFunction &CGF, + GlobalDecl GD, Address This, + mlir::Type Ty, + SourceLocation Loc) = 0; + + /// Checks if ABI requires extra virtual offset for vtable field. + virtual bool + isVirtualOffsetNeededForVTableField(CIRGenFunction &CGF, + CIRGenFunction::VPtr Vptr) = 0; + + /// Determine whether it's possible to emit a vtable for \p RD, even + /// though we do not know that the vtable has been marked as used by semantic + /// analysis. 
+ virtual bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const = 0; + + /// Emits the VTable definitions required for the given record type. + virtual void emitVTableDefinitions(CIRGenVTables &CGVT, + const CXXRecordDecl *RD) = 0; + + /// Emit any tables needed to implement virtual inheritance. For Itanium, + /// this emits virtual table tables. + virtual void emitVirtualInheritanceTables(const CXXRecordDecl *RD) = 0; + + virtual mlir::Attribute getAddrOfRTTIDescriptor(mlir::Location loc, + QualType Ty) = 0; + virtual CatchTypeInfo + getAddrOfCXXCatchHandlerType(mlir::Location loc, QualType Ty, + QualType CatchHandlerType) = 0; + + /// Returns true if the given destructor type should be emitted as a linkonce + /// delegating thunk, regardless of whether the dtor is defined in this TU or + /// not. + virtual bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, + CXXDtorType DT) const = 0; + + virtual mlir::cir::GlobalLinkageKind + getCXXDestructorLinkage(GVALinkage Linkage, const CXXDestructorDecl *Dtor, + CXXDtorType DT) const; + + /// Get the address point of the vtable for the given base subobject. + virtual mlir::Value + getVTableAddressPoint(BaseSubobject Base, + const CXXRecordDecl *VTableClass) = 0; + + /// Get the address point of the vtable for the given base subobject while + /// building a constructor or a destructor. + virtual mlir::Value + getVTableAddressPointInStructor(CIRGenFunction &CGF, const CXXRecordDecl *RD, + BaseSubobject Base, + const CXXRecordDecl *NearestVBase) = 0; + + /// Gets the pure virtual member call function. + virtual StringRef getPureVirtualCallName() = 0; + + /// Gets the deleted virtual member call name. + virtual StringRef getDeletedVirtualCallName() = 0; + + /// Specify how one should pass an argument of a record type. + enum class RecordArgABI { + /// Pass it using the normal C aggregate rules for the ABI, potentially + /// introducing extra copies and passing some or all of it in registers. + Default = 0, + + /// Pass it on the stack using its defined layout. The argument must be + /// evaluated directly into the correct stack position in the arguments + /// area, and the call machinery must not move it or introduce extra copies. + DirectInMemory, + + /// Pass it as a pointer to temporary memory. + Indirect + }; + + /// Returns how an argument of the given record type should be passed. + virtual RecordArgABI + getRecordArgABI(const clang::CXXRecordDecl *RD) const = 0; + + /// Insert any ABI-specific implicit parameters into the parameter list for a + /// function. This generally involves extra data for constructors and + /// destructors. + /// + /// ABIs may also choose to override the return type, which has been + /// initialized with the type of 'this' if HasThisReturn(CGF.CurGD) is true or + /// the formal return type of the function otherwise. + virtual void addImplicitStructorParams(CIRGenFunction &CGF, + clang::QualType &ResTy, + FunctionArgList &Params) = 0; + + /// Checks if ABI requires to initialize vptrs for given dynamic class. + virtual bool + doStructorsInitializeVPtrs(const clang::CXXRecordDecl *VTableClass) = 0; + + /// Returns true if the given constructor or destructor is one of the kinds + /// that the ABI says returns 'this' (only applies when called non-virtually + /// for destructors). + /// + /// There currently is no way to indicate if a destructor returns 'this' when + /// called virtually, and CIR generation does not support this case. 
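The `RecordArgABI` classification above distinguishes three strategies for passing a record-typed argument. The snippet below is only an illustration of how a caller might branch on that classification; the enum values mirror the declaration above, while the helper itself is hypothetical and not part of this patch:

```cpp
#include <cstdio>

// Mirrors CIRGenCXXABI::RecordArgABI.
enum class RecordArgABI { Default, DirectInMemory, Indirect };

// Hypothetical helper: describe how a record argument would be materialized.
static const char *describePassing(RecordArgABI ABI) {
  switch (ABI) {
  case RecordArgABI::Default:
    return "use normal C aggregate rules (may be split into registers)";
  case RecordArgABI::DirectInMemory:
    return "evaluate directly into its stack slot, no extra copies";
  case RecordArgABI::Indirect:
    return "pass a pointer to a temporary copy";
  }
  return "unknown";
}

int main() {
  std::printf("%s\n", describePassing(RecordArgABI::Indirect));
  return 0;
}
```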
+ virtual bool HasThisReturn(clang::GlobalDecl GD) const { return false; } + + virtual bool hasMostDerivedReturn(clang::GlobalDecl GD) const { + return false; + } + + /// Returns true if the target allows calling a function through a pointer + /// with a different signature than the actual function (or equivalently, + /// bitcasting a function or function pointer to a different function type). + /// In principle in the most general case this could depend on the target, the + /// calling convention, and the actual types of the arguments and return + /// value. Here it just means whether the signature mismatch could *ever* be + /// allowed; in other words, does the target do strict checking of signatures + /// for all calls. + virtual bool canCallMismatchedFunctionType() const { return true; } + + virtual ~CIRGenCXXABI(); + + void setCXXABIThisValue(CIRGenFunction &CGF, mlir::Value ThisPtr); + + // Determine if references to thread_local global variables can be made + // directly or require access through a thread wrapper function. + virtual bool usesThreadWrapperFunction(const VarDecl *VD) const = 0; + + /// Emit the code to initialize hidden members required to handle virtual + /// inheritance, if needed by the ABI. + virtual void + initializeHiddenVirtualInheritanceMembers(CIRGenFunction &CGF, + const CXXRecordDecl *RD) {} + + /// Emit a single constructor/destructor with the gien type from a C++ + /// constructor Decl. + virtual void buildCXXStructor(clang::GlobalDecl GD) = 0; + + virtual void buildRethrow(CIRGenFunction &CGF, bool isNoReturn) = 0; + virtual void buildThrow(CIRGenFunction &CGF, const CXXThrowExpr *E) = 0; + + virtual void buildBadCastCall(CIRGenFunction &CGF, mlir::Location loc) = 0; + + virtual mlir::Value buildDynamicCast(CIRGenFunction &CGF, mlir::Location Loc, + QualType SrcRecordTy, + QualType DestRecordTy, + mlir::cir::PointerType DestCIRTy, + bool isRefCast, Address Src) = 0; + + virtual mlir::cir::MethodAttr + buildVirtualMethodAttr(mlir::cir::MethodType MethodTy, + const CXXMethodDecl *MD) = 0; +}; + +/// Creates and Itanium-family ABI +CIRGenCXXABI *CreateCIRGenItaniumCXXABI(CIRGenModule &CGM); + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp new file mode 100644 index 000000000000..2a1b1a69da3d --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -0,0 +1,1599 @@ +//===--- CIRGenCall.cpp - Encapsulate calling convention details ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes wrap the information about a call or function +// definition used to handle ABI compliancy. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenBuilder.h" +#include "CIRGenCXXABI.h" +#include "CIRGenFunction.h" +#include "CIRGenFunctionInfo.h" +#include "CIRGenTypes.h" +#include "TargetInfo.h" + +#include "clang/AST/Attr.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/FnInfoOpts.h" +#include "llvm/Support/ErrorHandling.h" +#include + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/IR/Types.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace cir; +using namespace clang; + +CIRGenFunctionInfo *CIRGenFunctionInfo::create( + mlir::cir::CallingConv cirCC, bool instanceMethod, bool chainCall, + const FunctionType::ExtInfo &info, + llvm::ArrayRef paramInfos, CanQualType resultType, + llvm::ArrayRef argTypes, RequiredArgs required) { + assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + assert(!required.allowsOptionalArgs() || + required.getNumRequiredArgs() <= argTypes.size()); + + void *buffer = operator new(totalSizeToAlloc( + argTypes.size() + 1, paramInfos.size())); + + CIRGenFunctionInfo *FI = new (buffer) CIRGenFunctionInfo(); + FI->CallingConvention = cirCC; + FI->EffectiveCallingConvention = cirCC; + FI->ASTCallingConvention = info.getCC(); + FI->InstanceMethod = instanceMethod; + FI->ChainCall = chainCall; + FI->CmseNSCall = info.getCmseNSCall(); + FI->NoReturn = info.getNoReturn(); + FI->ReturnsRetained = info.getProducesResult(); + FI->NoCallerSavedRegs = info.getNoCallerSavedRegs(); + FI->NoCfCheck = info.getNoCfCheck(); + FI->Required = required; + FI->HasRegParm = info.getHasRegParm(); + FI->RegParm = info.getRegParm(); + FI->ArgStruct = nullptr; + FI->ArgStructAlign = 0; + FI->NumArgs = argTypes.size(); + FI->HasExtParameterInfos = !paramInfos.empty(); + FI->getArgsBuffer()[0].type = resultType; + for (unsigned i = 0; i < argTypes.size(); ++i) + FI->getArgsBuffer()[i + 1].type = argTypes[i]; + for (unsigned i = 0; i < paramInfos.size(); ++i) + FI->getExtParameterInfosBuffer()[i] = paramInfos[i]; + + return FI; +} + +namespace { + +/// Encapsulates information about the way function arguments from +/// CIRGenFunctionInfo should be passed to actual CIR function. +class ClangToCIRArgMapping { + static const unsigned InvalidIndex = ~0U; + unsigned InallocaArgNo; + unsigned SRetArgNo; + unsigned TotalCIRArgs; + + /// Arguments of CIR function corresponding to single Clang argument. + struct CIRArgs { + unsigned PaddingArgIndex = 0; + // Argument is expanded to CIR arguments at positions + // [FirstArgIndex, FirstArgIndex + NumberOfArgs). + unsigned FirstArgIndex = 0; + unsigned NumberOfArgs = 0; + + CIRArgs() + : PaddingArgIndex(InvalidIndex), FirstArgIndex(InvalidIndex), + NumberOfArgs(0) {} + }; + + SmallVector ArgInfo; + +public: + ClangToCIRArgMapping(const ASTContext &Context, const CIRGenFunctionInfo &FI, + bool OnlyRequiredArgs = false) + : InallocaArgNo(InvalidIndex), SRetArgNo(InvalidIndex), TotalCIRArgs(0), + ArgInfo(OnlyRequiredArgs ? 
FI.getNumRequiredArgs() : FI.arg_size()) { + construct(Context, FI, OnlyRequiredArgs); + } + + bool hasSRetArg() const { return SRetArgNo != InvalidIndex; } + + bool hasInallocaArg() const { return InallocaArgNo != InvalidIndex; } + + unsigned totalCIRArgs() const { return TotalCIRArgs; } + + bool hasPaddingArg(unsigned ArgNo) const { + assert(ArgNo < ArgInfo.size()); + return ArgInfo[ArgNo].PaddingArgIndex != InvalidIndex; + } + + /// Returns index of first CIR argument corresponding to ArgNo, and their + /// quantity. + std::pair getCIRArgs(unsigned ArgNo) const { + assert(ArgNo < ArgInfo.size()); + return std::make_pair(ArgInfo[ArgNo].FirstArgIndex, + ArgInfo[ArgNo].NumberOfArgs); + } + +private: + void construct(const ASTContext &Context, const CIRGenFunctionInfo &FI, + bool OnlyRequiredArgs); +}; + +void ClangToCIRArgMapping::construct(const ASTContext &Context, + const CIRGenFunctionInfo &FI, + bool OnlyRequiredArgs) { + unsigned CIRArgNo = 0; + bool SwapThisWithSRet = false; + const ABIArgInfo &RetAI = FI.getReturnInfo(); + + assert(RetAI.getKind() != ABIArgInfo::Indirect && "NYI"); + + unsigned ArgNo = 0; + unsigned NumArgs = OnlyRequiredArgs ? FI.getNumRequiredArgs() : FI.arg_size(); + for (CIRGenFunctionInfo::const_arg_iterator I = FI.arg_begin(); + ArgNo < NumArgs; ++I, ++ArgNo) { + assert(I != FI.arg_end()); + const ABIArgInfo &AI = I->info; + // Collect data about CIR arguments corresponding to Clang argument ArgNo. + auto &CIRArgs = ArgInfo[ArgNo]; + + assert(!AI.getPaddingType() && "NYI"); + + switch (AI.getKind()) { + default: + llvm_unreachable("NYI"); + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: { + // Postpone splitting structs into elements since this makes it way + // more complicated for analysis to obtain information on the original + // arguments. + // + // TODO(cir): a LLVM lowering prepare pass should break this down into + // the appropriated pieces. + assert(!MissingFeatures::constructABIArgDirectExtend()); + CIRArgs.NumberOfArgs = 1; + break; + } + } + + if (CIRArgs.NumberOfArgs > 0) { + CIRArgs.FirstArgIndex = CIRArgNo; + CIRArgNo += CIRArgs.NumberOfArgs; + } + + assert(!SwapThisWithSRet && "NYI"); + } + assert(ArgNo == ArgInfo.size()); + + assert(!FI.usesInAlloca() && "NYI"); + + TotalCIRArgs = CIRArgNo; +} + +} // namespace + +static bool hasInAllocaArgs(CIRGenModule &CGM, CallingConv ExplicitCC, + ArrayRef ArgTypes) { + assert(ExplicitCC != CC_Swift && ExplicitCC != CC_SwiftAsync && "Swift NYI"); + assert(!CGM.getTarget().getCXXABI().isMicrosoft() && "MSABI NYI"); + + return false; +} + +mlir::cir::FuncType CIRGenTypes::GetFunctionType(GlobalDecl GD) { + const CIRGenFunctionInfo &FI = arrangeGlobalDeclaration(GD); + return GetFunctionType(FI); +} + +mlir::cir::FuncType CIRGenTypes::GetFunctionType(const CIRGenFunctionInfo &FI) { + bool Inserted = FunctionsBeingProcessed.insert(&FI).second; + (void)Inserted; + assert(Inserted && "Recursively being processed?"); + + mlir::Type resultType = nullptr; + const ABIArgInfo &retAI = FI.getReturnInfo(); + switch (retAI.getKind()) { + case ABIArgInfo::Ignore: + // TODO(CIR): This should probably be the None type from the builtin + // dialect. 
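`ClangToCIRArgMapping::construct` above assigns each Clang-level argument a contiguous range `[FirstArgIndex, FirstArgIndex + NumberOfArgs)` of CIR-level arguments (currently always one per argument, since structs are deliberately not split at this level). A standalone sketch of that bookkeeping, taking a per-argument expansion count as input (the names are illustrative, not the patch's types):

```cpp
#include <cassert>
#include <vector>

struct CIRArgs {
  unsigned FirstArgIndex = ~0U; // invalid until assigned
  unsigned NumberOfArgs = 0;
};

// Given how many CIR args each Clang arg expands to, assign contiguous
// index ranges, mirroring ClangToCIRArgMapping::construct.
static std::vector<CIRArgs> mapArgs(const std::vector<unsigned> &Expansion,
                                    unsigned &TotalCIRArgs) {
  std::vector<CIRArgs> Info(Expansion.size());
  unsigned CIRArgNo = 0;
  for (size_t ArgNo = 0; ArgNo < Expansion.size(); ++ArgNo) {
    Info[ArgNo].NumberOfArgs = Expansion[ArgNo];
    if (Expansion[ArgNo] > 0) {
      Info[ArgNo].FirstArgIndex = CIRArgNo;
      CIRArgNo += Expansion[ArgNo];
    }
  }
  TotalCIRArgs = CIRArgNo;
  return Info;
}

int main() {
  unsigned Total = 0;
  // Three Clang args, the middle one ignored (expands to zero CIR args).
  auto Info = mapArgs({1, 0, 1}, Total);
  assert(Total == 2);
  assert(Info[0].FirstArgIndex == 0 && Info[2].FirstArgIndex == 1);
  return 0;
}
```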
+ resultType = nullptr; + break; + + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: + resultType = retAI.getCoerceToType(); + break; + + default: + assert(false && "NYI"); + } + + ClangToCIRArgMapping CIRFunctionArgs(getContext(), FI, true); + SmallVector ArgTypes(CIRFunctionArgs.totalCIRArgs()); + + assert(!CIRFunctionArgs.hasSRetArg() && "NYI"); + assert(!CIRFunctionArgs.hasInallocaArg() && "NYI"); + + // Add in all of the required arguments. + unsigned ArgNo = 0; + CIRGenFunctionInfo::const_arg_iterator it = FI.arg_begin(), + ie = it + FI.getNumRequiredArgs(); + + for (; it != ie; ++it, ++ArgNo) { + const auto &ArgInfo = it->info; + + assert(!CIRFunctionArgs.hasPaddingArg(ArgNo) && "NYI"); + + unsigned FirstCIRArg, NumCIRArgs; + std::tie(FirstCIRArg, NumCIRArgs) = CIRFunctionArgs.getCIRArgs(ArgNo); + + switch (ArgInfo.getKind()) { + default: + llvm_unreachable("NYI"); + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: { + mlir::Type argType = ArgInfo.getCoerceToType(); + // TODO: handle the test against llvm::StructType from codegen + assert(NumCIRArgs == 1); + ArgTypes[FirstCIRArg] = argType; + break; + } + } + } + + bool Erased = FunctionsBeingProcessed.erase(&FI); + (void)Erased; + assert(Erased && "Not in set?"); + + return mlir::cir::FuncType::get( + ArgTypes, (resultType ? resultType : Builder.getVoidTy()), + FI.isVariadic()); +} + +mlir::cir::FuncType CIRGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) { + const CXXMethodDecl *MD = cast(GD.getDecl()); + const FunctionProtoType *FPT = MD->getType()->getAs(); + + if (!isFuncTypeConvertible(FPT)) { + llvm_unreachable("NYI"); + // return llvm::StructType::get(getLLVMContext()); + } + + return GetFunctionType(GD); +} + +CIRGenCallee CIRGenCallee::prepareConcreteCallee(CIRGenFunction &CGF) const { + if (isVirtual()) { + const CallExpr *CE = getVirtualCallExpr(); + return CGF.CGM.getCXXABI().getVirtualFunctionPointer( + CGF, getVirtualMethodDecl(), getThisAddress(), getVirtualFunctionType(), + CE ? CE->getBeginLoc() : SourceLocation()); + } + return *this; +} + +void CIRGenFunction::buildAggregateStore(mlir::Value Val, Address Dest, + bool DestIsVolatile) { + // In LLVM codegen: + // Function to store a first-class aggregate into memory. We prefer to + // store the elements rather than the aggregate to be more friendly to + // fast-isel. + // In CIR codegen: + // Emit the most simple cir.store possible (e.g. a store for a whole + // struct), which can later be broken down in other CIR levels (or prior + // to dialect codegen). + (void)DestIsVolatile; + builder.createStore(*currSrcLoc, Val, Dest); +} + +static Address emitAddressAtOffset(CIRGenFunction &CGF, Address addr, + const ABIArgInfo &info) { + if (unsigned offset = info.getDirectOffset()) { + llvm_unreachable("NYI"); + } + return addr; +} + +static void AddAttributesFromFunctionProtoType(CIRGenBuilderTy &builder, + ASTContext &Ctx, + mlir::NamedAttrList &FuncAttrs, + const FunctionProtoType *FPT) { + if (!FPT) + return; + + if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) && + FPT->isNothrow()) { + auto nu = mlir::cir::NoThrowAttr::get(builder.getContext()); + FuncAttrs.set(nu.getMnemonic(), nu); + } +} + +/// Construct the CIR attribute list of a function or call. +/// +/// When adding an attribute, please consider where it should be handled: +/// +/// - getDefaultFunctionAttributes is for attributes that are essentially +/// part of the global target configuration (but perhaps can be +/// overridden on a per-function basis). 
Adding attributes there +/// will cause them to also be set in frontends that build on Clang's +/// target-configuration logic, as well as for code defined in library +/// modules such as CUDA's libdevice. +/// +/// - constructAttributeList builds on top of getDefaultFunctionAttributes +/// and adds declaration-specific, convention-specific, and +/// frontend-specific logic. The last is of particular importance: +/// attributes that restrict how the frontend generates code must be +/// added here rather than getDefaultFunctionAttributes. +/// +void CIRGenModule::constructAttributeList(StringRef Name, + const CIRGenFunctionInfo &FI, + CIRGenCalleeInfo CalleeInfo, + mlir::NamedAttrList &funcAttrs, + mlir::cir::CallingConv &callingConv, + bool AttrOnCallSite, bool IsThunk) { + // Implementation Disclaimer + // + // UnimplementedFeature and asserts are used throughout the code to track + // unsupported and things not yet implemented. However, most of the content of + // this function is on detecting attributes, which doesn't not cope with + // existing approaches to track work because its too big. + // + // That said, for the most part, the approach here is very specific compared + // to the rest of CIRGen and attributes and other handling should be done upon + // demand. + + // Collect function CIR attributes from the CC lowering. + callingConv = FI.getEffectiveCallingConvention(); + // TODO: NoReturn, cmse_nonsecure_call + + // Collect function CIR attributes from the callee prototype if we have one. + AddAttributesFromFunctionProtoType(getBuilder(), astCtx, funcAttrs, + CalleeInfo.getCalleeFunctionProtoType()); + + const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); + + // TODO(cir): Attach assumption attributes to the declaration. If this is a + // call site, attach assumptions from the caller to the call as well. + + bool HasOptnone = false; + (void)HasOptnone; + // The NoBuiltinAttr attached to the target FunctionDecl. + mlir::Attribute *NBA; + + if (TargetDecl) { + + if (TargetDecl->hasAttr()) { + auto nu = mlir::cir::NoThrowAttr::get(builder.getContext()); + funcAttrs.set(nu.getMnemonic(), nu); + } + + if (const FunctionDecl *Fn = dyn_cast(TargetDecl)) { + AddAttributesFromFunctionProtoType( + getBuilder(), astCtx, funcAttrs, + Fn->getType()->getAs()); + if (AttrOnCallSite && Fn->isReplaceableGlobalAllocationFunction()) { + // A sane operator new returns a non-aliasing pointer. + auto Kind = Fn->getDeclName().getCXXOverloadedOperator(); + if (getCodeGenOpts().AssumeSaneOperatorNew && + (Kind == OO_New || Kind == OO_Array_New)) + ; // llvm::Attribute::NoAlias + } + const CXXMethodDecl *MD = dyn_cast(Fn); + const bool IsVirtualCall = MD && MD->isVirtual(); + // Don't use [[noreturn]], _Noreturn or [[no_builtin]] for a call to a + // virtual function. These attributes are not inherited by overloads. + if (!(AttrOnCallSite && IsVirtualCall)) { + if (Fn->isNoReturn()) + ; // NoReturn + // NBA = Fn->getAttr(); + (void)NBA; + } + } + + if (isa(TargetDecl) || isa(TargetDecl)) { + // Only place nomerge attribute on call sites, never functions. This + // allows it to work on indirect virtual function calls. + if (AttrOnCallSite && TargetDecl->hasAttr()) + ; + } + + // 'const', 'pure' and 'noalias' attributed functions are also nounwind. + if (TargetDecl->hasAttr()) { + // gcc specifies that 'const' functions have greater restrictions than + // 'pure' functions, so they also cannot have infinite loops. 
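`AddAttributesFromFunctionProtoType` above only attaches the nothrow attribute when the exception specification is both resolved and non-throwing. A toy restatement of that rule with plain stand-ins for the prototype query and the attribute list (none of these names are ClangIR API):

```cpp
#include <cassert>
#include <set>
#include <string>

// Simplified stand-ins for the exception-spec query and the attribute list.
struct ProtoInfo {
  bool hasUnresolvedExceptionSpec;
  bool isNothrow;
};
using AttrSet = std::set<std::string>;

// Only a resolved, non-throwing exception specification yields "nothrow",
// mirroring AddAttributesFromFunctionProtoType.
static void addAttrsFromProto(AttrSet &Attrs, const ProtoInfo *FPT) {
  if (!FPT)
    return;
  if (!FPT->hasUnresolvedExceptionSpec && FPT->isNothrow)
    Attrs.insert("nothrow");
}

int main() {
  AttrSet Attrs;
  ProtoInfo P{/*hasUnresolvedExceptionSpec=*/false, /*isNothrow=*/true};
  addAttrsFromProto(Attrs, &P);
  assert(Attrs.count("nothrow") == 1);
  return 0;
}
```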
+ } else if (TargetDecl->hasAttr()) { + // gcc specifies that 'pure' functions cannot have infinite loops. + } else if (TargetDecl->hasAttr()) { + } + + HasOptnone = TargetDecl->hasAttr(); + if (auto *AllocSize = TargetDecl->getAttr()) { + std::optional NumElemsParam; + if (AllocSize->getNumElemsParam().isValid()) + NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex(); + // TODO(cir): add alloc size attr. + } + + if (TargetDecl->hasAttr()) { + auto cirKernelAttr = + mlir::cir::OpenCLKernelAttr::get(builder.getContext()); + funcAttrs.set(cirKernelAttr.getMnemonic(), cirKernelAttr); + assert(!MissingFeatures::openCL()); + } + + if (TargetDecl->hasAttr() && + getLangOpts().OffloadUniformBlock) + assert(!MissingFeatures::CUDA()); + + if (TargetDecl->hasAttr()) + ; + } + + getDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, funcAttrs); +} + +static mlir::cir::CIRCallOpInterface +buildCallLikeOp(CIRGenFunction &CGF, mlir::Location callLoc, + mlir::cir::FuncType indirectFuncTy, mlir::Value indirectFuncVal, + mlir::cir::FuncOp directFuncOp, + SmallVectorImpl &CIRCallArgs, bool isInvoke, + mlir::cir::CallingConv callingConv, + mlir::cir::ExtraFuncAttributesAttr extraFnAttrs) { + auto &builder = CGF.getBuilder(); + auto getOrCreateSurroundingTryOp = [&]() { + // In OG, we build the landing pad for this scope. In CIR, we emit a + // synthetic cir.try because this didn't come from codegenerating from a + // try/catch in C++. + assert(CGF.currLexScope && "expected scope"); + mlir::cir::TryOp op = CGF.currLexScope->getClosestTryParent(); + if (op) + return op; + + op = builder.create( + *CGF.currSrcLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) {}, + // Don't emit the code right away for catch clauses, for + // now create the regions and consume the try scope result. + // Note that clauses are later populated in + // CIRGenFunction::buildLandingPad. + [&](mlir::OpBuilder &b, mlir::Location loc, + mlir::OperationState &result) { + // Since this didn't come from an explicit try, we only need one + // handler: unwind. + auto *r = result.addRegion(); + builder.createBlock(r); + }); + op.setSynthetic(true); + return op; + }; + + if (isInvoke) { + // This call can throw, few options: + // - If this call does not have an associated cir.try, use the + // one provided by InvokeDest, + // - User written try/catch clauses require calls to handle + // exceptions under cir.try. + auto tryOp = getOrCreateSurroundingTryOp(); + assert(tryOp && "expected"); + + mlir::OpBuilder::InsertPoint ip = builder.saveInsertionPoint(); + if (tryOp.getSynthetic()) { + mlir::Block *lastBlock = &tryOp.getTryRegion().back(); + builder.setInsertionPointToStart(lastBlock); + } else { + assert(builder.getInsertionBlock() && "expected valid basic block"); + } + + mlir::cir::CallOp callOpWithExceptions; + // TODO(cir): Set calling convention for `cir.try_call`. 
+ assert(callingConv == mlir::cir::CallingConv::C && "NYI"); + if (indirectFuncTy) { + callOpWithExceptions = builder.createIndirectTryCallOp( + callLoc, indirectFuncVal, indirectFuncTy, CIRCallArgs); + } else { + callOpWithExceptions = + builder.createTryCallOp(callLoc, directFuncOp, CIRCallArgs); + } + callOpWithExceptions->setAttr("extra_attrs", extraFnAttrs); + + CGF.callWithExceptionCtx = callOpWithExceptions; + auto *invokeDest = CGF.getInvokeDest(tryOp); + (void)invokeDest; + CGF.callWithExceptionCtx = nullptr; + + if (tryOp.getSynthetic()) { + builder.create(tryOp.getLoc()); + builder.restoreInsertionPoint(ip); + } + return callOpWithExceptions; + } + + assert(builder.getInsertionBlock() && "expected valid basic block"); + if (indirectFuncTy) { + // TODO(cir): Set calling convention for indirect calls. + assert(callingConv == mlir::cir::CallingConv::C && "NYI"); + return builder.createIndirectCallOp( + callLoc, indirectFuncVal, indirectFuncTy, CIRCallArgs, + mlir::cir::CallingConv::C, extraFnAttrs); + } + return builder.createCallOp(callLoc, directFuncOp, CIRCallArgs, callingConv, + extraFnAttrs); +} + +RValue CIRGenFunction::buildCall(const CIRGenFunctionInfo &CallInfo, + const CIRGenCallee &Callee, + ReturnValueSlot ReturnValue, + const CallArgList &CallArgs, + mlir::cir::CIRCallOpInterface *callOrTryCall, + bool IsMustTail, mlir::Location loc, + std::optional E) { + auto builder = CGM.getBuilder(); + // FIXME: We no longer need the types from CallArgs; lift up and simplify + + assert(Callee.isOrdinary() || Callee.isVirtual()); + + // Handle struct-return functions by passing a pointer to the location that we + // would like to return info. + QualType RetTy = CallInfo.getReturnType(); + const auto &RetAI = CallInfo.getReturnInfo(); + + mlir::cir::FuncType CIRFuncTy = getTypes().GetFunctionType(CallInfo); + + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); + // This is not always tied to a FunctionDecl (e.g. builtins that are xformed + // into calls to other functions) + if (const FunctionDecl *FD = dyn_cast_or_null(TargetDecl)) { + // We can only guarantee that a function is called from the correct + // context/function based on the appropriate target attributes, + // so only check in the case where we have both always_inline and target + // since otherwise we could be making a conditional call after a check for + // the proper cpu features (and it won't cause code generation issues due to + // function based code generation). + if (TargetDecl->hasAttr() && + (TargetDecl->hasAttr() || + (CurFuncDecl && CurFuncDecl->hasAttr()))) { + // FIXME(cir): somehow refactor this function to use SourceLocation? + SourceLocation Loc; + checkTargetFeatures(Loc, FD); + } + + // Some architectures (such as x86-64) have the ABI changed based on + // attribute-target/features. Give them a chance to diagnose. + assert(!MissingFeatures::checkFunctionCallABI()); + } + + // TODO: add DNEBUG code + + // 1. Set up the arguments + + // If we're using inalloca, insert the allocation after the stack save. + // FIXME: Do this earlier rather than hacking it in here! + Address ArgMemory = Address::invalid(); + assert(!CallInfo.getArgStruct() && "NYI"); + + ClangToCIRArgMapping CIRFunctionArgs(CGM.getASTContext(), CallInfo); + SmallVector CIRCallArgs(CIRFunctionArgs.totalCIRArgs()); + + // If the call returns a temporary with struct return, create a temporary + // alloca to hold the result, unless one is given to us. 
+ assert(!RetAI.isIndirect() && !RetAI.isInAlloca() && + !RetAI.isCoerceAndExpand() && "NYI"); + + // When passing arguments using temporary allocas, we need to add the + // appropriate lifetime markers. This vector keeps track of all the lifetime + // markers that need to be ended right after the call. + assert(!MissingFeatures::shouldEmitLifetimeMarkers() && "NYI"); + + // Translate all of the arguments as necessary to match the CIR lowering. + assert(CallInfo.arg_size() == CallArgs.size() && + "Mismatch between function signature & arguments."); + unsigned ArgNo = 0; + CIRGenFunctionInfo::const_arg_iterator info_it = CallInfo.arg_begin(); + for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end(); + I != E; ++I, ++info_it, ++ArgNo) { + const ABIArgInfo &ArgInfo = info_it->info; + + // Insert a padding argument to ensure proper alignment. + assert(!CIRFunctionArgs.hasPaddingArg(ArgNo) && "Padding args NYI"); + + unsigned FirstCIRArg, NumCIRArgs; + std::tie(FirstCIRArg, NumCIRArgs) = CIRFunctionArgs.getCIRArgs(ArgNo); + + switch (ArgInfo.getKind()) { + case ABIArgInfo::Direct: { + if (!mlir::isa(ArgInfo.getCoerceToType()) && + ArgInfo.getCoerceToType() == convertType(info_it->type) && + ArgInfo.getDirectOffset() == 0) { + assert(NumCIRArgs == 1); + mlir::Value V; + assert(!I->isAggregate() && "Aggregate NYI"); + V = I->getKnownRValue().getScalarVal(); + + assert(CallInfo.getExtParameterInfo(ArgNo).getABI() != + ParameterABI::SwiftErrorResult && + "swift NYI"); + + // We might have to widen integers, but we should never truncate. + if (ArgInfo.getCoerceToType() != V.getType() && + mlir::isa(V.getType())) + llvm_unreachable("NYI"); + + // If the argument doesn't match, perform a bitcast to coerce it. This + // can happen due to trivial type mismatches. + if (FirstCIRArg < CIRFuncTy.getNumInputs() && + V.getType() != CIRFuncTy.getInput(FirstCIRArg)) + V = builder.createBitcast(V, CIRFuncTy.getInput(FirstCIRArg)); + + CIRCallArgs[FirstCIRArg] = V; + break; + } + + // FIXME: Avoid the conversion through memory if possible. + Address Src = Address::invalid(); + if (!I->isAggregate()) { + llvm_unreachable("NYI"); + } else { + Src = I->hasLValue() ? I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress(); + } + + // If the value is offset in memory, apply the offset now. + Src = emitAddressAtOffset(*this, Src, ArgInfo); + + // Fast-isel and the optimizer generally like scalar values better than + // FCAs, so we flatten them if this is safe to do for this argument. + auto STy = dyn_cast(ArgInfo.getCoerceToType()); + if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { + auto SrcTy = Src.getElementType(); + // FIXME(cir): get proper location for each argument. + auto argLoc = loc; + + // If the source type is smaller than the destination type of the + // coerce-to logic, copy the source value into a temp alloca the size + // of the destination type to allow loading all of it. The bits past + // the source value are left undef. + // FIXME(cir): add data layout info and compare sizes instead of + // matching the types. + // + // uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); + // uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy); + // if (SrcSize < DstSize) { + if (SrcTy != STy) + llvm_unreachable("NYI"); + else { + // FIXME(cir): this currently only runs when the types are different, + // but should be when alloc sizes are different, fix this as soon as + // datalayout gets introduced. 
+ Src = builder.createElementBitCast(argLoc, Src, STy); + } + + // assert(NumCIRArgs == STy.getMembers().size()); + // In LLVMGen: Still only pass the struct without any gaps but mark it + // as such somehow. + // + // In CIRGen: Emit a load from the "whole" struct, + // which shall be broken later by some lowering step into multiple + // loads. + assert(NumCIRArgs == 1 && "dont break up arguments here!"); + CIRCallArgs[FirstCIRArg] = builder.createLoad(argLoc, Src); + } else { + llvm_unreachable("NYI"); + } + + break; + } + default: + assert(false && "Only Direct support so far"); + } + } + + const CIRGenCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this); + auto CalleePtr = ConcreteCallee.getFunctionPointer(); + + // If we're using inalloca, set up that argument. + assert(!ArgMemory.isValid() && "inalloca NYI"); + + // 2. Prepare the function pointer. + + // TODO: simplifyVariadicCallee + + // 3. Perform the actual call. + + // TODO: Deactivate any cleanups that we're supposed to do immediately before + // the call. + // if (!CallArgs.getCleanupsToDeactivate().empty()) + // deactivateArgCleanupsBeforeCall(*this, CallArgs); + // TODO: Update the largest vector width if any arguments have vector types. + + // Compute the calling convention and attributes. + mlir::NamedAttrList Attrs; + StringRef FnName; + if (auto calleeFnOp = dyn_cast(CalleePtr)) + FnName = calleeFnOp.getName(); + + mlir::cir::CallingConv callingConv; + CGM.constructAttributeList(FnName, CallInfo, Callee.getAbstractInfo(), Attrs, + callingConv, + /*AttrOnCallSite=*/true, + /*IsThunk=*/false); + + // TODO: strictfp + // TODO: Add call-site nomerge, noinline, always_inline attribute if exists. + + // Apply some call-site-specific attributes. + // TODO: work this into building the attribute set. + + // Apply always_inline to all calls within flatten functions. + // FIXME: should this really take priority over __try, below? + // assert(!CurCodeDecl->hasAttr() && + // !TargetDecl->hasAttr() && "NYI"); + + // Disable inlining inside SEH __try blocks. + if (isSEHTryScope()) + llvm_unreachable("NYI"); + + // Decide whether to use a call or an invoke. + bool CannotThrow; + if (currentFunctionUsesSEHTry()) { + // SEH cares about asynchronous exceptions, so everything can "throw." + CannotThrow = false; + } else if (isCleanupPadScope() && + EHPersonality::get(*this).isMSVCXXPersonality()) { + // The MSVC++ personality will implicitly terminate the program if an + // exception is thrown during a cleanup outside of a try/catch. + // We don't need to model anything in IR to get this behavior. + CannotThrow = true; + } else { + // Otherwise, nounwind call sites will never throw. + auto noThrowAttr = mlir::cir::NoThrowAttr::get(builder.getContext()); + CannotThrow = Attrs.getNamed(noThrowAttr.getMnemonic()).has_value(); + + if (auto fptr = dyn_cast(CalleePtr)) + if (fptr.getExtraAttrs().getElements().contains( + noThrowAttr.getMnemonic())) + CannotThrow = true; + } + bool isInvoke = CannotThrow ? 
false : isInvokeDest(); + + // TODO: UnusedReturnSizePtr + if (const FunctionDecl *FD = dyn_cast_or_null(CurFuncDecl)) + assert(!FD->hasAttr() && "NYI"); + + // TODO: alignment attributes + + auto callLoc = loc; + mlir::cir::CIRCallOpInterface theCall = [&]() { + mlir::cir::FuncType indirectFuncTy; + mlir::Value indirectFuncVal; + mlir::cir::FuncOp directFuncOp; + + if (auto fnOp = dyn_cast(CalleePtr)) { + directFuncOp = fnOp; + } else if (auto getGlobalOp = dyn_cast(CalleePtr)) { + // FIXME(cir): This peephole optimization to avoids indirect calls for + // builtins. This should be fixed in the builting declaration instead by + // not emitting an unecessary get_global in the first place. + auto *globalOp = mlir::SymbolTable::lookupSymbolIn(CGM.getModule(), + getGlobalOp.getName()); + assert(getGlobalOp && "undefined global function"); + directFuncOp = llvm::dyn_cast(globalOp); + assert(directFuncOp && "operation is not a function"); + } else { + [[maybe_unused]] auto resultTypes = CalleePtr->getResultTypes(); + [[maybe_unused]] auto FuncPtrTy = + mlir::dyn_cast(resultTypes.front()); + assert(FuncPtrTy && + mlir::isa(FuncPtrTy.getPointee()) && + "expected pointer to function"); + + indirectFuncTy = CIRFuncTy; + indirectFuncVal = CalleePtr->getResult(0); + } + + auto extraFnAttrs = mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), Attrs.getDictionary(builder.getContext())); + + mlir::cir::CIRCallOpInterface callLikeOp = buildCallLikeOp( + *this, callLoc, indirectFuncTy, indirectFuncVal, directFuncOp, + CIRCallArgs, isInvoke, callingConv, extraFnAttrs); + + if (E) + callLikeOp->setAttr( + "ast", mlir::cir::ASTCallExprAttr::get(builder.getContext(), *E)); + + if (callOrTryCall) + *callOrTryCall = callLikeOp; + return callLikeOp; + }(); + + if (const auto *FD = dyn_cast_or_null(CurFuncDecl)) + assert(!FD->getAttr() && "NYI"); + + // TODO: set attributes on callop + // assert(!theCall.getResults().getType().front().isSignlessInteger() && + // "Vector NYI"); + // TODO: LLVM models indirect calls via a null callee, how should we do this? + assert(!CGM.getLangOpts().ObjCAutoRefCount && "Not supported"); + assert((!TargetDecl || !TargetDecl->hasAttr()) && "NYI"); + assert(!getDebugInfo() && "No debug info yet"); + assert((!TargetDecl || !TargetDecl->hasAttr()) && "NYI"); + + // 4. Finish the call. + + // If the call doesn't return, finish the basic block and clear the insertion + // point; this allows the rest of CIRGen to discard unreachable code. + // TODO: figure out how to support doesNotReturn + + assert(!IsMustTail && "NYI"); + + // TODO: figure out writebacks? seems like ObjC only __autorelease + + // TODO: cleanup argument memory at the end + + // Extract the return value. 
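The call-emission path above chooses between a plain `cir.call` and an exception-aware `cir.try_call` from two facts: whether the call site can throw at all and whether there is an active invoke destination. A condensed, standalone restatement of that decision, with booleans standing in for the SEH, personality, and attribute queries (those stand-ins are assumptions of the sketch):

```cpp
#include <cassert>

struct CallSite {
  bool inSEHTry;         // SEH cares about asynchronous exceptions.
  bool inMSVCCleanupPad; // MSVC++ personality terminates on throw in cleanup.
  bool calleeIsNoThrow;  // "nothrow" present on the call or on the callee.
  bool hasInvokeDest;    // an enclosing handler / landing pad exists.
};

// Mirrors the CannotThrow / isInvoke logic in buildCall.
static bool needsTryCall(const CallSite &CS) {
  bool CannotThrow;
  if (CS.inSEHTry)
    CannotThrow = false; // everything can "throw" under SEH
  else if (CS.inMSVCCleanupPad)
    CannotThrow = true; // implicit termination, nothing to model
  else
    CannotThrow = CS.calleeIsNoThrow;
  return CannotThrow ? false : CS.hasInvokeDest;
}

int main() {
  assert(needsTryCall({false, false, false, true}));
  assert(!needsTryCall({false, false, true, true}));
  return 0;
}
```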
+ RValue ret = [&] { + switch (RetAI.getKind()) { + case ABIArgInfo::Direct: { + mlir::Type RetCIRTy = convertType(RetTy); + if (RetAI.getCoerceToType() == RetCIRTy && RetAI.getDirectOffset() == 0) { + switch (getEvaluationKind(RetTy)) { + case TEK_Aggregate: { + Address DestPtr = ReturnValue.getValue(); + bool DestIsVolatile = ReturnValue.isVolatile(); + + if (!DestPtr.isValid()) { + DestPtr = CreateMemTemp(RetTy, callLoc, getCounterAggTmpAsString()); + DestIsVolatile = false; + } + + auto Results = theCall->getOpResults(); + assert(Results.size() <= 1 && "multiple returns NYI"); + + SourceLocRAIIObject Loc{*this, callLoc}; + buildAggregateStore(Results[0], DestPtr, DestIsVolatile); + return RValue::getAggregate(DestPtr); + } + case TEK_Scalar: { + // If the argument doesn't match, perform a bitcast to coerce it. This + // can happen due to trivial type mismatches. + auto Results = theCall->getOpResults(); + assert(Results.size() <= 1 && "multiple returns NYI"); + assert(Results[0].getType() == RetCIRTy && "Bitcast support NYI"); + return RValue::get(Results[0]); + } + default: + llvm_unreachable("NYI"); + } + } else { + llvm_unreachable("No other forms implemented yet."); + } + } + + case ABIArgInfo::Ignore: + // If we are ignoring an argument that had a result, make sure to + // construct the appropriate return value for our caller. + return GetUndefRValue(RetTy); + + default: + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); + return RValue{}; + }(); + + // TODO: implement assumed_aligned + + // TODO: implement lifetime extensions + + assert(RetTy.isDestructedType() != QualType::DK_nontrivial_c_struct && "NYI"); + + return ret; +} + +mlir::Value CIRGenFunction::buildRuntimeCall(mlir::Location loc, + mlir::cir::FuncOp callee, + ArrayRef args) { + // TODO(cir): set the calling convention to this runtime call. + assert(!MissingFeatures::setCallingConv()); + + auto call = builder.createCallOp(loc, callee, args); + assert(call->getNumResults() <= 1 && + "runtime functions have at most 1 result"); + + if (call->getNumResults() == 0) + return nullptr; + + return call->getResult(0); +} + +void CIRGenFunction::buildCallArg(CallArgList &args, const Expr *E, + QualType type) { + // TODO: Add the DisableDebugLocationUpdates helper + assert(!dyn_cast(E) && "NYI"); + + assert(type->isReferenceType() == E->isGLValue() && + "reference binding to unmaterialized r-value!"); + + if (E->isGLValue()) { + assert(E->getObjectKind() == OK_Ordinary); + return args.add(buildReferenceBindingToExpr(E), type); + } + + bool HasAggregateEvalKind = hasAggregateEvaluationKind(type); + + // In the Microsoft C++ ABI, aggregate arguments are destructed by the callee. + // However, we still have to push an EH-only cleanup in case we unwind before + // we make it to the call. + if (type->isRecordType() && + type->castAs()->getDecl()->isParamDestroyedInCallee()) { + llvm_unreachable("Microsoft C++ ABI is NYI"); + } + + if (HasAggregateEvalKind && isa(E) && + cast(E)->getCastKind() == CK_LValueToRValue) { + LValue L = buildLValue(cast(E)->getSubExpr()); + assert(L.isSimple()); + args.addUncopiedAggregate(L, type); + return; + } + + args.add(buildAnyExprToTemp(E), type); +} + +QualType CIRGenFunction::getVarArgType(const Expr *Arg) { + // System headers on Windows define NULL to 0 instead of 0LL on Win64. MSVC + // implicitly widens null pointer constants that are arguments to varargs + // functions to pointer-sized ints. 
+ if (!getTarget().getTriple().isOSWindows()) + return Arg->getType(); + + if (Arg->getType()->isIntegerType() && + getContext().getTypeSize(Arg->getType()) < + getContext().getTargetInfo().getPointerWidth(LangAS::Default) && + Arg->isNullPointerConstant(getContext(), + Expr::NPC_ValueDependentIsNotNull)) { + return getContext().getIntPtrType(); + } + + return Arg->getType(); +} + +/// Similar to buildAnyExpr(), however, the result will always be accessible +/// even if no aggregate location is provided. +RValue CIRGenFunction::buildAnyExprToTemp(const Expr *E) { + AggValueSlot AggSlot = AggValueSlot::ignored(); + + if (hasAggregateEvaluationKind(E->getType())) + AggSlot = CreateAggTemp(E->getType(), getLoc(E->getSourceRange()), + getCounterAggTmpAsString()); + + return buildAnyExpr(E, AggSlot); +} + +void CIRGenFunction::buildCallArgs( + CallArgList &Args, PrototypeWrapper Prototype, + llvm::iterator_range ArgRange, + AbstractCallee AC, unsigned ParamsToSkip, EvaluationOrder Order) { + + llvm::SmallVector ArgTypes; + + assert((ParamsToSkip == 0 || Prototype.P) && + "Can't skip parameters if type info is not provided"); + + // This variable only captures *explicitly* written conventions, not those + // applied by default via command line flags or target defaults, such as + // thiscall, appcs, stdcall via -mrtd, etc. Computing that correctly would + // require knowing if this is a C++ instance method or being able to see + // unprotyped FunctionTypes. + CallingConv ExplicitCC = CC_C; + + // First, if a prototype was provided, use those argument types. + bool IsVariadic = false; + if (Prototype.P) { + const auto *MD = mlir::dyn_cast(Prototype.P); + assert(!MD && "ObjCMethodDecl NYI"); + + const auto *FPT = Prototype.P.get(); + IsVariadic = FPT->isVariadic(); + ExplicitCC = FPT->getExtInfo().getCC(); + ArgTypes.assign(FPT->param_type_begin() + ParamsToSkip, + FPT->param_type_end()); + } + + // If we still have any arguments, emit them using the type of the argument. + for (auto *A : llvm::drop_begin(ArgRange, ArgTypes.size())) + ArgTypes.push_back(IsVariadic ? getVarArgType(A) : A->getType()); + assert((int)ArgTypes.size() == (ArgRange.end() - ArgRange.begin())); + + // We must evaluate arguments from right to left in the MS C++ ABI, because + // arguments are destroyed left to right in the callee. As a special case, + // there are certain language constructs taht require left-to-right + // evaluation, and in those cases we consider the evaluation order requirement + // to trump the "destruction order is reverse construction order" guarantee. + bool LeftToRight = true; + assert(!CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee() && + "MSABI NYI"); + assert(!hasInAllocaArgs(CGM, ExplicitCC, ArgTypes) && "NYI"); + + auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg, + RValue EmittedArg) { + if (!AC.hasFunctionDecl() || I >= AC.getNumParams()) + return; + auto *PS = AC.getParamDecl(I)->getAttr(); + if (PS == nullptr) + return; + + const auto &Context = getContext(); + auto SizeTy = Context.getSizeType(); + auto T = builder.getUIntNTy(Context.getTypeSize(SizeTy)); + assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?"); + auto V = evaluateOrEmitBuiltinObjectSize( + Arg, PS->getType(), T, EmittedArg.getScalarVal(), PS->isDynamic()); + Args.add(RValue::get(V), SizeTy); + // If we're emitting args in reverse, be sure to do so with + // pass_object_size, as well. 
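`getVarArgType` above widens integer null-pointer constants passed to varargs functions on Windows targets, because system headers there define `NULL` as `0` rather than `0LL` on Win64. A standalone sketch of that promotion rule; the type model below is a simplification, not Clang's type system:

```cpp
#include <cassert>

struct ArgDesc {
  bool isInteger;
  unsigned bitWidth;
  bool isNullPointerConstant;
};

// Mirrors getVarArgType: on Windows, an integer null-pointer constant that is
// narrower than a pointer is promoted to a pointer-sized integer.
static unsigned varArgBitWidth(const ArgDesc &Arg, bool isWindows,
                               unsigned pointerWidth) {
  if (!isWindows)
    return Arg.bitWidth;
  if (Arg.isInteger && Arg.bitWidth < pointerWidth &&
      Arg.isNullPointerConstant)
    return pointerWidth; // use an intptr_t-sized integer instead
  return Arg.bitWidth;
}

int main() {
  ArgDesc NullLiteral{true, 32, true};
  assert(varArgBitWidth(NullLiteral, /*isWindows=*/true, 64) == 64);
  assert(varArgBitWidth(NullLiteral, /*isWindows=*/false, 64) == 32);
  return 0;
}
```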
+ if (!LeftToRight) + std::swap(Args.back(), *(&Args.back() - 1)); + }; + + // Evaluate each argument in the appropriate order. + size_t CallArgsStart = Args.size(); + for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) { + unsigned Idx = LeftToRight ? I : E - I - 1; + CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx; + unsigned InitialArgSize = Args.size(); + assert(!isa(*Arg) && "NYI"); + assert(!isa_and_nonnull(AC.getDecl()) && "NYI"); + + buildCallArg(Args, *Arg, ArgTypes[Idx]); + // In particular, we depend on it being the last arg in Args, and the + // objectsize bits depend on there only being one arg if !LeftToRight. + assert(InitialArgSize + 1 == Args.size() && + "The code below depends on only adding one arg per buildCallArg"); + (void)InitialArgSize; + // Since pointer argument are never emitted as LValue, it is safe to emit + // non-null argument check for r-value only. + if (!Args.back().hasLValue()) { + RValue RVArg = Args.back().getKnownRValue(); + assert(!SanOpts.has(SanitizerKind::NonnullAttribute) && "Sanitizers NYI"); + assert(!SanOpts.has(SanitizerKind::NullabilityArg) && "Sanitizers NYI"); + // @llvm.objectsize should never have side-effects and shouldn't need + // destruction/cleanups, so we can safely "emit" it after its arg, + // regardless of right-to-leftness + MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg); + } + } + + if (!LeftToRight) { + // Un-reverse the arguments we just evaluated so they match up with the CIR + // function. + std::reverse(Args.begin() + CallArgsStart, Args.end()); + } +} + +/// Returns the canonical formal type of the given C++ method. +static CanQual GetFormalType(const CXXMethodDecl *MD) { + return MD->getType() + ->getCanonicalTypeUnqualified() + .getAs(); +} + +/// TODO(cir): this should be shared with LLVM codegen +static void addExtParameterInfosForCall( + llvm::SmallVectorImpl ¶mInfos, + const FunctionProtoType *proto, unsigned prefixArgs, unsigned totalArgs) { + assert(proto->hasExtParameterInfos()); + assert(paramInfos.size() <= prefixArgs); + assert(proto->getNumParams() + prefixArgs <= totalArgs); + + paramInfos.reserve(totalArgs); + + // Add default infos for any prefix args that don't already have infos. + paramInfos.resize(prefixArgs); + + // Add infos for the prototype. + for (const auto &ParamInfo : proto->getExtParameterInfos()) { + paramInfos.push_back(ParamInfo); + // pass_object_size params have no parameter info. + if (ParamInfo.hasPassObjectSize()) + paramInfos.emplace_back(); + } + + assert(paramInfos.size() <= totalArgs && + "Did we forget to insert pass_object_size args?"); + // Add default infos for the variadic and/or suffix arguments. + paramInfos.resize(totalArgs); +} + +/// Adds the formal parameters in FPT to the given prefix. If any parameter in +/// FPT has pass_object_size_attrs, then we'll add parameters for those, too. +/// TODO(cir): this should be shared with LLVM codegen +static void appendParameterTypes( + const CIRGenTypes &CGT, SmallVectorImpl &prefix, + SmallVectorImpl ¶mInfos, + CanQual FPT) { + // Fast path: don't touch param info if we don't need to. + if (!FPT->hasExtParameterInfos()) { + assert(paramInfos.empty() && + "We have paramInfos, but the prototype doesn't?"); + prefix.append(FPT->param_type_begin(), FPT->param_type_end()); + return; + } + + unsigned PrefixSize = prefix.size(); + // In the vast majority of cases, we'll have precisely FPT->getNumParams() + // parameters; the only thing that can change this is the presence of + // pass_object_size. 
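`buildCallArgs` above evaluates arguments right-to-left when the ABI destroys them left-to-right in the callee (the MS C++ ABI case, still NYI here) and then un-reverses the emitted list so it lines up with the CIR function signature. A small sketch of that index arithmetic and the final reversal, using string stand-ins for the emitted arguments:

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

// Evaluate "arguments" in the requested order, then restore signature order,
// mirroring the Idx computation and std::reverse in buildCallArgs.
static std::vector<std::string>
emitArgs(const std::vector<std::string> &Exprs, bool LeftToRight) {
  std::vector<std::string> Args;
  size_t CallArgsStart = Args.size();
  for (size_t I = 0, E = Exprs.size(); I != E; ++I) {
    size_t Idx = LeftToRight ? I : E - I - 1;
    Args.push_back("eval(" + Exprs[Idx] + ")");
  }
  if (!LeftToRight)
    std::reverse(Args.begin() + CallArgsStart, Args.end());
  return Args;
}

int main() {
  auto RTL = emitArgs({"a", "b", "c"}, /*LeftToRight=*/false);
  // Evaluation happened c, b, a, but the list matches the signature order.
  assert(RTL.front() == "eval(a)" && RTL.back() == "eval(c)");
  return 0;
}
```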
So, we preallocate for the common case. + prefix.reserve(prefix.size() + FPT->getNumParams()); + + auto ExtInfos = FPT->getExtParameterInfos(); + assert(ExtInfos.size() == FPT->getNumParams()); + for (unsigned I = 0, E = FPT->getNumParams(); I != E; ++I) { + prefix.push_back(FPT->getParamType(I)); + if (ExtInfos[I].hasPassObjectSize()) + prefix.push_back(CGT.getContext().getSizeType()); + } + + addExtParameterInfosForCall(paramInfos, FPT.getTypePtr(), PrefixSize, + prefix.size()); +} + +const CIRGenFunctionInfo & +CIRGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { + auto *MD = cast(GD.getDecl()); + + llvm::SmallVector argTypes; + SmallVector paramInfos; + argTypes.push_back(DeriveThisType(MD->getParent(), MD)); + + bool PassParams = true; + + if (auto *CD = dyn_cast(MD)) { + // A base class inheriting constructor doesn't get forwarded arguments + // needed to construct a virtual base (or base class thereof) + assert(!CD->getInheritedConstructor() && "Inheritance NYI"); + } + + CanQual FTP = GetFormalType(MD); + + if (PassParams) + appendParameterTypes(*this, argTypes, paramInfos, FTP); + + assert(paramInfos.empty() && "NYI"); + + assert(!MD->isVariadic() && "Variadic fns NYI"); + RequiredArgs required = RequiredArgs::All; + (void)required; + + FunctionType::ExtInfo extInfo = FTP->getExtInfo(); + + assert(!TheCXXABI.HasThisReturn(GD) && "NYI"); + + CanQualType resultType = Context.VoidTy; + (void)resultType; + + return arrangeCIRFunctionInfo(resultType, FnInfoOpts::IsInstanceMethod, + argTypes, extInfo, paramInfos, required); +} + +/// Derives the 'this' type for CIRGen purposes, i.e. ignoring method CVR +/// qualification. Either or both of RD and MD may be null. A null RD indicates +/// that there is no meaningful 'this' type, and a null MD can occur when +/// calling a method pointer. +CanQualType CIRGenTypes::DeriveThisType(const CXXRecordDecl *RD, + const CXXMethodDecl *MD) { + QualType RecTy; + if (RD) + RecTy = getContext().getTagDeclType(RD)->getCanonicalTypeInternal(); + else + assert(false && "CXXMethodDecl NYI"); + + if (MD) + RecTy = getContext().getAddrSpaceQualType( + RecTy, MD->getMethodQualifiers().getAddressSpace()); + return getContext().getPointerType(CanQualType::CreateUnsafe(RecTy)); +} + +/// Arrange the CIR function layout for a value of the given function type, on +/// top of any implicit parameters already stored. +static const CIRGenFunctionInfo & +arrangeCIRFunctionInfo(CIRGenTypes &CGT, FnInfoOpts instanceMethod, + SmallVectorImpl &prefix, + CanQual FTP) { + SmallVector paramInfos; + RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); + // FIXME: Kill copy. -- from codegen + appendParameterTypes(CGT, prefix, paramInfos, FTP); + CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); + + return CGT.arrangeCIRFunctionInfo(resultType, instanceMethod, prefix, + FTP->getExtInfo(), paramInfos, Required); +} + +/// Arrange the argument and result information for a value of the given +/// freestanding function type. +const CIRGenFunctionInfo & +CIRGenTypes::arrangeFreeFunctionType(CanQual FTP) { + SmallVector argTypes; + return ::arrangeCIRFunctionInfo(*this, FnInfoOpts::None, argTypes, FTP); +} + +/// Arrange the argument and result information for a value of the given +/// unprototyped freestanding function type. +const CIRGenFunctionInfo & +CIRGenTypes::arrangeFreeFunctionType(CanQual FTNP) { + // When translating an unprototyped function type, always use a + // variadic type. 
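`appendParameterTypes` above inserts an extra `size_t` parameter immediately after every parameter marked with `pass_object_size`, and `addExtParameterInfosForCall` pads the parameter-info list to match. A standalone sketch of that flattening step, with string stand-ins for the canonical types:

```cpp
#include <cassert>
#include <string>
#include <vector>

struct Param {
  std::string Type;
  bool hasPassObjectSize;
};

// Every pass_object_size parameter is followed by an implicit size_t that
// carries the computed object size, mirroring appendParameterTypes.
static std::vector<std::string> flattenParams(const std::vector<Param> &Ps) {
  std::vector<std::string> Types;
  Types.reserve(Ps.size());
  for (const Param &P : Ps) {
    Types.push_back(P.Type);
    if (P.hasPassObjectSize)
      Types.push_back("size_t");
  }
  return Types;
}

int main() {
  auto Types = flattenParams({{"char *", true}, {"int", false}});
  assert((Types == std::vector<std::string>{"char *", "size_t", "int"}));
  return 0;
}
```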
+ return arrangeCIRFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), + FnInfoOpts::None, std::nullopt, + FTNP->getExtInfo(), {}, RequiredArgs(0)); +} + +const CIRGenFunctionInfo & +CIRGenTypes::arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args) { + // FIXME: Kill copy. + SmallVector argTypes; + for (const auto &Arg : args) + argTypes.push_back(getContext().getCanonicalParamType(Arg.Ty)); + llvm_unreachable("NYI"); +} + +/// Arrange a call to a C++ method, passing the given arguments. +/// +/// ExtraPrefixArgs is the number of ABI-specific args passed after the `this` +/// parameter. +/// ExtraSuffixArgs is the number of ABI-specific args passed at the end of +/// args. +/// PassProtoArgs indicates whether `args` has args for the parameters in the +/// given CXXConstructorDecl. +const CIRGenFunctionInfo &CIRGenTypes::arrangeCXXConstructorCall( + const CallArgList &Args, const CXXConstructorDecl *D, CXXCtorType CtorKind, + unsigned ExtraPrefixArgs, unsigned ExtraSuffixArgs, bool PassProtoArgs) { + + // FIXME: Kill copy. + llvm::SmallVector ArgTypes; + for (const auto &Arg : Args) + ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); + + // +1 for implicit this, which should always be args[0] + unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs; + + CanQual FPT = GetFormalType(D); + RequiredArgs Required = PassProtoArgs + ? RequiredArgs::forPrototypePlus( + FPT, TotalPrefixArgs + ExtraSuffixArgs) + : RequiredArgs::All; + + GlobalDecl GD(D, CtorKind); + assert(!TheCXXABI.HasThisReturn(GD) && "ThisReturn NYI"); + assert(!TheCXXABI.hasMostDerivedReturn(GD) && "Most derived return NYI"); + CanQualType ResultType = Context.VoidTy; + + FunctionType::ExtInfo Info = FPT->getExtInfo(); + llvm::SmallVector ParamInfos; + // If the prototype args are elided, we should onlyy have ABI-specific args, + // which never have param info. + assert(!FPT->hasExtParameterInfos() && "NYI"); + + return arrangeCIRFunctionInfo(ResultType, FnInfoOpts::IsInstanceMethod, + ArgTypes, Info, ParamInfos, Required); +} + +bool CIRGenTypes::inheritingCtorHasParams(const InheritedConstructor &Inherited, + CXXCtorType Type) { + + // Parameters are unnecessary if we're constructing a base class subobject and + // the inherited constructor lives in a virtual base. + return Type == Ctor_Complete || + !Inherited.getShadowDecl()->constructsVirtualBase() || + !Target.getCXXABI().hasConstructorVariants(); +} + +bool CIRGenModule::MayDropFunctionReturn(const ASTContext &Context, + QualType ReturnType) { + // We can't just disard the return value for a record type with a complex + // destructor or a non-trivially copyable type. + if (const RecordType *RT = + ReturnType.getCanonicalType()->getAs()) { + llvm_unreachable("NYI"); + } + + return ReturnType.isTriviallyCopyableType(Context); +} + +static bool isInAllocaArgument(CIRGenCXXABI &ABI, QualType type) { + const auto *RD = type->getAsCXXRecordDecl(); + return RD && + ABI.getRecordArgABI(RD) == CIRGenCXXABI::RecordArgABI::DirectInMemory; +} + +void CIRGenFunction::buildDelegateCallArg(CallArgList &args, + const VarDecl *param, + SourceLocation loc) { + // StartFunction converted the ABI-lowered parameter(s) into a local alloca. 
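Several of the `arrange*` helpers above compute a `RequiredArgs` value: for a non-variadic prototype every argument is required, while for a variadic one only the prototype parameters plus any ABI prefix/suffix arguments are. A simplified model of `RequiredArgs::forPrototypePlus` as used by `arrangeCXXConstructorCall`; the `kAll` sentinel is an assumption of this sketch, not the real representation:

```cpp
#include <cassert>

// Simplified model of RequiredArgs.
constexpr unsigned kAll = ~0U; // "all arguments are required"

static unsigned forPrototypePlus(bool isVariadic, unsigned numProtoParams,
                                 unsigned additional) {
  return isVariadic ? numProtoParams + additional : kAll;
}

int main() {
  // Constructor call: "this" plus one ABI prefix arg precede the prototype.
  unsigned TotalPrefixArgs = 1 + /*ExtraPrefixArgs=*/1;
  assert(forPrototypePlus(/*isVariadic=*/true, /*numProtoParams=*/2,
                          TotalPrefixArgs + /*ExtraSuffixArgs=*/0) == 4);
  assert(forPrototypePlus(/*isVariadic=*/false, 2, TotalPrefixArgs) == kAll);
  return 0;
}
```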
+ // We need to turn that into an r-value suitable for buildCall + Address local = GetAddrOfLocalVar(param); + + QualType type = param->getType(); + + if (isInAllocaArgument(CGM.getCXXABI(), type)) { + llvm_unreachable("NYI"); + } + + // GetAddrOfLocalVar returns a pointer-to-pointer for references, but the + // argument needs to be the original pointer. + if (type->isReferenceType()) { + args.add( + RValue::get(builder.createLoad(getLoc(param->getSourceRange()), local)), + type); + } else if (getLangOpts().ObjCAutoRefCount) { + llvm_unreachable("NYI"); + // For the most part, we just need to load the alloca, except that aggregate + // r-values are actually pointers to temporaries. + } else { + args.add(convertTempToRValue(local, type, loc), type); + } + + // Deactivate the cleanup for the callee-destructed param that was pushed. + if (type->isRecordType() && !CurFuncIsThunk && + type->castAs()->getDecl()->isParamDestroyedInCallee() && + param->needsDestruction(getContext())) { + llvm_unreachable("NYI"); + } +} + +/// Returns the "extra-canonicalized" return type, which discards qualifiers on +/// the return type. Codegen doesn't care about them, and it makes ABI code a +/// little easier to be able to assume that all parameter and return types are +/// top-level unqualified. +/// FIXME(CIR): This should be a common helper extracted from CodeGen +static CanQualType GetReturnType(QualType RetTy) { + return RetTy->getCanonicalTypeUnqualified().getUnqualifiedType(); +} + +/// Arrange a call as unto a free function, except possibly with an additional +/// number of formal parameters considered required. +static const CIRGenFunctionInfo & +arrangeFreeFunctionLikeCall(CIRGenTypes &CGT, CIRGenModule &CGM, + const CallArgList &args, const FunctionType *fnType, + unsigned numExtraRequiredArgs, + FnInfoOpts chainCall) { + assert(args.size() >= numExtraRequiredArgs); + assert((chainCall != FnInfoOpts::IsChainCall) && "Chain call NYI"); + + llvm::SmallVector paramInfos; + + // In most cases, there are no optional arguments. + RequiredArgs required = RequiredArgs::All; + + // If we have a variadic prototype, the required arguments are the + // extra prefix plus the arguments in the prototype. + if (const FunctionProtoType *proto = dyn_cast(fnType)) { + if (proto->isVariadic()) + required = RequiredArgs::forPrototypePlus(proto, numExtraRequiredArgs); + + if (proto->hasExtParameterInfos()) + addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, + args.size()); + } else if (llvm::isa(fnType)) { + assert(!MissingFeatures::targetCodeGenInfoIsProtoCallVariadic()); + required = RequiredArgs(args.size()); + } + + // FIXME: Kill copy. + SmallVector argTypes; + for (const auto &arg : args) + argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty)); + return CGT.arrangeCIRFunctionInfo(GetReturnType(fnType->getReturnType()), + chainCall, argTypes, fnType->getExtInfo(), + paramInfos, required); +} + +static llvm::SmallVector +getArgTypesForCall(ASTContext &ctx, const CallArgList &args) { + llvm::SmallVector argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg.Ty)); + return argTypes; +} + +static llvm::SmallVector +getExtParameterInfosForCall(const FunctionProtoType *proto, unsigned prefixArgs, + unsigned totalArgs) { + llvm::SmallVector result; + if (proto->hasExtParameterInfos()) { + llvm_unreachable("NYI"); + } + return result; +} + +/// Arrange a call to a C++ method, passing the given arguments. 
+///
+/// numPrefixArgs is the number of the ABI-specific prefix arguments we have. It
+/// does not count `this`.
+const CIRGenFunctionInfo &CIRGenTypes::arrangeCXXMethodCall(
+    const CallArgList &args, const FunctionProtoType *proto,
+    RequiredArgs required, unsigned numPrefixArgs) {
+  assert(numPrefixArgs + 1 <= args.size() &&
+         "Emitting a call with less args than the required prefix?");
+  // Add one to account for `this`. It is a bit awkward here, but we don't count
+  // `this` in similar places elsewhere.
+  auto paramInfos =
+      getExtParameterInfosForCall(proto, numPrefixArgs + 1, args.size());
+
+  // FIXME: Kill copy.
+  auto argTypes = getArgTypesForCall(Context, args);
+
+  auto info = proto->getExtInfo();
+  return arrangeCIRFunctionInfo(GetReturnType(proto->getReturnType()),
+                                FnInfoOpts::IsInstanceMethod, argTypes, info,
+                                paramInfos, required);
+}
+
+/// Figure out the rules for calling a function with the given formal type using
+/// the given arguments. The arguments are necessary because the function might
+/// be unprototyped, in which case it's target-dependent in crazy ways.
+const CIRGenFunctionInfo &CIRGenTypes::arrangeFreeFunctionCall(
+    const CallArgList &args, const FunctionType *fnType, bool ChainCall) {
+  assert(!ChainCall && "ChainCall NYI");
+  return arrangeFreeFunctionLikeCall(
+      *this, CGM, args, fnType, ChainCall ? 1 : 0,
+      ChainCall ? FnInfoOpts::IsChainCall : FnInfoOpts::None);
+}
+
+/// Set calling convention for CUDA/HIP kernel.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CIRGenModule &CGM,
+                                           const FunctionDecl *FD) {
+  if (FD->hasAttr<CUDAGlobalAttr>()) {
+    llvm_unreachable("NYI");
+  }
+}
+
+/// Arrange the argument and result information for a declaration or definition
+/// of the given C++ non-static member function. The member function must be an
+/// ordinary function, i.e. not a constructor or destructor.
+const CIRGenFunctionInfo &
+CIRGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
+  assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
+  assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
+
+  CanQualType FT = GetFormalType(MD).getAs<Type>();
+  setCUDAKernelCallingConvention(FT, CGM, MD);
+  auto prototype = FT.getAs<FunctionProtoType>();
+
+  if (MD->isInstance()) {
+    // The abstract case is perfectly fine.
+    auto *ThisType = TheCXXABI.getThisArgumentTypeForMethod(MD);
+    return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD);
+  }
+
+  return arrangeFreeFunctionType(prototype);
+}
+
+/// Arrange the argument and result information for a call to an unknown C++
+/// non-static member function of the given abstract type. (A null RD means we
+/// don't have any meaningful "this" argument type, so fall back to a generic
+/// pointer type). The member function must be an ordinary function, i.e. not a
+/// constructor or destructor.
+const CIRGenFunctionInfo &
+CIRGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
+                                  const FunctionProtoType *FTP,
+                                  const CXXMethodDecl *MD) {
+  llvm::SmallVector<CanQualType, 16> argTypes;
+
+  // Add the 'this' pointer.
+  argTypes.push_back(DeriveThisType(RD, MD));
+
+  return ::arrangeCIRFunctionInfo(
+      *this, FnInfoOpts::IsChainCall, argTypes,
+      FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>());
+}
+
+/// Arrange the argument and result information for the declaration or
+/// definition of the given function.
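As context for the arrangement routines above: arrangeCXXMethodType prepends DeriveThisType(...) to the argument list, so at the ABI level a non-static member function is shaped like a free function whose first parameter is the object pointer. The standalone sketch below illustrates that model with hypothetical names (Widget, Widget_apply_lowered); it is not clangir API. The declaration-level arrangement then continues in arrangeFunctionDeclaration below.

```cpp
// Standalone sketch (hypothetical names, not clangir API): a member call is
// arranged as a free call with the object pointer prepended, which is what
// argTypes.push_back(DeriveThisType(RD, MD)) models above.
#include <cstdio>

struct Widget {
  int scale;
  int apply(int v) const { return v * scale; }
};

// "Lowered" view of Widget::apply: the implicit object parameter becomes an
// explicit leading argument.
static int Widget_apply_lowered(const Widget *self, int v) {
  return v * self->scale;
}

int main() {
  Widget w{3};
  std::printf("%d %d\n", w.apply(7), Widget_apply_lowered(&w, 7)); // 21 21
  return 0;
}
```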
+const CIRGenFunctionInfo & +CIRGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { + if (const auto *MD = dyn_cast(FD)) + if (MD->isInstance()) + return arrangeCXXMethodDeclaration(MD); + + auto FTy = FD->getType()->getCanonicalTypeUnqualified(); + + assert(isa(FTy)); + // TODO: setCUDAKernelCallingConvention + + // When declaring a function without a prototype, always use a non-variadic + // type. + if (CanQual noProto = FTy.getAs()) { + return arrangeCIRFunctionInfo(noProto->getReturnType(), FnInfoOpts::None, + std::nullopt, noProto->getExtInfo(), {}, + RequiredArgs::All); + } + + return arrangeFreeFunctionType(FTy.castAs()); +} + +RValue CallArg::getRValue(CIRGenFunction &CGF, mlir::Location loc) const { + if (!HasLV) + return RV; + LValue Copy = CGF.makeAddrLValue(CGF.CreateMemTemp(Ty, loc), Ty); + CGF.buildAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap, + LV.isVolatile()); + IsUsed = true; + return RValue::getAggregate(Copy.getAddress()); +} + +void CIRGenFunction::buildNonNullArgCheck(RValue RV, QualType ArgType, + SourceLocation ArgLoc, + AbstractCallee AC, unsigned ParmNum) { + if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) || + SanOpts.has(SanitizerKind::NullabilityArg))) + return; + llvm_unreachable("non-null arg check is NYI"); +} + +/* VarArg handling */ + +// FIXME(cir): This completely abstracts away the ABI with a generic CIR Op. We +// need to decide how to handle va_arg target-specific codegen. +mlir::Value CIRGenFunction::buildVAArg(VAArgExpr *VE, Address &VAListAddr) { + assert(!VE->isMicrosoftABI() && "NYI"); + auto loc = CGM.getLoc(VE->getExprLoc()); + auto type = ConvertType(VE->getType()); + auto vaList = buildVAListRef(VE->getSubExpr()).getPointer(); + return builder.create(loc, type, vaList); +} + +static void getTrivialDefaultFunctionAttributes( + StringRef name, bool hasOptnone, const CodeGenOptions &codeGenOpts, + const LangOptions &langOpts, bool attrOnCallSite, CIRGenModule &CGM, + mlir::NamedAttrList &funcAttrs) { + + if (langOpts.assumeFunctionsAreConvergent()) { + // Conservatively, mark all functions and calls in CUDA and OpenCL as + // convergent (meaning, they may call an intrinsically convergent op, such + // as __syncthreads() / barrier(), and so can't have certain optimizations + // applied around them). LLVM will remove this attribute where it safely + // can. + + auto convgt = mlir::cir::ConvergentAttr::get(CGM.getBuilder().getContext()); + funcAttrs.set(convgt.getMnemonic(), convgt); + } +} + +void CIRGenModule::getTrivialDefaultFunctionAttributes( + StringRef name, bool hasOptnone, bool attrOnCallSite, + mlir::NamedAttrList &funcAttrs) { + ::getTrivialDefaultFunctionAttributes(name, hasOptnone, getCodeGenOpts(), + getLangOpts(), attrOnCallSite, *this, + funcAttrs); +} + +void CIRGenModule::getDefaultFunctionAttributes( + StringRef name, bool hasOptnone, bool attrOnCallSite, + mlir::NamedAttrList &funcAttrs) { + getTrivialDefaultFunctionAttributes(name, hasOptnone, attrOnCallSite, + funcAttrs); + // If we're just getting the default, get the default values for mergeable + // attributes. 
+ if (!attrOnCallSite) { + // TODO(cir): addMergableDefaultFunctionAttributes(codeGenOpts, funcAttrs); + } +} diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h new file mode 100644 index 000000000000..ea8e9e546352 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCall.h @@ -0,0 +1,295 @@ +//===----- CIRGenCall.h - Encapsulate calling convention details ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes wrap the information about a call or function +// definition used to handle ABI compliancy. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CIRGENCALL_H +#define LLVM_CLANG_LIB_CODEGEN_CIRGENCALL_H + +#include "CIRGenValue.h" + +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/Type.h" + +#include "llvm/ADT/SmallVector.h" + +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +#include "mlir/IR/BuiltinOps.h" + +namespace cir { +class CIRGenFunction; + +/// Abstract information about a function or function prototype. +class CIRGenCalleeInfo { + const clang::FunctionProtoType *CalleeProtoTy; + clang::GlobalDecl CalleeDecl; + +public: + explicit CIRGenCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {} + CIRGenCalleeInfo(const clang::FunctionProtoType *calleeProtoTy, + clang::GlobalDecl calleeDecl) + : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} + CIRGenCalleeInfo(const clang::FunctionProtoType *calleeProtoTy) + : CalleeProtoTy(calleeProtoTy) {} + CIRGenCalleeInfo(clang::GlobalDecl calleeDecl) + : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} + + const clang::FunctionProtoType *getCalleeFunctionProtoType() const { + return CalleeProtoTy; + } + const clang::GlobalDecl getCalleeDecl() const { return CalleeDecl; } +}; + +/// All available information about a concrete callee. +class CIRGenCallee { + enum class SpecialKind : uintptr_t { + Invalid, + Builtin, + PsuedoDestructor, + Virtual, + + Last = Virtual + }; + + struct BuiltinInfoStorage { + const clang::FunctionDecl *Decl; + unsigned ID; + }; + struct PseudoDestructorInfoStorage { + const clang::CXXPseudoDestructorExpr *Expr; + }; + struct VirtualInfoStorage { + const clang::CallExpr *CE; + clang::GlobalDecl MD; + Address Addr; + mlir::cir::FuncType FTy; + }; + + SpecialKind KindOrFunctionPointer; + + union { + CIRGenCalleeInfo AbstractInfo; + BuiltinInfoStorage BuiltinInfo; + PseudoDestructorInfoStorage PseudoDestructorInfo; + VirtualInfoStorage VirtualInfo; + }; + + explicit CIRGenCallee(SpecialKind kind) : KindOrFunctionPointer(kind) {} + +public: + CIRGenCallee() : KindOrFunctionPointer(SpecialKind::Invalid) {} + + // Construct a callee. Call this constructor directly when this isn't a direct + // call. 
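The constructor that follows relies on the representation trick set up above: KindOrFunctionPointer stores either one of the small SpecialKind tags or a real function pointer, and isOrdinary() tells them apart by comparing against SpecialKind::Last. A minimal standalone sketch of that pattern, with hypothetical names and assuming valid object pointers never fall in the reserved low range:

```cpp
// Standalone sketch of the tag-or-pointer storage used by CIRGenCallee above.
// Names are illustrative; the key assumption (documented by the assert) is
// that no valid pointer collides with the few reserved tag values.
#include <cassert>
#include <cstdint>

enum class SpecialKind : std::uintptr_t { Invalid, Builtin, Virtual, Last = Virtual };

class KindOrPointer {
  SpecialKind Value;

public:
  explicit KindOrPointer(SpecialKind k) : Value(k) {}
  explicit KindOrPointer(void *ptr)
      : Value(SpecialKind(reinterpret_cast<std::uintptr_t>(ptr))) {
    assert(isOrdinary() && "pointer collided with a reserved tag value");
  }

  // Ordinary entries hold a real pointer, which is numerically larger than
  // every reserved tag.
  bool isOrdinary() const {
    return std::uintptr_t(Value) > std::uintptr_t(SpecialKind::Last);
  }

  void *getPointer() const {
    assert(isOrdinary());
    return reinterpret_cast<void *>(Value);
  }
};

int main() {
  int x = 0;
  KindOrPointer ordinary(&x), builtin(SpecialKind::Builtin);
  assert(ordinary.isOrdinary() && ordinary.getPointer() == &x);
  assert(!builtin.isOrdinary());
  return 0;
}
```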
+ CIRGenCallee(const CIRGenCalleeInfo &abstractInfo, + mlir::Operation *functionPtr) + : KindOrFunctionPointer( + SpecialKind(reinterpret_cast(functionPtr))) { + AbstractInfo = abstractInfo; + assert(functionPtr && "configuring callee without function pointer"); + // TODO: codegen asserts functionPtr is a pointer + // TODO: codegen asserts functionPtr is either an opaque pointer type or a + // pointer to a function + } + + static CIRGenCallee + forDirect(mlir::Operation *functionPtr, + const CIRGenCalleeInfo &abstractInfo = CIRGenCalleeInfo()) { + return CIRGenCallee(abstractInfo, functionPtr); + } + + bool isBuiltin() const { + return KindOrFunctionPointer == SpecialKind::Builtin; + } + + const clang::FunctionDecl *getBuiltinDecl() const { + assert(isBuiltin()); + return BuiltinInfo.Decl; + } + unsigned getBuiltinID() const { + assert(isBuiltin()); + return BuiltinInfo.ID; + } + + static CIRGenCallee forBuiltin(unsigned builtinID, + const clang::FunctionDecl *builtinDecl) { + CIRGenCallee result(SpecialKind::Builtin); + result.BuiltinInfo.Decl = builtinDecl; + result.BuiltinInfo.ID = builtinID; + return result; + } + + bool isPsuedoDestructor() const { + return KindOrFunctionPointer == SpecialKind::PsuedoDestructor; + } + + bool isOrdinary() const { + return uintptr_t(KindOrFunctionPointer) > uintptr_t(SpecialKind::Last); + } + + /// If this is a delayed callee computation of some sort, prepare a concrete + /// callee + CIRGenCallee prepareConcreteCallee(CIRGenFunction &CGF) const; + + mlir::Operation *getFunctionPointer() const { + assert(isOrdinary()); + return reinterpret_cast(KindOrFunctionPointer); + } + + CIRGenCalleeInfo getAbstractInfo() const { + if (isVirtual()) + return VirtualInfo.MD; + assert(isOrdinary()); + return AbstractInfo; + } + + bool isVirtual() const { + return KindOrFunctionPointer == SpecialKind::Virtual; + } + + static CIRGenCallee forVirtual(const clang::CallExpr *CE, + clang::GlobalDecl MD, Address Addr, + mlir::cir::FuncType FTy) { + CIRGenCallee result(SpecialKind::Virtual); + result.VirtualInfo.CE = CE; + result.VirtualInfo.MD = MD; + result.VirtualInfo.Addr = Addr; + result.VirtualInfo.FTy = FTy; + return result; + } + + const clang::CallExpr *getVirtualCallExpr() const { + assert(isVirtual()); + return VirtualInfo.CE; + } + + clang::GlobalDecl getVirtualMethodDecl() const { + assert(isVirtual()); + return VirtualInfo.MD; + } + Address getThisAddress() const { + assert(isVirtual()); + return VirtualInfo.Addr; + } + mlir::cir::FuncType getVirtualFunctionType() const { + assert(isVirtual()); + return VirtualInfo.FTy; + } + + void setFunctionPointer(mlir::Operation *functionPtr) { + assert(isOrdinary()); + KindOrFunctionPointer = + SpecialKind(reinterpret_cast(functionPtr)); + } +}; + +struct CallArg { +private: + union { + RValue RV; + LValue LV; /// This argument is semantically a load from this l-value + }; + bool HasLV; + + /// A data-flow flag to make sure getRValue and/or copyInto are not + /// called twice for duplicated IR emission. + mutable bool IsUsed; + +public: + clang::QualType Ty; + CallArg(RValue rv, clang::QualType ty) + : RV(rv), HasLV(false), IsUsed(false), Ty(ty) { + (void)IsUsed; + } + CallArg(LValue lv, clang::QualType ty) + : LV(lv), HasLV(true), IsUsed(false), Ty(ty) {} + + /// \returns an independent RValue. If the CallArg contains an LValue, + /// a temporary copy is returned. 
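A small standalone illustration of the contract just described for getRValue, declared below: when the argument is stored as an l-value, the caller gets back an independent copy, so later writes to the original location do not leak into the call. The Arg type and its members are hypothetical, not the clangir classes.

```cpp
// Minimal standalone illustration (hypothetical Arg type): an argument slot
// that stores either an already-computed value or a reference to caller
// storage, and hands back an independent copy on demand.
#include <cassert>
#include <string>

struct Arg {
  const std::string *Ref; // "LValue" form: points at caller-owned storage
  std::string Val;        // "RValue" form: value already copied in
  bool HasRef;

  static Arg fromRef(const std::string &s) { return Arg{&s, std::string(), true}; }

  // Like CallArg::getRValue above: the result does not alias the source.
  std::string materialize() const { return HasRef ? *Ref : Val; }
};

int main() {
  std::string s = "before";
  Arg a = Arg::fromRef(s);
  std::string copy = a.materialize();
  s = "after";              // mutating the original location...
  assert(copy == "before"); // ...does not affect the materialized copy
  return 0;
}
```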
+ RValue getRValue(CIRGenFunction &CGF, mlir::Location loc) const; + + bool hasLValue() const { return HasLV; } + + LValue getKnownLValue() const { + assert(HasLV && !IsUsed); + return LV; + } + + RValue getKnownRValue() const { + assert(!HasLV && !IsUsed); + return RV; + } + + bool isAggregate() const { return HasLV || RV.isAggregate(); } +}; + +class CallArgList : public llvm::SmallVector { +public: + CallArgList() {} + + struct Writeback { + LValue Source; + }; + + void add(RValue rvalue, clang::QualType type) { + push_back(CallArg(rvalue, type)); + } + + void addUncopiedAggregate(LValue LV, clang::QualType type) { + push_back(CallArg(LV, type)); + } + + /// Add all the arguments from another CallArgList to this one. After doing + /// this, the old CallArgList retains its list of arguments, but must not + /// be used to emit a call. + void addFrom(const CallArgList &other) { + insert(end(), other.begin(), other.end()); + // TODO: Writebacks, CleanupsToDeactivate, StackBase??? + } +}; + +/// Type for representing both the decl and type of parameters to a function. +/// The decl must be either a ParmVarDecl or ImplicitParamDecl. +class FunctionArgList : public llvm::SmallVector {}; + +/// Contains the address where the return value of a function can be stored, and +/// whether the address is volatile or not. +class ReturnValueSlot { + Address Addr = Address::invalid(); + + // Return value slot flags + LLVM_PREFERRED_TYPE(bool) + unsigned IsVolatile : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned IsUnused : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned IsExternallyDestructed : 1; + +public: + ReturnValueSlot() + : IsVolatile(false), IsUnused(false), IsExternallyDestructed(false) {} + ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false, + bool IsExternallyDestructed = false) + : Addr(Addr), IsVolatile(IsVolatile), IsUnused(IsUnused), + IsExternallyDestructed(IsExternallyDestructed) {} + + bool isNull() const { return !Addr.isValid(); } + bool isVolatile() const { return IsVolatile; } + Address getValue() const { return Addr; } + bool isUnused() const { return IsUnused; } + bool isExternallyDestructed() const { return IsExternallyDestructed; } + Address getAddress() const { return Addr; } +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp new file mode 100644 index 000000000000..9d8021d74400 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -0,0 +1,1765 @@ +//===--- CIRGenClass.cpp - Emit CIR Code for C++ classes --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ code generation of classes +// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenFunction.h" + +#include "clang/AST/EvaluatedExprVisitor.h" +#include "clang/AST/RecordLayout.h" +#include "clang/Basic/NoSanitizeList.h" +#include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace clang; +using namespace cir; + +/// Checks whether the given constructor is a valid subject for the +/// complete-to-base constructor delgation optimization, i.e. emitting the +/// complete constructor as a simple call to the base constructor. 
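The delegation validated in the function below only makes sense when the complete ("C1") and base ("C2") constructor variants do the same work. The runnable source-level example below shows why virtual bases break that assumption: the virtual base is constructed by the most derived object's complete constructor, not by a base-subobject constructor.

```cpp
// Runnable source-level example: with a virtual base, the complete constructor
// of B initializes A's virtual base V, while B's base-subobject constructor
// would not, so "complete delegates to base" no longer preserves behavior.
#include <cstdio>

struct V { V() { std::puts("V"); } };
struct A : virtual V { A() { std::puts("A"); } };
struct B : A { B() { std::puts("B"); } };

int main() {
  std::puts("-- A a; --");
  A a; // prints V, A: A's complete ctor constructs the virtual base
  std::puts("-- B b; --");
  B b; // prints V, A, B: V is constructed once, by B's complete ctor,
       // and A's base-variant ctor skips it
  return 0;
}
```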
+bool CIRGenFunction::IsConstructorDelegationValid(
+    const CXXConstructorDecl *Ctor) {
+
+  // Currently we disable the optimization for classes with virtual bases
+  // because (1) the addresses of parameter variables need to be consistent
+  // across all initializers but (2) the delegate function call necessarily
+  // creates a second copy of the parameter variable.
+  //
+  // The limiting example (purely theoretical AFAIK):
+  //   struct A { A(int &c) { c++; } };
+  //   struct B : virtual A {
+  //     B(int count) : A(count) { printf("%d\n", count); }
+  //   };
+  // ...although even this example could in principle be emitted as a delegation
+  // since the address of the parameter doesn't escape.
+  if (Ctor->getParent()->getNumVBases())
+    return false;
+
+  // We also disable the optimization for variadic functions because it's
+  // impossible to "re-pass" varargs.
+  if (Ctor->getType()->castAs<FunctionProtoType>()->isVariadic())
+    return false;
+
+  // FIXME: Decide if we can do a delegation of a delegating constructor.
+  if (Ctor->isDelegatingConstructor())
+    return false;
+
+  return true;
+}
+
+/// TODO(cir): strong candidate for AST helper to be shared between LLVM and CIR
+/// codegen.
+static bool isMemcpyEquivalentSpecialMember(const CXXMethodDecl *D) {
+  auto *CD = dyn_cast<CXXConstructorDecl>(D);
+  if (!(CD && CD->isCopyOrMoveConstructor()) &&
+      !D->isCopyAssignmentOperator() && !D->isMoveAssignmentOperator())
+    return false;
+
+  // We can emit a memcpy for a trivial copy or move constructor/assignment.
+  if (D->isTrivial() && !D->getParent()->mayInsertExtraPadding())
+    return true;
+
+  // We *must* emit a memcpy for a defaulted union copy or move op.
+  if (D->getParent()->isUnion() && D->isDefaulted())
+    return true;
+
+  return false;
+}
+
+namespace {
+/// TODO(cir): a lot of what we see under this namespace is a strong candidate
+/// to be shared between LLVM and CIR codegen.
+
+/// RAII object to indicate that codegen is copying the value representation
+/// instead of the object representation. Useful when copying a struct or
+/// class which has uninitialized members and we're only performing
+/// lvalue-to-rvalue conversion on the object but not its members.
+class CopyingValueRepresentation {
+public:
+  explicit CopyingValueRepresentation(CIRGenFunction &CGF)
+      : CGF(CGF), OldSanOpts(CGF.SanOpts) {
+    CGF.SanOpts.set(SanitizerKind::Bool, false);
+    CGF.SanOpts.set(SanitizerKind::Enum, false);
+  }
+  ~CopyingValueRepresentation() { CGF.SanOpts = OldSanOpts; }
+
+private:
+  CIRGenFunction &CGF;
+  SanitizerSet OldSanOpts;
+};
+
+class FieldMemcpyizer {
+public:
+  FieldMemcpyizer(CIRGenFunction &CGF, const CXXRecordDecl *ClassDecl,
+                  const VarDecl *SrcRec)
+      : CGF(CGF), ClassDecl(ClassDecl),
+        // SrcRec(SrcRec),
+        RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
+        FirstField(nullptr), LastField(nullptr), FirstFieldOffset(0),
+        LastFieldOffset(0), LastAddedFieldIndex(0) {
+    (void)SrcRec;
+  }
+
+  bool isMemcpyableField(FieldDecl *F) const {
+    // Never memcpy fields when we are adding poisoned paddings.
+ if (CGF.getContext().getLangOpts().SanitizeAddressFieldPadding) + return false; + Qualifiers Qual = F->getType().getQualifiers(); + if (Qual.hasVolatile() || Qual.hasObjCLifetime()) + return false; + + return true; + } + + void addMemcpyableField(FieldDecl *F) { + if (F->isZeroSize(CGF.getContext())) + return; + if (!FirstField) + addInitialField(F); + else + addNextField(F); + } + + CharUnits getMemcpySize(uint64_t FirstByteOffset) const { + ASTContext &Ctx = CGF.getContext(); + unsigned LastFieldSize = + LastField->isBitField() + ? LastField->getBitWidthValue(Ctx) + : Ctx.toBits( + Ctx.getTypeInfoDataSizeInChars(LastField->getType()).Width); + uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize - + FirstByteOffset + Ctx.getCharWidth() - 1; + CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits); + return MemcpySize; + } + + void buildMemcpy() { + // Give the subclass a chance to bail out if it feels the memcpy isn't worth + // it (e.g. Hasn't aggregated enough data). + if (!FirstField) { + return; + } + + llvm_unreachable("NYI"); + } + + void reset() { FirstField = nullptr; } + +protected: + CIRGenFunction &CGF; + const CXXRecordDecl *ClassDecl; + +private: + void buildMemcpyIR(Address DestPtr, Address SrcPtr, CharUnits Size) { + llvm_unreachable("NYI"); + } + + void addInitialField(FieldDecl *F) { + FirstField = F; + LastField = F; + FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + LastFieldOffset = FirstFieldOffset; + LastAddedFieldIndex = F->getFieldIndex(); + } + + void addNextField(FieldDecl *F) { + // For the most part, the following invariant will hold: + // F->getFieldIndex() == LastAddedFieldIndex + 1 + // The one exception is that Sema won't add a copy-initializer for an + // unnamed bitfield, which will show up here as a gap in the sequence. + assert(F->getFieldIndex() >= LastAddedFieldIndex + 1 && + "Cannot aggregate fields out of order."); + LastAddedFieldIndex = F->getFieldIndex(); + + // The 'first' and 'last' fields are chosen by offset, rather than field + // index. This allows the code to support bitfields, as well as regular + // fields. 
+ uint64_t FOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + if (FOffset < FirstFieldOffset) { + FirstField = F; + FirstFieldOffset = FOffset; + } else if (FOffset >= LastFieldOffset) { + LastField = F; + LastFieldOffset = FOffset; + } + } + + // const VarDecl *SrcRec; + const ASTRecordLayout &RecLayout; + FieldDecl *FirstField; + FieldDecl *LastField; + uint64_t FirstFieldOffset, LastFieldOffset; + unsigned LastAddedFieldIndex; +}; + +static void buildLValueForAnyFieldInitialization(CIRGenFunction &CGF, + CXXCtorInitializer *MemberInit, + LValue &LHS) { + FieldDecl *Field = MemberInit->getAnyMember(); + if (MemberInit->isIndirectMemberInitializer()) { + llvm_unreachable("NYI"); + } else { + LHS = CGF.buildLValueForFieldInitialization(LHS, Field, Field->getName()); + } +} + +static void buildMemberInitializer(CIRGenFunction &CGF, + const CXXRecordDecl *ClassDecl, + CXXCtorInitializer *MemberInit, + const CXXConstructorDecl *Constructor, + FunctionArgList &Args) { + // TODO: ApplyDebugLocation + assert(MemberInit->isAnyMemberInitializer() && + "Mush have member initializer!"); + assert(MemberInit->getInit() && "Must have initializer!"); + + // non-static data member initializers + FieldDecl *Field = MemberInit->getAnyMember(); + QualType FieldType = Field->getType(); + + auto ThisPtr = CGF.LoadCXXThis(); + QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); + LValue LHS; + + // If a base constructor is being emitted, create an LValue that has the + // non-virtual alignment. + if (CGF.CurGD.getCtorType() == Ctor_Base) + LHS = CGF.MakeNaturalAlignPointeeAddrLValue(ThisPtr, RecordTy); + else + LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); + + buildLValueForAnyFieldInitialization(CGF, MemberInit, LHS); + + // Special case: If we are in a copy or move constructor, and we are copying + // an array off PODs or classes with tirival copy constructors, ignore the AST + // and perform the copy we know is equivalent. + // FIXME: This is hacky at best... if we had a bit more explicit information + // in the AST, we could generalize it more easily. + const ConstantArrayType *Array = + CGF.getContext().getAsConstantArrayType(FieldType); + if (Array && Constructor->isDefaulted() && + Constructor->isCopyOrMoveConstructor()) { + llvm_unreachable("NYI"); + } + + CGF.buildInitializerForField(Field, LHS, MemberInit->getInit()); +} + +class ConstructorMemcpyizer : public FieldMemcpyizer { +private: + /// Get source argument for copy constructor. Returns null if not a copy + /// constructor. + static const VarDecl *getTrivialCopySource(CIRGenFunction &CGF, + const CXXConstructorDecl *CD, + FunctionArgList &Args) { + if (CD->isCopyOrMoveConstructor() && CD->isDefaulted()) + return Args[CGF.CGM.getCXXABI().getSrcArgforCopyCtor(CD, Args)]; + + return nullptr; + } + + // Returns true if a CXXCtorInitializer represents a member initialization + // that can be rolled into a memcpy. 
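The bookkeeping above (FirstFieldOffset, LastFieldOffset, getMemcpySize) exists so that a run of adjacent trivially copyable fields can be copied with a single memcpy spanning the whole range. A standalone sketch of that idea, using a hypothetical Packet struct and plain offsetof arithmetic rather than the AST record layout:

```cpp
// Standalone sketch of the single-memcpy idea: the copy spans from the first
// aggregated field's offset to the end of the last one, much like
// getMemcpySize() computes above.
#include <cassert>
#include <cstddef>
#include <cstring>

struct Packet {
  int id;
  float weight;
  char tag[8];
};

int main() {
  Packet src{42, 1.5f, "xy"};
  Packet dst{};

  std::size_t first = offsetof(Packet, id);
  std::size_t size = offsetof(Packet, tag) + sizeof src.tag - first;
  std::memcpy(reinterpret_cast<char *>(&dst) + first,
              reinterpret_cast<const char *>(&src) + first, size);

  assert(dst.id == 42 && dst.weight == 1.5f && std::strcmp(dst.tag, "xy") == 0);
  return 0;
}
```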
+ bool isMemberInitMemcpyable(CXXCtorInitializer *MemberInit) const { + if (!MemcpyableCtor) + return false; + + assert(!MissingFeatures::fieldMemcpyizerBuildMemcpy()); + return false; + } + +public: + ConstructorMemcpyizer(CIRGenFunction &CGF, const CXXConstructorDecl *CD, + FunctionArgList &Args) + : FieldMemcpyizer(CGF, CD->getParent(), + getTrivialCopySource(CGF, CD, Args)), + ConstructorDecl(CD), + MemcpyableCtor(CD->isDefaulted() && CD->isCopyOrMoveConstructor() && + CGF.getLangOpts().getGC() == LangOptions::NonGC), + Args(Args) {} + + void addMemberInitializer(CXXCtorInitializer *MemberInit) { + if (isMemberInitMemcpyable(MemberInit)) { + AggregatedInits.push_back(MemberInit); + addMemcpyableField(MemberInit->getMember()); + } else { + buildAggregatedInits(); + buildMemberInitializer(CGF, ConstructorDecl->getParent(), MemberInit, + ConstructorDecl, Args); + } + } + + void buildAggregatedInits() { + if (AggregatedInits.size() <= 1) { + // This memcpy is too small to be worthwhile. Fall back on default + // codegen. + if (!AggregatedInits.empty()) { + llvm_unreachable("NYI"); + } + reset(); + return; + } + + pushEHDestructors(); + buildMemcpy(); + AggregatedInits.clear(); + } + + void pushEHDestructors() { + Address ThisPtr = CGF.LoadCXXThisAddress(); + QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); + LValue LHS = CGF.makeAddrLValue(ThisPtr, RecordTy); + (void)LHS; + + for (unsigned i = 0; i < AggregatedInits.size(); ++i) { + CXXCtorInitializer *MemberInit = AggregatedInits[i]; + QualType FieldType = MemberInit->getAnyMember()->getType(); + QualType::DestructionKind dtorKind = FieldType.isDestructedType(); + if (!CGF.needsEHCleanup(dtorKind)) + continue; + LValue FieldLHS = LHS; + buildLValueForAnyFieldInitialization(CGF, MemberInit, FieldLHS); + CGF.pushEHDestroy(dtorKind, FieldLHS.getAddress(), FieldType); + } + } + + void finish() { buildAggregatedInits(); } + +private: + const CXXConstructorDecl *ConstructorDecl; + bool MemcpyableCtor; + FunctionArgList &Args; + SmallVector AggregatedInits; +}; + +class AssignmentMemcpyizer : public FieldMemcpyizer { +private: + // Returns the memcpyable field copied by the given statement, if one + // exists. Otherwise returns null. + FieldDecl *getMemcpyableField(Stmt *S) { + if (!AssignmentsMemcpyable) + return nullptr; + if (BinaryOperator *BO = dyn_cast(S)) { + // Recognise trivial assignments. 
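The cascade of checks that follows matches the statement shapes Sema produces when it synthesizes an implicit copy-assignment body: plain assignments for scalar members, a call to the member's (trivial) copy-assignment for class-type members, and __builtin_memcpy for arrays of trivially copyable elements. A hand-written equivalent of such a body, with illustrative field names (compiles with clang or gcc, which provide __builtin_memcpy):

```cpp
// Hand-written equivalent of a synthesized copy-assignment body, showing the
// three recognised shapes (field names are illustrative).
struct Inner { int v; }; // has a trivial copy-assignment operator

struct S {
  int a;
  Inner b;
  char c[4];

  S &operator=(const S &other) {
    a = other.a;                              // 1) plain x = other.x assignment
    b = other.b;                              // 2) call to a trivial, memcpy-
                                              //    equivalent copy-assignment
    __builtin_memcpy(&c, &other.c, sizeof c); // 3) __builtin_memcpy of an array
    return *this;
  }
};

int main() {
  S x{1, {2}, "ab"}, y{};
  y = x;
  return y.a == 1 ? 0 : 1;
}
```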
+ if (BO->getOpcode() != BO_Assign) + return nullptr; + MemberExpr *ME = dyn_cast(BO->getLHS()); + if (!ME) + return nullptr; + FieldDecl *Field = dyn_cast(ME->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return nullptr; + Stmt *RHS = BO->getRHS(); + if (ImplicitCastExpr *EC = dyn_cast(RHS)) + RHS = EC->getSubExpr(); + if (!RHS) + return nullptr; + if (MemberExpr *ME2 = dyn_cast(RHS)) { + if (ME2->getMemberDecl() == Field) + return Field; + } + return nullptr; + } else if (CXXMemberCallExpr *MCE = dyn_cast(S)) { + CXXMethodDecl *MD = dyn_cast(MCE->getCalleeDecl()); + if (!(MD && isMemcpyEquivalentSpecialMember(MD))) + return nullptr; + MemberExpr *IOA = dyn_cast(MCE->getImplicitObjectArgument()); + if (!IOA) + return nullptr; + FieldDecl *Field = dyn_cast(IOA->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return nullptr; + MemberExpr *Arg0 = dyn_cast(MCE->getArg(0)); + if (!Arg0 || Field != dyn_cast(Arg0->getMemberDecl())) + return nullptr; + return Field; + } else if (CallExpr *CE = dyn_cast(S)) { + FunctionDecl *FD = dyn_cast(CE->getCalleeDecl()); + if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy) + return nullptr; + Expr *DstPtr = CE->getArg(0); + if (ImplicitCastExpr *DC = dyn_cast(DstPtr)) + DstPtr = DC->getSubExpr(); + UnaryOperator *DUO = dyn_cast(DstPtr); + if (!DUO || DUO->getOpcode() != UO_AddrOf) + return nullptr; + MemberExpr *ME = dyn_cast(DUO->getSubExpr()); + if (!ME) + return nullptr; + FieldDecl *Field = dyn_cast(ME->getMemberDecl()); + if (!Field || !isMemcpyableField(Field)) + return nullptr; + Expr *SrcPtr = CE->getArg(1); + if (ImplicitCastExpr *SC = dyn_cast(SrcPtr)) + SrcPtr = SC->getSubExpr(); + UnaryOperator *SUO = dyn_cast(SrcPtr); + if (!SUO || SUO->getOpcode() != UO_AddrOf) + return nullptr; + MemberExpr *ME2 = dyn_cast(SUO->getSubExpr()); + if (!ME2 || Field != dyn_cast(ME2->getMemberDecl())) + return nullptr; + return Field; + } + + return nullptr; + } + + bool AssignmentsMemcpyable; + SmallVector AggregatedStmts; + +public: + AssignmentMemcpyizer(CIRGenFunction &CGF, const CXXMethodDecl *AD, + FunctionArgList &Args) + : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]), + AssignmentsMemcpyable(CGF.getLangOpts().getGC() == LangOptions::NonGC) { + assert(Args.size() == 2); + } + + void emitAssignment(Stmt *S) { + FieldDecl *F = getMemcpyableField(S); + if (F) { + addMemcpyableField(F); + AggregatedStmts.push_back(S); + } else { + emitAggregatedStmts(); + if (CGF.buildStmt(S, /*useCurrentScope=*/true).failed()) + llvm_unreachable("Should not get here!"); + } + } + + void emitAggregatedStmts() { + if (AggregatedStmts.size() <= 1) { + if (!AggregatedStmts.empty()) { + CopyingValueRepresentation CVR(CGF); + if (CGF.buildStmt(AggregatedStmts[0], /*useCurrentScope=*/true) + .failed()) + llvm_unreachable("Should not get here!"); + } + reset(); + } + + buildMemcpy(); + AggregatedStmts.clear(); + } + + void finish() { emitAggregatedStmts(); } +}; +} // namespace + +static bool isInitializerOfDynamicClass(const CXXCtorInitializer *BaseInit) { + const Type *BaseType = BaseInit->getBaseClass(); + const auto *BaseClassDecl = + cast(BaseType->castAs()->getDecl()); + return BaseClassDecl->isDynamicClass(); +} + +namespace { +/// Call the destructor for a direct base class. 
+struct CallBaseDtor final : EHScopeStack::Cleanup { + const CXXRecordDecl *BaseClass; + bool BaseIsVirtual; + CallBaseDtor(const CXXRecordDecl *Base, bool BaseIsVirtual) + : BaseClass(Base), BaseIsVirtual(BaseIsVirtual) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + const CXXRecordDecl *DerivedClass = + cast(CGF.CurCodeDecl)->getParent(); + + const CXXDestructorDecl *D = BaseClass->getDestructor(); + // We are already inside a destructor, so presumably the object being + // destroyed should have the expected type. + QualType ThisTy = D->getFunctionObjectParameterType(); + assert(CGF.currSrcLoc && "expected source location"); + Address Addr = CGF.getAddressOfDirectBaseInCompleteClass( + *CGF.currSrcLoc, CGF.LoadCXXThisAddress(), DerivedClass, BaseClass, + BaseIsVirtual); + CGF.buildCXXDestructorCall(D, Dtor_Base, BaseIsVirtual, + /*Delegating=*/false, Addr, ThisTy); + } +}; + +/// A visitor which checks whether an initializer uses 'this' in a +/// way which requires the vtable to be properly set. +struct DynamicThisUseChecker + : ConstEvaluatedExprVisitor { + typedef ConstEvaluatedExprVisitor super; + + bool UsesThis; + + DynamicThisUseChecker(const ASTContext &C) : super(C), UsesThis(false) {} + + // Black-list all explicit and implicit references to 'this'. + // + // Do we need to worry about external references to 'this' derived + // from arbitrary code? If so, then anything which runs arbitrary + // external code might potentially access the vtable. + void VisitCXXThisExpr(const CXXThisExpr *E) { UsesThis = true; } +}; +} // end anonymous namespace + +static bool BaseInitializerUsesThis(ASTContext &C, const Expr *Init) { + DynamicThisUseChecker Checker(C); + Checker.Visit(Init); + return Checker.UsesThis; +} + +/// Gets the address of a direct base class within a complete object. +/// This should only be used for (1) non-virtual bases or (2) virtual bases +/// when the type is known to be complete (e.g. in complete destructors). +/// +/// The object pointed to by 'This' is assumed to be non-null. +Address CIRGenFunction::getAddressOfDirectBaseInCompleteClass( + mlir::Location loc, Address This, const CXXRecordDecl *Derived, + const CXXRecordDecl *Base, bool BaseIsVirtual) { + // 'this' must be a pointer (in some address space) to Derived. + assert(This.getElementType() == ConvertType(Derived)); + + // Compute the offset of the virtual base. + CharUnits Offset; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(Derived); + if (BaseIsVirtual) + Offset = Layout.getVBaseClassOffset(Base); + else + Offset = Layout.getBaseClassOffset(Base); + + // Shift and cast down to the base type. + // TODO: for complete types, this should be possible with a GEP. 
+ Address V = This; + if (!Offset.isZero()) { + mlir::Value OffsetVal = builder.getSInt32(Offset.getQuantity(), loc); + mlir::Value VBaseThisPtr = builder.create( + loc, This.getPointer().getType(), This.getPointer(), OffsetVal); + V = Address(VBaseThisPtr, CXXABIThisAlignment); + } + V = builder.createElementBitCast(loc, V, ConvertType(Base)); + return V; +} + +static void buildBaseInitializer(mlir::Location loc, CIRGenFunction &CGF, + const CXXRecordDecl *ClassDecl, + CXXCtorInitializer *BaseInit) { + assert(BaseInit->isBaseInitializer() && "Must have base initializer!"); + + Address ThisPtr = CGF.LoadCXXThisAddress(); + + const Type *BaseType = BaseInit->getBaseClass(); + const auto *BaseClassDecl = + cast(BaseType->castAs()->getDecl()); + + bool isBaseVirtual = BaseInit->isBaseVirtual(); + + // If the initializer for the base (other than the constructor + // itself) accesses 'this' in any way, we need to initialize the + // vtables. + if (BaseInitializerUsesThis(CGF.getContext(), BaseInit->getInit())) + CGF.initializeVTablePointers(loc, ClassDecl); + + // We can pretend to be a complete class because it only matters for + // virtual bases, and we only do virtual bases for complete ctors. + Address V = CGF.getAddressOfDirectBaseInCompleteClass( + loc, ThisPtr, ClassDecl, BaseClassDecl, isBaseVirtual); + AggValueSlot AggSlot = AggValueSlot::forAddr( + V, Qualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + CGF.getOverlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); + + CGF.buildAggExpr(BaseInit->getInit(), AggSlot); + + if (CGF.CGM.getLangOpts().Exceptions && + !BaseClassDecl->hasTrivialDestructor()) + CGF.EHStack.pushCleanup(EHCleanup, BaseClassDecl, + isBaseVirtual); +} + +/// This routine generates necessary code to initialize base classes and +/// non-static data members belonging to this constructor. +void CIRGenFunction::buildCtorPrologue(const CXXConstructorDecl *CD, + CXXCtorType CtorType, + FunctionArgList &Args) { + if (CD->isDelegatingConstructor()) + return buildDelegatingCXXConstructorCall(CD, Args); + + const CXXRecordDecl *ClassDecl = CD->getParent(); + + CXXConstructorDecl::init_const_iterator B = CD->init_begin(), + E = CD->init_end(); + + // Virtual base initializers first, if any. They aren't needed if: + // - This is a base ctor variant + // - There are no vbases + // - The class is abstract, so a complete object of it cannot be constructed + // + // The check for an abstract class is necessary because sema may not have + // marked virtual base destructors referenced. + bool ConstructVBases = CtorType != Ctor_Base && + ClassDecl->getNumVBases() != 0 && + !ClassDecl->isAbstract(); + + // In the Microsoft C++ ABI, there are no constructor variants. Instead, the + // constructor of a class with virtual bases takes an additional parameter to + // conditionally construct the virtual bases. Emit that check here. 
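buildCtorPrologue above follows the usual C++ construction order: virtual bases (complete constructor only), then direct non-virtual bases, then non-static data members, then the constructor body. A small runnable program that makes the order visible (all type names hypothetical):

```cpp
// Runnable illustration of the prologue order: virtual base, then non-virtual
// base, then members, then the constructor body.
#include <cstdio>

struct Log { Log(const char *what) { std::puts(what); } };

struct VBase { VBase() { std::puts("virtual base"); } };
struct NVBase { NVBase() { std::puts("non-virtual base"); } };

struct Derived : virtual VBase, NVBase {
  Log member{"member"};
  Derived() { std::puts("ctor body"); }
};

int main() {
  Derived d; // prints: virtual base, non-virtual base, member, ctor body
  return 0;
}
```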
+ mlir::Block *BaseCtorContinueBB = nullptr; + if (ConstructVBases && + !CGM.getTarget().getCXXABI().hasConstructorVariants()) { + llvm_unreachable("NYI"); + } + + auto const OldThis = CXXThisValue; + for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { + if (!ConstructVBases) + continue; + if (CGM.getCodeGenOpts().StrictVTablePointers && + CGM.getCodeGenOpts().OptimizationLevel > 0 && + isInitializerOfDynamicClass(*B)) + llvm_unreachable("NYI"); + buildBaseInitializer(getLoc(CD->getBeginLoc()), *this, ClassDecl, *B); + } + + if (BaseCtorContinueBB) { + llvm_unreachable("NYI"); + } + + // Then, non-virtual base initializers. + for (; B != E && (*B)->isBaseInitializer(); B++) { + assert(!(*B)->isBaseVirtual()); + + if (CGM.getCodeGenOpts().StrictVTablePointers && + CGM.getCodeGenOpts().OptimizationLevel > 0 && + isInitializerOfDynamicClass(*B)) + llvm_unreachable("NYI"); + buildBaseInitializer(getLoc(CD->getBeginLoc()), *this, ClassDecl, *B); + } + + CXXThisValue = OldThis; + + initializeVTablePointers(getLoc(CD->getBeginLoc()), ClassDecl); + + // And finally, initialize class members. + FieldConstructionScope FCS(*this, LoadCXXThisAddress()); + ConstructorMemcpyizer CM(*this, CD, Args); + for (; B != E; B++) { + CXXCtorInitializer *Member = (*B); + assert(!Member->isBaseInitializer()); + assert(Member->isAnyMemberInitializer() && + "Delegating initializer on non-delegating constructor"); + CM.addMemberInitializer(Member); + } + CM.finish(); +} + +static Address ApplyNonVirtualAndVirtualOffset( + mlir::Location loc, CIRGenFunction &CGF, Address addr, + CharUnits nonVirtualOffset, mlir::Value virtualOffset, + const CXXRecordDecl *derivedClass, const CXXRecordDecl *nearestVBase) { + // Assert that we have something to do. + assert(!nonVirtualOffset.isZero() || virtualOffset != nullptr); + + // Compute the offset from the static and dynamic components. + mlir::Value baseOffset; + if (!nonVirtualOffset.isZero()) { + mlir::Type OffsetType = + (CGF.CGM.getTarget().getCXXABI().isItaniumFamily() && + CGF.CGM.getItaniumVTableContext().isRelativeLayout()) + ? CGF.SInt32Ty + : CGF.PtrDiffTy; + baseOffset = CGF.getBuilder().getConstInt(loc, OffsetType, + nonVirtualOffset.getQuantity()); + if (virtualOffset) { + baseOffset = CGF.getBuilder().createBinop( + virtualOffset, mlir::cir::BinOpKind::Add, baseOffset); + } + } else { + baseOffset = virtualOffset; + } + + // Apply the base offset. + mlir::Value ptr = addr.getPointer(); + ptr = CGF.getBuilder().create(loc, ptr.getType(), ptr, + baseOffset); + + // If we have a virtual component, the alignment of the result will + // be relative only to the known alignment of that vbase. + CharUnits alignment; + if (virtualOffset) { + assert(nearestVBase && "virtual offset without vbase?"); + llvm_unreachable("NYI"); + // alignment = CGF.CGM.getVBaseAlignment(addr.getAlignment(), + // derivedClass, nearestVBase); + } else { + alignment = addr.getAlignment(); + } + alignment = alignment.alignmentAtOffset(nonVirtualOffset); + + return Address(ptr, alignment); +} + +void CIRGenFunction::initializeVTablePointer(mlir::Location loc, + const VPtr &Vptr) { + // Compute the address point. + auto VTableAddressPoint = CGM.getCXXABI().getVTableAddressPointInStructor( + *this, Vptr.VTableClass, Vptr.Base, Vptr.NearestVBase); + + if (!VTableAddressPoint) + return; + + // Compute where to store the address point. 
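The address point being stored here is what makes virtual dispatch during construction resolve against the class whose constructor is currently running, since each constructor re-stores its own vtable pointer before its member initializers and body execute. A runnable source-level illustration:

```cpp
// Runnable illustration: while Base's constructor runs, the vtable pointer
// still designates Base, so the virtual call resolves to Base::name().
#include <cstdio>

struct Base {
  Base() { std::printf("constructing: %s\n", name()); } // prints "Base"
  virtual const char *name() const { return "Base"; }
  virtual ~Base() = default;
};

struct Derived : Base {
  const char *name() const override { return "Derived"; }
};

int main() {
  Derived d;
  std::printf("constructed: %s\n", d.name()); // prints "Derived"
  return 0;
}
```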
+ mlir::Value VirtualOffset{}; + CharUnits NonVirtualOffset = CharUnits::Zero(); + + if (CGM.getCXXABI().isVirtualOffsetNeededForVTableField(*this, Vptr)) { + llvm_unreachable("NYI"); + } else { + // We can just use the base offset in the complete class. + NonVirtualOffset = Vptr.Base.getBaseOffset(); + } + + // Apply the offsets. + Address VTableField = LoadCXXThisAddress(); + if (!NonVirtualOffset.isZero() || VirtualOffset) { + VTableField = ApplyNonVirtualAndVirtualOffset( + loc, *this, VTableField, NonVirtualOffset, VirtualOffset, + Vptr.VTableClass, Vptr.NearestVBase); + } + + // Finally, store the address point. Use the same CIR types as the field. + // + // vtable field is derived from `this` pointer, therefore they should be in + // the same addr space. + assert(!MissingFeatures::addressSpace()); + VTableField = builder.createElementBitCast(loc, VTableField, + VTableAddressPoint.getType()); + builder.createStore(loc, VTableAddressPoint, VTableField); + assert(!MissingFeatures::tbaa()); +} + +void CIRGenFunction::initializeVTablePointers(mlir::Location loc, + const CXXRecordDecl *RD) { + // Ignore classes without a vtable. + if (!RD->isDynamicClass()) + return; + + // Initialize the vtable pointers for this class and all of its bases. + if (CGM.getCXXABI().doStructorsInitializeVPtrs(RD)) + for (const auto &Vptr : getVTablePointers(RD)) + initializeVTablePointer(loc, Vptr); + + if (RD->getNumVBases()) + CGM.getCXXABI().initializeHiddenVirtualInheritanceMembers(*this, RD); +} + +CIRGenFunction::VPtrsVector +CIRGenFunction::getVTablePointers(const CXXRecordDecl *VTableClass) { + CIRGenFunction::VPtrsVector VPtrsResult; + VisitedVirtualBasesSetTy VBases; + getVTablePointers(BaseSubobject(VTableClass, CharUnits::Zero()), + /*NearestVBase=*/nullptr, + /*OffsetFromNearestVBase=*/CharUnits::Zero(), + /*BaseIsNonVirtualPrimaryBase=*/false, VTableClass, VBases, + VPtrsResult); + return VPtrsResult; +} + +void CIRGenFunction::getVTablePointers(BaseSubobject Base, + const CXXRecordDecl *NearestVBase, + CharUnits OffsetFromNearestVBase, + bool BaseIsNonVirtualPrimaryBase, + const CXXRecordDecl *VTableClass, + VisitedVirtualBasesSetTy &VBases, + VPtrsVector &Vptrs) { + // If this base is a non-virtual primary base the address point has already + // been set. + if (!BaseIsNonVirtualPrimaryBase) { + // Initialize the vtable pointer for this base. + VPtr Vptr = {Base, NearestVBase, OffsetFromNearestVBase, VTableClass}; + Vptrs.push_back(Vptr); + } + + const CXXRecordDecl *RD = Base.getBase(); + + // Traverse bases. + for (const auto &I : RD->bases()) { + auto *BaseDecl = + cast(I.getType()->castAs()->getDecl()); + + // Ignore classes without a vtable. + if (!BaseDecl->isDynamicClass()) + continue; + + CharUnits BaseOffset; + CharUnits BaseOffsetFromNearestVBase; + bool BaseDeclIsNonVirtualPrimaryBase; + + if (I.isVirtual()) { + llvm_unreachable("NYI"); + } else { + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + + BaseOffset = Base.getBaseOffset() + Layout.getBaseClassOffset(BaseDecl); + BaseOffsetFromNearestVBase = + OffsetFromNearestVBase + Layout.getBaseClassOffset(BaseDecl); + BaseDeclIsNonVirtualPrimaryBase = Layout.getPrimaryBase() == BaseDecl; + } + + getVTablePointers( + BaseSubobject(BaseDecl, BaseOffset), + I.isVirtual() ? 
BaseDecl : NearestVBase, BaseOffsetFromNearestVBase, + BaseDeclIsNonVirtualPrimaryBase, VTableClass, VBases, Vptrs); + } +} + +Address CIRGenFunction::LoadCXXThisAddress() { + assert(CurFuncDecl && "loading 'this' without a func declaration?"); + assert(isa(CurFuncDecl)); + + // Lazily compute CXXThisAlignment. + if (CXXThisAlignment.isZero()) { + // Just use the best known alignment for the parent. + // TODO: if we're currently emitting a complete-object ctor/dtor, we can + // always use the complete-object alignment. + auto RD = cast(CurFuncDecl)->getParent(); + CXXThisAlignment = CGM.getClassPointerAlignment(RD); + } + + return Address(LoadCXXThis(), CXXThisAlignment); +} + +void CIRGenFunction::buildInitializerForField(FieldDecl *Field, LValue LHS, + Expr *Init) { + QualType FieldType = Field->getType(); + switch (getEvaluationKind(FieldType)) { + case TEK_Scalar: + if (LHS.isSimple()) { + buildExprAsInit(Init, Field, LHS, false); + } else { + llvm_unreachable("NYI"); + } + break; + case TEK_Complex: + llvm_unreachable("NYI"); + break; + case TEK_Aggregate: { + AggValueSlot Slot = AggValueSlot::forLValue( + LHS, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, getOverlapForFieldInit(Field), + AggValueSlot::IsNotZeroed, + // Checks are made by the code that calls constructor. + AggValueSlot::IsSanitizerChecked); + buildAggExpr(Init, Slot); + break; + } + } + + // Ensure that we destroy this object if an exception is thrown later in the + // constructor. + QualType::DestructionKind dtorKind = FieldType.isDestructedType(); + (void)dtorKind; + if (MissingFeatures::cleanups()) + llvm_unreachable("NYI"); +} + +void CIRGenFunction::buildDelegateCXXConstructorCall( + const CXXConstructorDecl *Ctor, CXXCtorType CtorType, + const FunctionArgList &Args, SourceLocation Loc) { + CallArgList DelegateArgs; + + FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); + assert(I != E && "no parameters to constructor"); + + // this + Address This = LoadCXXThisAddress(); + DelegateArgs.add(RValue::get(This.getPointer()), (*I)->getType()); + ++I; + + // FIXME: The location of the VTT parameter in the parameter list is specific + // to the Itanium ABI and shouldn't be hardcoded here. + if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { + llvm_unreachable("NYI"); + } + + // Explicit arguments. + for (; I != E; ++I) { + const VarDecl *param = *I; + // FIXME: per-argument source location + buildDelegateCallArg(DelegateArgs, param, Loc); + } + + buildCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false, + /*Delegating=*/true, This, DelegateArgs, + AggValueSlot::MayOverlap, Loc, + /*NewPointerIsChecked=*/true); +} + +void CIRGenFunction::buildImplicitAssignmentOperatorBody( + FunctionArgList &Args) { + const CXXMethodDecl *AssignOp = cast(CurGD.getDecl()); + const Stmt *RootS = AssignOp->getBody(); + assert(isa(RootS) && + "Body of an implicit assignment operator should be compound stmt."); + const CompoundStmt *RootCS = cast(RootS); + + // LexicalScope Scope(*this, RootCS->getSourceRange()); + // FIXME(cir): add all of the below under a new scope. + + assert(!MissingFeatures::incrementProfileCounter()); + AssignmentMemcpyizer AM(*this, AssignOp, Args); + for (auto *I : RootCS->body()) + AM.emitAssignment(I); + AM.finish(); +} + +void CIRGenFunction::buildForwardingCallToLambda( + const CXXMethodDecl *callOperator, CallArgList &callArgs) { + // Get the address of the call operator. 
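buildLambdaDelegatingInvokeBody and buildForwardingCallToLambda above generate the body of a lambda's static invoker, the function a capture-less lambda decays to, which simply forwards to operator(). A runnable source-level view of that relationship:

```cpp
// Runnable source-level view: converting a capture-less lambda to a function
// pointer yields the static invoker, whose body just calls operator().
#include <cassert>

int main() {
  auto twice = [](int x) { return 2 * x; };
  int (*invoker)(int) = twice; // conversion returns the static invoker
  assert(invoker(21) == twice(21) && invoker(21) == 42);
  return 0;
}
```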
+ const auto &calleeFnInfo = + CGM.getTypes().arrangeCXXMethodDeclaration(callOperator); + auto calleePtr = CGM.GetAddrOfFunction( + GlobalDecl(callOperator), CGM.getTypes().GetFunctionType(calleeFnInfo)); + + // Prepare the return slot. + const FunctionProtoType *FPT = + callOperator->getType()->castAs(); + QualType resultType = FPT->getReturnType(); + ReturnValueSlot returnSlot; + if (!resultType->isVoidType() && + calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && + !hasScalarEvaluationKind(calleeFnInfo.getReturnType())) { + llvm_unreachable("NYI"); + } + + // We don't need to separately arrange the call arguments because + // the call can't be variadic anyway --- it's impossible to forward + // variadic arguments. + + // Now emit our call. + auto callee = CIRGenCallee::forDirect(calleePtr, GlobalDecl(callOperator)); + RValue RV = buildCall(calleeFnInfo, callee, returnSlot, callArgs); + + // If necessary, copy the returned value into the slot. + if (!resultType->isVoidType() && returnSlot.isNull()) { + if (getLangOpts().ObjCAutoRefCount && resultType->isObjCRetainableType()) + llvm_unreachable("NYI"); + buildReturnOfRValue(*currSrcLoc, RV, resultType); + } else { + llvm_unreachable("NYI"); + } +} + +void CIRGenFunction::buildLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { + const CXXRecordDecl *Lambda = MD->getParent(); + + // Start building arguments for forwarding call + CallArgList CallArgs; + + QualType LambdaType = getContext().getRecordType(Lambda); + QualType ThisType = getContext().getPointerType(LambdaType); + Address ThisPtr = + CreateMemTemp(LambdaType, getLoc(MD->getSourceRange()), "unused.capture"); + CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); + + // Add the rest of the parameters. + for (auto *Param : MD->parameters()) + buildDelegateCallArg(CallArgs, Param, Param->getBeginLoc()); + + const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); + // For a generic lambda, find the corresponding call operator specialization + // to which the call to the static-invoker shall be forwarded. + if (Lambda->isGenericLambda()) { + assert(MD->isFunctionTemplateSpecialization()); + const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs(); + FunctionTemplateDecl *CallOpTemplate = + CallOp->getDescribedFunctionTemplate(); + void *InsertPos = nullptr; + FunctionDecl *CorrespondingCallOpSpecialization = + CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos); + assert(CorrespondingCallOpSpecialization); + CallOp = cast(CorrespondingCallOpSpecialization); + } + buildForwardingCallToLambda(CallOp, CallArgs); +} + +void CIRGenFunction::buildLambdaStaticInvokeBody(const CXXMethodDecl *MD) { + if (MD->isVariadic()) { + // Codgen for LLVM doesn't emit code for this as well, it says: + // FIXME: Making this work correctly is nasty because it requires either + // cloning the body of the call operator or making the call operator + // forward. + llvm_unreachable("NYI"); + } + + buildLambdaDelegatingInvokeBody(MD); +} + +void CIRGenFunction::destroyCXXObject(CIRGenFunction &CGF, Address addr, + QualType type) { + const RecordType *rtype = type->castAs(); + const CXXRecordDecl *record = cast(rtype->getDecl()); + const CXXDestructorDecl *dtor = record->getDestructor(); + // TODO(cir): Unlike traditional codegen, CIRGen should actually emit trivial + // dtors which shall be removed on later CIR passes. However, only remove this + // assertion once we get a testcase to exercise this path. 
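The assertion that follows, and the FieldHasTrivialDestructorBody helpers further down, hinge on whether a type (including all of its subobjects) has a trivial destructor; only non-trivial ones are worth a destructor call. A quick standalone check of that property using the standard type trait (hypothetical types):

```cpp
// Quick standalone check: a destructor call is only needed when the type, or
// one of its subobjects, has a non-trivial destructor.
#include <string>
#include <type_traits>

struct Pod { int x; };
struct Wrapper { Pod p; };          // trivial destructor: nothing to emit
struct Owner { std::string name; }; // member with a non-trivial destructor

static_assert(std::is_trivially_destructible<Wrapper>::value,
              "no destructor call needed");
static_assert(!std::is_trivially_destructible<Owner>::value,
              "a destructor must run for the std::string member");

int main() { return 0; }
```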
+ assert(!dtor->isTrivial()); + CGF.buildCXXDestructorCall(dtor, Dtor_Complete, /*for vbase*/ false, + /*Delegating=*/false, addr, type); +} + +static bool FieldHasTrivialDestructorBody(ASTContext &Context, + const FieldDecl *Field); + +// FIXME(cir): this should be shared with traditional codegen. +static bool +HasTrivialDestructorBody(ASTContext &Context, + const CXXRecordDecl *BaseClassDecl, + const CXXRecordDecl *MostDerivedClassDecl) { + // If the destructor is trivial we don't have to check anything else. + if (BaseClassDecl->hasTrivialDestructor()) + return true; + + if (!BaseClassDecl->getDestructor()->hasTrivialBody()) + return false; + + // Check fields. + for (const auto *Field : BaseClassDecl->fields()) + if (!FieldHasTrivialDestructorBody(Context, Field)) + return false; + + // Check non-virtual bases. + for (const auto &I : BaseClassDecl->bases()) { + if (I.isVirtual()) + continue; + + const CXXRecordDecl *NonVirtualBase = + cast(I.getType()->castAs()->getDecl()); + if (!HasTrivialDestructorBody(Context, NonVirtualBase, + MostDerivedClassDecl)) + return false; + } + + if (BaseClassDecl == MostDerivedClassDecl) { + // Check virtual bases. + for (const auto &I : BaseClassDecl->vbases()) { + const CXXRecordDecl *VirtualBase = + cast(I.getType()->castAs()->getDecl()); + if (!HasTrivialDestructorBody(Context, VirtualBase, MostDerivedClassDecl)) + return false; + } + } + + return true; +} + +// FIXME(cir): this should be shared with traditional codegen. +static bool FieldHasTrivialDestructorBody(ASTContext &Context, + const FieldDecl *Field) { + QualType FieldBaseElementType = Context.getBaseElementType(Field->getType()); + + const RecordType *RT = FieldBaseElementType->getAs(); + if (!RT) + return true; + + CXXRecordDecl *FieldClassDecl = cast(RT->getDecl()); + + // The destructor for an implicit anonymous union member is never invoked. + if (FieldClassDecl->isUnion() && FieldClassDecl->isAnonymousStructOrUnion()) + return false; + + return HasTrivialDestructorBody(Context, FieldClassDecl, FieldClassDecl); +} + +/// Check whether we need to initialize any vtable pointers before calling this +/// destructor. +/// FIXME(cir): this should be shared with traditional codegen. +static bool CanSkipVTablePointerInitialization(CIRGenFunction &CGF, + const CXXDestructorDecl *Dtor) { + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + if (!ClassDecl->isDynamicClass()) + return true; + + // For a final class, the vtable pointer is known to already point to the + // class's vtable. + if (ClassDecl->isEffectivelyFinal()) + return true; + + if (!Dtor->hasTrivialBody()) + return false; + + // Check the fields. + for (const auto *Field : ClassDecl->fields()) + if (!FieldHasTrivialDestructorBody(CGF.getContext(), Field)) + return false; + + return true; +} + +/// Emits the body of the current destructor. +void CIRGenFunction::buildDestructorBody(FunctionArgList &Args) { + const CXXDestructorDecl *Dtor = cast(CurGD.getDecl()); + CXXDtorType DtorType = CurGD.getDtorType(); + + // For an abstract class, non-base destructors are never used (and can't + // be emitted in general, because vbase dtors may not have been validated + // by Sema), but the Itanium ABI doesn't make them optional and Clang may + // in fact emit references to them from other compilations, so emit them + // as functions containing a trap instruction. + if (DtorType != Dtor_Base && Dtor->getParent()->isAbstract()) { + SourceLocation Loc = + Dtor->hasBody() ? 
Dtor->getBody()->getBeginLoc() : Dtor->getLocation(); + builder.create(getLoc(Loc)); + // The corresponding clang/CodeGen logic clears the insertion point here, + // but MLIR's builder requires a valid insertion point, so we create a dummy + // block (since the trap is a block terminator). + builder.createBlock(builder.getBlock()->getParent()); + return; + } + + Stmt *Body = Dtor->getBody(); + if (Body) + assert(!MissingFeatures::incrementProfileCounter()); + + // The call to operator delete in a deleting destructor happens + // outside of the function-try-block, which means it's always + // possible to delegate the destructor body to the complete + // destructor. Do so. + if (DtorType == Dtor_Deleting) { + RunCleanupsScope DtorEpilogue(*this); + EnterDtorCleanups(Dtor, Dtor_Deleting); + if (HaveInsertPoint()) { + QualType ThisTy = Dtor->getFunctionObjectParameterType(); + buildCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, LoadCXXThisAddress(), + ThisTy); + } + return; + } + + // If the body is a function-try-block, enter the try before + // anything else. + bool isTryBody = (Body && isa(Body)); + if (isTryBody) { + llvm_unreachable("NYI"); + // EnterCXXTryStmt(*cast(Body), true); + } + if (MissingFeatures::emitAsanPrologueOrEpilogue()) + llvm_unreachable("NYI"); + + // Enter the epilogue cleanups. + RunCleanupsScope DtorEpilogue(*this); + + // If this is the complete variant, just invoke the base variant; + // the epilogue will destruct the virtual bases. But we can't do + // this optimization if the body is a function-try-block, because + // we'd introduce *two* handler blocks. In the Microsoft ABI, we + // always delegate because we might not have a definition in this TU. + switch (DtorType) { + case Dtor_Comdat: + llvm_unreachable("not expecting a COMDAT"); + case Dtor_Deleting: + llvm_unreachable("already handled deleting case"); + + case Dtor_Complete: + assert((Body || getTarget().getCXXABI().isMicrosoft()) && + "can't emit a dtor without a body for non-Microsoft ABIs"); + + // Enter the cleanup scopes for virtual bases. + EnterDtorCleanups(Dtor, Dtor_Complete); + + if (!isTryBody) { + QualType ThisTy = Dtor->getFunctionObjectParameterType(); + buildCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false, + /*Delegating=*/false, LoadCXXThisAddress(), + ThisTy); + break; + } + + // Fallthrough: act like we're in the base variant. + [[fallthrough]]; + + case Dtor_Base: + assert(Body); + + // Enter the cleanup scopes for fields and non-virtual bases. + EnterDtorCleanups(Dtor, Dtor_Base); + + // Initialize the vtable pointers before entering the body. + if (!CanSkipVTablePointerInitialization(*this, Dtor)) { + // Insert the llvm.launder.invariant.group intrinsic before initializing + // the vptrs to cancel any previous assumptions we might have made. + if (CGM.getCodeGenOpts().StrictVTablePointers && + CGM.getCodeGenOpts().OptimizationLevel > 0) + llvm_unreachable("NYI"); + llvm_unreachable("NYI"); + } + + if (isTryBody) + llvm_unreachable("NYI"); + else if (Body) + (void)buildStmt(Body, /*useCurrentScope=*/true); + else { + assert(Dtor->isImplicit() && "bodyless dtor not implicit"); + // nothing to do besides what's in the epilogue + } + // -fapple-kext must inline any call to this dtor into + // the caller's body. + if (getLangOpts().AppleKext) + llvm_unreachable("NYI"); + + break; + } + + // Jump out through the epilogue cleanups. + DtorEpilogue.ForceCleanup(); + + // Exit the try if applicable. 
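The Dtor_Complete and Dtor_Base delegation above, together with EnterDtorCleanups further down, produces the usual teardown order: destructor body, then members in reverse declaration order, then non-virtual bases, with virtual bases handled only by the complete variant. A runnable program that prints that order (hypothetical types):

```cpp
// Runnable illustration of the teardown order: body, members in reverse
// declaration order, non-virtual bases, then the virtual base, which only the
// complete-object destructor is responsible for.
#include <cstdio>

struct V { ~V() { std::puts("~virtual base"); } };
struct NV { ~NV() { std::puts("~non-virtual base"); } };
struct M1 { ~M1() { std::puts("~member 1"); } };
struct M2 { ~M2() { std::puts("~member 2"); } };

struct D : virtual V, NV {
  M1 m1;
  M2 m2;
  ~D() { std::puts("~D body"); }
};

int main() {
  { D d; } // prints: ~D body, ~member 2, ~member 1, ~non-virtual base, ~virtual base
  return 0;
}
```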
+ if (isTryBody) + llvm_unreachable("NYI"); +} + +namespace { +[[maybe_unused]] mlir::Value +LoadThisForDtorDelete(CIRGenFunction &CGF, const CXXDestructorDecl *DD) { + if (Expr *ThisArg = DD->getOperatorDeleteThisArg()) + return CGF.buildScalarExpr(ThisArg); + return CGF.LoadCXXThis(); +} + +/// Call the operator delete associated with the current destructor. +struct CallDtorDelete final : EHScopeStack::Cleanup { + CallDtorDelete() {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + const CXXDestructorDecl *Dtor = cast(CGF.CurCodeDecl); + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + CGF.buildDeleteCall(Dtor->getOperatorDelete(), + LoadThisForDtorDelete(CGF, Dtor), + CGF.getContext().getTagDeclType(ClassDecl)); + } +}; +} // namespace + +class DestroyField final : public EHScopeStack::Cleanup { + const FieldDecl *field; + CIRGenFunction::Destroyer *destroyer; + bool useEHCleanupForArray; + +public: + DestroyField(const FieldDecl *field, CIRGenFunction::Destroyer *destroyer, + bool useEHCleanupForArray) + : field(field), destroyer(destroyer), + useEHCleanupForArray(useEHCleanupForArray) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + // Find the address of the field. + Address thisValue = CGF.LoadCXXThisAddress(); + QualType RecordTy = CGF.getContext().getTagDeclType(field->getParent()); + LValue ThisLV = CGF.makeAddrLValue(thisValue, RecordTy); + LValue LV = CGF.buildLValueForField(ThisLV, field); + assert(LV.isSimple()); + + CGF.emitDestroy(LV.getAddress(), field->getType(), destroyer, + flags.isForNormalCleanup() && useEHCleanupForArray); + } +}; + +/// Emit all code that comes at the end of class's destructor. This is to call +/// destructors on members and base classes in reverse order of their +/// construction. +/// +/// For a deleting destructor, this also handles the case where a destroying +/// operator delete completely overrides the definition. +void CIRGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, + CXXDtorType DtorType) { + assert((!DD->isTrivial() || DD->hasAttr()) && + "Should not emit dtor epilogue for non-exported trivial dtor!"); + + // The deleting-destructor phase just needs to call the appropriate + // operator delete that Sema picked up. + if (DtorType == Dtor_Deleting) { + assert(DD->getOperatorDelete() && + "operator delete missing - EnterDtorCleanups"); + if (CXXStructorImplicitParamValue) { + llvm_unreachable("NYI"); + } else { + if (DD->getOperatorDelete()->isDestroyingOperatorDelete()) { + llvm_unreachable("NYI"); + } else { + EHStack.pushCleanup(NormalAndEHCleanup); + } + } + return; + } + + const CXXRecordDecl *ClassDecl = DD->getParent(); + + // Unions have no bases and do not call field destructors. + if (ClassDecl->isUnion()) + return; + + // The complete-destructor phase just destructs all the virtual bases. + if (DtorType == Dtor_Complete) { + // Poison the vtable pointer such that access after the base + // and member destructors are invoked is invalid. + if (CGM.getCodeGenOpts().SanitizeMemoryUseAfterDtor && + SanOpts.has(SanitizerKind::Memory) && ClassDecl->getNumVBases() && + ClassDecl->isPolymorphic()) + assert(!MissingFeatures::sanitizeDtor()); + + // We push them in the forward order so that they'll be popped in + // the reverse order. + for (const auto &Base : ClassDecl->vbases()) { + auto *BaseClassDecl = + cast(Base.getType()->castAs()->getDecl()); + + if (BaseClassDecl->hasTrivialDestructor()) { + // Under SanitizeMemoryUseAfterDtor, poison the trivial base class + // memory. 
For non-trival base classes the same is done in the class + // destructor. + assert(!MissingFeatures::sanitizeDtor()); + } else { + EHStack.pushCleanup(NormalAndEHCleanup, BaseClassDecl, + /*BaseIsVirtual*/ true); + } + } + + return; + } + + assert(DtorType == Dtor_Base); + // Poison the vtable pointer if it has no virtual bases, but inherits + // virtual functions. + if (CGM.getCodeGenOpts().SanitizeMemoryUseAfterDtor && + SanOpts.has(SanitizerKind::Memory) && !ClassDecl->getNumVBases() && + ClassDecl->isPolymorphic()) + assert(!MissingFeatures::sanitizeDtor()); + + // Destroy non-virtual bases. + for (const auto &Base : ClassDecl->bases()) { + // Ignore virtual bases. + if (Base.isVirtual()) + continue; + + CXXRecordDecl *BaseClassDecl = Base.getType()->getAsCXXRecordDecl(); + + if (BaseClassDecl->hasTrivialDestructor()) { + if (CGM.getCodeGenOpts().SanitizeMemoryUseAfterDtor && + SanOpts.has(SanitizerKind::Memory) && !BaseClassDecl->isEmpty()) + assert(!MissingFeatures::sanitizeDtor()); + } else { + EHStack.pushCleanup(NormalAndEHCleanup, BaseClassDecl, + /*BaseIsVirtual*/ false); + } + } + + // Poison fields such that access after their destructors are + // invoked, and before the base class destructor runs, is invalid. + bool SanitizeFields = CGM.getCodeGenOpts().SanitizeMemoryUseAfterDtor && + SanOpts.has(SanitizerKind::Memory); + assert(!MissingFeatures::sanitizeDtor()); + + // Destroy direct fields. + for (const auto *Field : ClassDecl->fields()) { + if (SanitizeFields) + assert(!MissingFeatures::sanitizeDtor()); + + QualType type = Field->getType(); + QualType::DestructionKind dtorKind = type.isDestructedType(); + if (!dtorKind) + continue; + + // Anonymous union members do not have their destructors called. + const RecordType *RT = type->getAsUnionType(); + if (RT && RT->getDecl()->isAnonymousStructOrUnion()) + continue; + + CleanupKind cleanupKind = getCleanupKind(dtorKind); + EHStack.pushCleanup( + cleanupKind, Field, getDestroyer(dtorKind), cleanupKind & EHCleanup); + } + + if (SanitizeFields) + assert(!MissingFeatures::sanitizeDtor()); +} + +namespace { +struct CallDelegatingCtorDtor final : EHScopeStack::Cleanup { + const CXXDestructorDecl *Dtor; + Address Addr; + CXXDtorType Type; + + CallDelegatingCtorDtor(const CXXDestructorDecl *D, Address Addr, + CXXDtorType Type) + : Dtor(D), Addr(Addr), Type(Type) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + // We are calling the destructor from within the constructor. + // Therefore, "this" should have the expected type. + QualType ThisTy = Dtor->getFunctionObjectParameterType(); + CGF.buildCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false, + /*Delegating=*/true, Addr, ThisTy); + } +}; +} // end anonymous namespace + +void CIRGenFunction::buildDelegatingCXXConstructorCall( + const CXXConstructorDecl *Ctor, const FunctionArgList &Args) { + assert(Ctor->isDelegatingConstructor()); + + Address ThisPtr = LoadCXXThisAddress(); + + AggValueSlot AggSlot = AggValueSlot::forAddr( + ThisPtr, Qualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap, AggValueSlot::IsNotZeroed, + // Checks are made by the code that calls constructor. + AggValueSlot::IsSanitizerChecked); + + buildAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot); + + const CXXRecordDecl *ClassDecl = Ctor->getParent(); + if (CGM.getLangOpts().Exceptions && !ClassDecl->hasTrivialDestructor()) { + CXXDtorType Type = + CurGD.getCtorType() == Ctor_Complete ? 
Dtor_Complete : Dtor_Base; + + EHStack.pushCleanup( + EHCleanup, ClassDecl->getDestructor(), ThisPtr, Type); + } +} + +void CIRGenFunction::buildCXXDestructorCall(const CXXDestructorDecl *DD, + CXXDtorType Type, + bool ForVirtualBase, + bool Delegating, Address This, + QualType ThisTy) { + CGM.getCXXABI().buildDestructorCall(*this, DD, Type, ForVirtualBase, + Delegating, This, ThisTy); +} + +mlir::Value CIRGenFunction::GetVTTParameter(GlobalDecl GD, bool ForVirtualBase, + bool Delegating) { + if (!CGM.getCXXABI().NeedsVTTParameter(GD)) { + // This constructor/destructor does not need a VTT parameter. + return nullptr; + } + + const CXXRecordDecl *RD = cast(CurCodeDecl)->getParent(); + const CXXRecordDecl *Base = cast(GD.getDecl())->getParent(); + + if (Delegating) { + llvm_unreachable("NYI"); + } else if (RD == Base) { + llvm_unreachable("NYI"); + } else { + llvm_unreachable("NYI"); + } + + if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { + llvm_unreachable("NYI"); + } else { + llvm_unreachable("NYI"); + } +} + +Address +CIRGenFunction::getAddressOfBaseClass(Address Value, + const CXXRecordDecl *Derived, + CastExpr::path_const_iterator PathBegin, + CastExpr::path_const_iterator PathEnd, + bool NullCheckValue, SourceLocation Loc) { + assert(PathBegin != PathEnd && "Base path should not be empty!"); + + CastExpr::path_const_iterator Start = PathBegin; + const CXXRecordDecl *VBase = nullptr; + + // Sema has done some convenient canonicalization here: if the + // access path involved any virtual steps, the conversion path will + // *start* with a step down to the correct virtual base subobject, + // and hence will not require any further steps. + if ((*Start)->isVirtual()) { + llvm_unreachable("NYI"); + } + + // Compute the static offset of the ultimate destination within its + // allocating subobject (the virtual base, if there is one, or else + // the "complete" object that we see). + CharUnits NonVirtualOffset = CGM.computeNonVirtualBaseClassOffset( + VBase ? VBase : Derived, Start, PathEnd); + + // If there's a virtual step, we can sometimes "devirtualize" it. + // For now, that's limited to when the derived type is final. + // TODO: "devirtualize" this for accesses to known-complete objects. + if (VBase && Derived->hasAttr()) { + llvm_unreachable("NYI"); + } + + // Get the base pointer type. + auto BaseValueTy = convertType((PathEnd[-1])->getType()); + assert(!MissingFeatures::addressSpace()); + // auto BasePtrTy = builder.getPointerTo(BaseValueTy); + // QualType DerivedTy = getContext().getRecordType(Derived); + // CharUnits DerivedAlign = CGM.getClassPointerAlignment(Derived); + + // If the static offset is zero and we don't have a virtual step, + // just do a bitcast; null checks are unnecessary. + if (NonVirtualOffset.isZero() && !VBase) { + if (sanitizePerformTypeCheck()) { + llvm_unreachable("NYI"); + } + return builder.createBaseClassAddr(getLoc(Loc), Value, BaseValueTy); + } + + // Skip over the offset (and the vtable load) if we're supposed to + // null-check the pointer. + if (NullCheckValue) { + llvm_unreachable("NYI"); + } + + if (sanitizePerformTypeCheck()) { + llvm_unreachable("NYI"); + } + + // Compute the virtual offset. + mlir::Value VirtualOffset{}; + if (VBase) { + llvm_unreachable("NYI"); + } + + // Apply both offsets. + Value = ApplyNonVirtualAndVirtualOffset(getLoc(Loc), *this, Value, + NonVirtualOffset, VirtualOffset, + Derived, VBase); + // Cast to the destination type. 
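For reference, a hypothetical conversion that would reach the non-zero-offset path of getAddressOfBaseClass being finished just below; because the second base subobject does not sit at offset zero, a plain bitcast is not sufficient and the non-virtual offset has to be applied.

struct B1 {
  int x;
};
struct B2 {
  int y;
};
struct Derived : B1, B2 {
  int z;
};

B2 *asB2(Derived *d) {
  return d;   // the pointer is adjusted by B2's non-virtual offset inside Derived
}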
+ Value = builder.createElementBitCast(Value.getPointer().getLoc(), Value, + BaseValueTy); + + // Build a phi if we needed a null check. + if (NullCheckValue) { + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); + return Value; +} + +// TODO(cir): this can be shared with LLVM codegen. +bool CIRGenFunction::shouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { + if (!CGM.getCodeGenOpts().WholeProgramVTables || + !CGM.HasHiddenLTOVisibility(RD)) + return false; + + if (CGM.getCodeGenOpts().VirtualFunctionElimination) + return true; + + if (!SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall)) + return false; + + std::string TypeName = RD->getQualifiedNameAsString(); + return !getContext().getNoSanitizeList().containsType(SanitizerKind::CFIVCall, + TypeName); +} + +void CIRGenFunction::buildTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + mlir::Value VTable, + SourceLocation Loc) { + if (SanOpts.has(SanitizerKind::CFIVCall)) { + llvm_unreachable("NYI"); + } else if (CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type test assumes if we are forcing public + // visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) { + llvm_unreachable("NYI"); + } +} + +mlir::Value CIRGenFunction::getVTablePtr(mlir::Location Loc, Address This, + mlir::Type VTableTy, + const CXXRecordDecl *RD) { + Address VTablePtrSrc = builder.createElementBitCast(Loc, This, VTableTy); + auto VTable = builder.createLoad(Loc, VTablePtrSrc); + assert(!MissingFeatures::tbaa()); + + if (CGM.getCodeGenOpts().OptimizationLevel > 0 && + CGM.getCodeGenOpts().StrictVTablePointers) { + assert(!MissingFeatures::createInvariantGroup()); + } + + return VTable; +} + +Address CIRGenFunction::buildCXXMemberDataPointerAddress( + const Expr *E, Address base, mlir::Value memberPtr, + const MemberPointerType *memberPtrType, LValueBaseInfo *baseInfo) { + assert(!MissingFeatures::cxxABI()); + + auto op = builder.createGetIndirectMember(getLoc(E->getSourceRange()), + base.getPointer(), memberPtr); + + QualType memberType = memberPtrType->getPointeeType(); + CharUnits memberAlign = CGM.getNaturalTypeAlignment(memberType, baseInfo); + memberAlign = CGM.getDynamicOffsetAlignment( + base.getAlignment(), memberPtrType->getClass()->getAsCXXRecordDecl(), + memberAlign); + + return Address(op, convertTypeForMem(memberPtrType->getPointeeType()), + memberAlign); +} + +clang::CharUnits +CIRGenModule::getDynamicOffsetAlignment(clang::CharUnits actualBaseAlign, + const clang::CXXRecordDecl *baseDecl, + clang::CharUnits expectedTargetAlign) { + // If the base is an incomplete type (which is, alas, possible with + // member pointers), be pessimistic. + if (!baseDecl->isCompleteDefinition()) + return std::min(actualBaseAlign, expectedTargetAlign); + + auto &baseLayout = getASTContext().getASTRecordLayout(baseDecl); + CharUnits expectedBaseAlign = baseLayout.getNonVirtualAlignment(); + + // If the class is properly aligned, assume the target offset is, too. + // + // This actually isn't necessarily the right thing to do --- if the + // class is a complete object, but it's only properly aligned for a + // base subobject, then the alignments of things relative to it are + // probably off as well. (Note that this requires the alignment of + // the target to be greater than the NV alignment of the derived + // class.) 
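A hypothetical use of a pointer to data member, the construct served by buildCXXMemberDataPointerAddress above; the alignment of the address it produces is bounded via getDynamicOffsetAlignment, the routine documented by the surrounding comment.

struct S {
  int a;
  int b;
};

int readField(S &s, int S::*field) {
  return s.*field;   // indirect access through a member pointer: the address is
}                    // the object pointer plus an offset known only at run time

// e.g. readField(s, &S::b) loads whichever field the caller selected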
+ // + // However, our approach to this kind of under-alignment can only + // ever be best effort; after all, we're never going to propagate + // alignments through variables or parameters. Note, in particular, + // that constructing a polymorphic type in an address that's less + // than pointer-aligned will generally trap in the constructor, + // unless we someday add some sort of attribute to change the + // assumed alignment of 'this'. So our goal here is pretty much + // just to allow the user to explicitly say that a pointer is + // under-aligned and then safely access its fields and vtables. + if (actualBaseAlign >= expectedBaseAlign) { + return expectedTargetAlign; + } + + // Otherwise, we might be offset by an arbitrary multiple of the + // actual alignment. The correct adjustment is to take the min of + // the two alignments. + return std::min(actualBaseAlign, expectedTargetAlign); +} + +/// Emit a loop to call a particular constructor for each of several members of +/// an array. +/// +/// \param ctor the constructor to call for each element +/// \param arrayType the type of the array to initialize +/// \param arrayBegin an arrayType* +/// \param zeroInitialize true if each element should be +/// zero-initialized before it is constructed +void CIRGenFunction::buildCXXAggrConstructorCall( + const CXXConstructorDecl *ctor, const clang::ArrayType *arrayType, + Address arrayBegin, const CXXConstructExpr *E, bool NewPointerIsChecked, + bool zeroInitialize) { + QualType elementType; + auto numElements = buildArrayLength(arrayType, elementType, arrayBegin); + buildCXXAggrConstructorCall(ctor, numElements, arrayBegin, E, + NewPointerIsChecked, zeroInitialize); +} + +/// Emit a loop to call a particular constructor for each of several members of +/// an array. +/// +/// \param ctor the constructor to call for each element +/// \param numElements the number of elements in the array; +/// may be zero +/// \param arrayBase a T*, where T is the type constructed by ctor +/// \param zeroInitialize true if each element should be +/// zero-initialized before it is constructed +void CIRGenFunction::buildCXXAggrConstructorCall( + const CXXConstructorDecl *ctor, mlir::Value numElements, Address arrayBase, + const CXXConstructExpr *E, bool NewPointerIsChecked, bool zeroInitialize) { + // It's legal for numElements to be zero. This can happen both + // dynamically, because x can be zero in 'new A[x]', and statically, + // because of GCC extensions that permit zero-length arrays. There + // are probably legitimate places where we could assume that this + // doesn't happen, but it's not clear that it's worth it. + // llvm::BranchInst *zeroCheckBranch = nullptr; + + // Optimize for a constant count. + auto constantCount = + dyn_cast(numElements.getDefiningOp()); + if (constantCount) { + auto constIntAttr = + mlir::dyn_cast(constantCount.getValue()); + // Just skip out if the constant count is zero. + if (constIntAttr && constIntAttr.getUInt() == 0) + return; + // Otherwise, emit the check. + } else { + llvm_unreachable("NYI"); + } + + auto arrayTy = + mlir::dyn_cast(arrayBase.getElementType()); + assert(arrayTy && "expected array type"); + auto elementType = arrayTy.getEltType(); + auto ptrToElmType = builder.getPointerTo(elementType); + + // Tradional LLVM codegen emits a loop here. + // TODO(cir): Lower to a loop as part of LoweringPrepare. + + // The alignment of the base, adjusted by the size of a single element, + // provides a conservative estimate of the alignment of every element. 
+ // (This assumes we never start tracking offsetted alignments.) + // + // Note that these are complete objects and so we don't need to + // use the non-virtual size or alignment. + QualType type = getContext().getTypeDeclType(ctor->getParent()); + CharUnits eltAlignment = arrayBase.getAlignment().alignmentOfArrayElement( + getContext().getTypeSizeInChars(type)); + + // Zero initialize the storage, if requested. + if (zeroInitialize) { + llvm_unreachable("NYI"); + } + + // C++ [class.temporary]p4: + // There are two contexts in which temporaries are destroyed at a different + // point than the end of the full-expression. The first context is when a + // default constructor is called to initialize an element of an array. + // If the constructor has one or more default arguments, the destruction of + // every temporary created in a default argument expression is sequenced + // before the construction of the next array element, if any. + { + RunCleanupsScope Scope(*this); + + // Evaluate the constructor and its arguments in a regular + // partial-destroy cleanup. + if (getLangOpts().Exceptions && + !ctor->getParent()->hasTrivialDestructor()) { + llvm_unreachable("NYI"); + } + + // Wmit the constructor call that will execute for every array element. + builder.create( + *currSrcLoc, arrayBase.getPointer(), + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto arg = b.getInsertionBlock()->addArgument(ptrToElmType, loc); + Address curAddr = Address(arg, ptrToElmType, eltAlignment); + auto currAVS = AggValueSlot::forAddr( + curAddr, type.getQualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, AggValueSlot::IsNotZeroed, + NewPointerIsChecked ? AggValueSlot::IsSanitizerChecked + : AggValueSlot::IsNotSanitizerChecked); + buildCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, currAVS, E); + builder.create(loc); + }); + } + + if (constantCount.use_empty()) + constantCount.erase(); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp new file mode 100644 index 000000000000..f6de23ca89b8 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp @@ -0,0 +1,694 @@ +//===--- CIRGenCleanup.cpp - Bookkeeping and code emission for cleanups ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains code dealing with the IR generation for cleanups +// and related information. +// +// A "cleanup" is a piece of code which needs to be executed whenever +// control transfers out of a particular scope. This can be +// conditionalized to occur only on exceptional control flow, only on +// normal control flow, or both. 
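To make the notion concrete, a minimal hypothetical function in which the same cleanup has to run on both the normal and the exceptional exit path:

struct Lock {
  Lock();
  ~Lock();        // must run however control leaves the scope
};
int work();       // may throw

int guarded() {
  Lock l;         // pushes a cleanup (normal + EH) for ~Lock()
  return work();  // the normal return runs it on the fallthrough path; if work()
}                 // throws, the exceptional path runs the same cleanup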
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SaveAndRestore.h" + +#include "CIRGenCleanup.h" +#include "CIRGenFunction.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; + +//===----------------------------------------------------------------------===// +// CIRGenFunction cleanup related +//===----------------------------------------------------------------------===// + +/// Build a unconditional branch to the lexical scope cleanup block +/// or with the labeled blocked if already solved. +/// +/// Track on scope basis, goto's we need to fix later. +mlir::cir::BrOp CIRGenFunction::buildBranchThroughCleanup(mlir::Location Loc, + JumpDest Dest) { + // Remove this once we go for making sure unreachable code is + // well modeled (or not). + assert(builder.getInsertionBlock() && "not yet implemented"); + assert(!MissingFeatures::ehStack()); + + // Insert a branch: to the cleanup block (unsolved) or to the already + // materialized label. Keep track of unsolved goto's. + return builder.create(Loc, Dest.isValid() ? Dest.getBlock() + : ReturnBlock().getBlock()); +} + +/// Emits all the code to cause the given temporary to be cleaned up. +void CIRGenFunction::buildCXXTemporary(const CXXTemporary *Temporary, + QualType TempType, Address Ptr) { + pushDestroy(NormalAndEHCleanup, Ptr, TempType, destroyCXXObject, + /*useEHCleanup*/ true); +} + +Address CIRGenFunction::createCleanupActiveFlag() { + mlir::Location loc = currSrcLoc ? *currSrcLoc : builder.getUnknownLoc(); + + // Create a variable to decide whether the cleanup needs to be run. + // FIXME: set the insertion point for the alloca to be at the entry + // basic block of the previous scope, not the entry block of the function. + Address active = CreateTempAllocaWithoutCast( + builder.getBoolTy(), CharUnits::One(), loc, "cleanup.cond"); + mlir::Value falseVal, trueVal; + { + // Place true/false flags close to their allocas. + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfterValue(active.getPointer()); + falseVal = builder.getFalse(loc); + trueVal = builder.getTrue(loc); + } + + // Initialize it to false at a site that's guaranteed to be run + // before each evaluation. + setBeforeOutermostConditional(falseVal, active); + + // Initialize it to true at the current location. + builder.createStore(loc, trueVal, active); + return active; +} + +DominatingValue::saved_type +DominatingValue::saved_type::save(CIRGenFunction &cgf, RValue rv) { + if (rv.isScalar()) { + mlir::Value val = rv.getScalarVal(); + return saved_type(DominatingCIRValue::save(cgf, val), + DominatingCIRValue::needsSaving(val) ? ScalarAddress + : ScalarLiteral); + } + + if (rv.isComplex()) { + llvm_unreachable("complex NYI"); + } + + llvm_unreachable("aggregate NYI"); +} + +/// Given a saved r-value produced by SaveRValue, perform the code +/// necessary to restore it to usability at the current insertion +/// point. +RValue DominatingValue::saved_type::restore(CIRGenFunction &CGF) { + switch (K) { + case ScalarLiteral: + case ScalarAddress: + return RValue::get(DominatingCIRValue::restore(CGF, Vals.first)); + case AggregateLiteral: + case AggregateAddress: + return RValue::getAggregate( + DominatingValue
::restore(CGF, AggregateAddr)); + case ComplexAddress: { + llvm_unreachable("NYI"); + } + } + + llvm_unreachable("bad saved r-value kind"); +} + +static bool IsUsedAsEHCleanup(EHScopeStack &EHStack, + EHScopeStack::stable_iterator cleanup) { + // If we needed an EH block for any reason, that counts. + if (EHStack.find(cleanup)->hasEHBranches()) + return true; + + // Check whether any enclosed cleanups were needed. + for (EHScopeStack::stable_iterator i = EHStack.getInnermostEHScope(); + i != cleanup;) { + assert(cleanup.strictlyEncloses(i)); + + EHScope &scope = *EHStack.find(i); + if (scope.hasEHBranches()) + return true; + + i = scope.getEnclosingEHScope(); + } + + return false; +} + +enum ForActivation_t { ForActivation, ForDeactivation }; + +/// The given cleanup block is changing activation state. Configure a +/// cleanup variable if necessary. +/// +/// It would be good if we had some way of determining if there were +/// extra uses *after* the change-over point. +static void setupCleanupBlockActivation(CIRGenFunction &CGF, + EHScopeStack::stable_iterator C, + ForActivation_t kind, + mlir::Operation *dominatingIP) { + EHCleanupScope &Scope = cast(*CGF.EHStack.find(C)); + + // We always need the flag if we're activating the cleanup in a + // conditional context, because we have to assume that the current + // location doesn't necessarily dominate the cleanup's code. + bool isActivatedInConditional = + (kind == ForActivation && CGF.isInConditionalBranch()); + + bool needFlag = false; + + // Calculate whether the cleanup was used: + + // - as a normal cleanup + if (Scope.isNormalCleanup()) { + Scope.setTestFlagInNormalCleanup(); + needFlag = true; + } + + // - as an EH cleanup + if (Scope.isEHCleanup() && + (isActivatedInConditional || IsUsedAsEHCleanup(CGF.EHStack, C))) { + Scope.setTestFlagInEHCleanup(); + needFlag = true; + } + + // If it hasn't yet been used as either, we're done. + if (!needFlag) + return; + + Address var = Scope.getActiveFlag(); + if (!var.isValid()) { + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); +} + +/// Deactive a cleanup that was created in an active state. +void CIRGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C, + mlir::Operation *dominatingIP) { + assert(C != EHStack.stable_end() && "deactivating bottom of stack?"); + EHCleanupScope &Scope = cast(*EHStack.find(C)); + assert(Scope.isActive() && "double deactivation"); + + // If it's the top of the stack, just pop it, but do so only if it belongs + // to the current RunCleanupsScope. + if (C == EHStack.stable_begin() && + CurrentCleanupScopeDepth.strictlyEncloses(C)) { + // Per comment below, checking EHAsynch is not really necessary + // it's there to assure zero-impact w/o EHAsynch option + if (!Scope.isNormalCleanup() && getLangOpts().EHAsynch) { + llvm_unreachable("NYI"); + } else { + // From LLVM: If it's a normal cleanup, we need to pretend that the + // fallthrough is unreachable. + // CIR remarks: LLVM uses an empty insertion point to signal behavior + // change to other codegen paths (triggered by PopCleanupBlock). + // CIRGen doesn't do that yet, but let's mimic just in case. + mlir::OpBuilder::InsertionGuard guard(builder); + builder.clearInsertionPoint(); + PopCleanupBlock(); + } + return; + } + + // Otherwise, follow the general case. + setupCleanupBlockActivation(*this, C, ForDeactivation, dominatingIP); + Scope.setActive(false); +} + +void CIRGenFunction::initFullExprCleanupWithFlag(Address ActiveFlag) { + // Set that as the active flag in the cleanup. 
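A hypothetical full-expression in which a temporary is only conditionally constructed, the situation the active-flag machinery above (createCleanupActiveFlag and setupCleanupBlockActivation) is modeling:

struct Guard {
  Guard();
  ~Guard();
};
bool slowPath(const Guard &);

bool dispatch(bool fast) {
  // The Guard temporary exists only when 'fast' is false, so its
  // end-of-full-expression destructor cleanup is gated by a boolean
  // "cleanup.cond"-style flag instead of being unconditionally active.
  return fast || slowPath(Guard());
}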
+ EHCleanupScope &cleanup = cast(*EHStack.begin()); + assert(!cleanup.hasActiveFlag() && "cleanup already has active flag?"); + cleanup.setActiveFlag(ActiveFlag); + + if (cleanup.isNormalCleanup()) + cleanup.setTestFlagInNormalCleanup(); + if (cleanup.isEHCleanup()) + cleanup.setTestFlagInEHCleanup(); +} + +/// We don't need a normal entry block for the given cleanup. +/// Optimistic fixup branches can cause these blocks to come into +/// existence anyway; if so, destroy it. +/// +/// The validity of this transformation is very much specific to the +/// exact ways in which we form branches to cleanup entries. +static void destroyOptimisticNormalEntry(CIRGenFunction &CGF, + EHCleanupScope &scope) { + auto *entry = scope.getNormalBlock(); + if (!entry) + return; + + llvm_unreachable("NYI"); +} + +static void buildCleanup(CIRGenFunction &CGF, EHScopeStack::Cleanup *Fn, + EHScopeStack::Cleanup::Flags flags, + Address ActiveFlag) { + auto emitCleanup = [&]() { + // Ask the cleanup to emit itself. + assert(CGF.HaveInsertPoint() && "expected insertion point"); + Fn->Emit(CGF, flags); + assert(CGF.HaveInsertPoint() && "cleanup ended with no insertion point?"); + }; + + // If there's an active flag, load it and skip the cleanup if it's + // false. + cir::CIRGenBuilderTy &builder = CGF.getBuilder(); + mlir::Location loc = + CGF.currSrcLoc ? *CGF.currSrcLoc : builder.getUnknownLoc(); + + if (ActiveFlag.isValid()) { + mlir::Value isActive = builder.createLoad(loc, ActiveFlag); + builder.create(loc, isActive, false, + [&](mlir::OpBuilder &b, mlir::Location) { + emitCleanup(); + builder.createYield(loc); + }); + } else { + emitCleanup(); + } + // No need to emit continuation block because CIR uses a cir.if. +} + +/// Pops a cleanup block. If the block includes a normal cleanup, the +/// current insertion point is threaded through the cleanup, as are +/// any branch fixups on the cleanup. +void CIRGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { + assert(!EHStack.empty() && "cleanup stack is empty!"); + assert(isa(*EHStack.begin()) && "top not a cleanup!"); + EHCleanupScope &Scope = cast(*EHStack.begin()); + assert(Scope.getFixupDepth() <= EHStack.getNumBranchFixups()); + + // Remember activation information. + bool IsActive = Scope.isActive(); + Address NormalActiveFlag = Scope.shouldTestFlagInNormalCleanup() + ? Scope.getActiveFlag() + : Address::invalid(); + Address EHActiveFlag = Scope.shouldTestFlagInEHCleanup() + ? Scope.getActiveFlag() + : Address::invalid(); + + // Check whether we need an EH cleanup. This is only true if we've + // generated a lazy EH cleanup block. + auto *ehEntry = Scope.getCachedEHDispatchBlock(); + assert(Scope.hasEHBranches() == (ehEntry != nullptr)); + bool RequiresEHCleanup = (ehEntry != nullptr); + EHScopeStack::stable_iterator EHParent = Scope.getEnclosingEHScope(); + + // Check the three conditions which might require a normal cleanup: + + // - whether there are branch fix-ups through this cleanup + unsigned FixupDepth = Scope.getFixupDepth(); + bool HasFixups = EHStack.getNumBranchFixups() != FixupDepth; + + // - whether there are branch-throughs or branch-afters + bool HasExistingBranches = Scope.hasBranches(); + + // - whether there's a fallthrough + auto *FallthroughSource = builder.getInsertionBlock(); + bool HasFallthrough = (FallthroughSource != nullptr && IsActive); + + // Branch-through fall-throughs leave the insertion point set to the + // end of the last cleanup, which points to the current scope. 
The + // rest of CIR gen doesn't need to worry about this; it only happens + // during the execution of PopCleanupBlocks(). + bool HasTerminator = FallthroughSource && + FallthroughSource->mightHaveTerminator() && + FallthroughSource->getTerminator(); + bool HasPrebranchedFallthrough = + HasTerminator && + !isa(FallthroughSource->getTerminator()); + + // If this is a normal cleanup, then having a prebranched + // fallthrough implies that the fallthrough source unconditionally + // jumps here. + assert(!Scope.isNormalCleanup() || !HasPrebranchedFallthrough || + (Scope.getNormalBlock() && + FallthroughSource->getTerminator()->getSuccessor(0) == + Scope.getNormalBlock())); + + bool RequiresNormalCleanup = false; + if (Scope.isNormalCleanup() && + (HasFixups || HasExistingBranches || HasFallthrough)) { + RequiresNormalCleanup = true; + } + + // If we have a prebranched fallthrough into an inactive normal + // cleanup, rewrite it so that it leads to the appropriate place. + if (Scope.isNormalCleanup() && HasPrebranchedFallthrough && !IsActive) { + llvm_unreachable("NYI"); + } + + // If we don't need the cleanup at all, we're done. + if (!RequiresNormalCleanup && !RequiresEHCleanup) { + destroyOptimisticNormalEntry(*this, Scope); + EHStack.popCleanup(); // safe because there are no fixups + assert(EHStack.getNumBranchFixups() == 0 || EHStack.hasNormalCleanups()); + return; + } + + // Copy the cleanup emission data out. This uses either a stack + // array or malloc'd memory, depending on the size, which is + // behavior that SmallVector would provide, if we could use it + // here. Unfortunately, if you ask for a SmallVector, the + // alignment isn't sufficient. + auto *CleanupSource = reinterpret_cast(Scope.getCleanupBuffer()); + alignas(EHScopeStack::ScopeStackAlignment) char + CleanupBufferStack[8 * sizeof(void *)]; + std::unique_ptr CleanupBufferHeap; + size_t CleanupSize = Scope.getCleanupSize(); + EHScopeStack::Cleanup *Fn; + + if (CleanupSize <= sizeof(CleanupBufferStack)) { + memcpy(CleanupBufferStack, CleanupSource, CleanupSize); + Fn = reinterpret_cast(CleanupBufferStack); + } else { + CleanupBufferHeap.reset(new char[CleanupSize]); + memcpy(CleanupBufferHeap.get(), CleanupSource, CleanupSize); + Fn = reinterpret_cast(CleanupBufferHeap.get()); + } + + EHScopeStack::Cleanup::Flags cleanupFlags; + if (Scope.isNormalCleanup()) + cleanupFlags.setIsNormalCleanupKind(); + if (Scope.isEHCleanup()) + cleanupFlags.setIsEHCleanupKind(); + + // Under -EHa, invoke seh.scope.end() to mark scope end before dtor + bool IsEHa = getLangOpts().EHAsynch && !Scope.isLifetimeMarker(); + // const EHPersonality &Personality = EHPersonality::get(*this); + if (!RequiresNormalCleanup) { + // Mark CPP scope end for passed-by-value Arg temp + // per Windows ABI which is "normally" Cleanup in callee + if (IsEHa && isInvokeDest()) { + // If we are deactivating a normal cleanup then we don't have a + // fallthrough. Restore original IP to emit CPP scope ends in the correct + // block. + llvm_unreachable("NYI"); + } + destroyOptimisticNormalEntry(*this, Scope); + Scope.markEmitted(); + EHStack.popCleanup(); + } else { + // If we have a fallthrough and no other need for the cleanup, + // emit it directly. 
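Two hypothetical functions contrasting the cases distinguished here: in the first only the fallthrough needs the cleanup, so it can be emitted in place as done just below; in the second the early return is an extra edge that has to be threaded through the cleanup.

struct R {
  R();
  ~R();
};

void onlyFallthrough() {
  R r;
}                        // one exit: the cleanup is emitted directly

void withEarlyExit(int n) {
  R r;
  if (n > 0)
    return;              // an additional exit from r's scope besides the fallthrough
}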
+ if (HasFallthrough && !HasPrebranchedFallthrough && !HasFixups && + !HasExistingBranches) { + + // mark SEH scope end for fall-through flow + if (IsEHa) { + llvm_unreachable("NYI"); + } + + destroyOptimisticNormalEntry(*this, Scope); + EHStack.popCleanup(); + Scope.markEmitted(); + buildCleanup(*this, Fn, cleanupFlags, NormalActiveFlag); + + // Otherwise, the best approach is to thread everything through + // the cleanup block and then try to clean up after ourselves. + } else { + llvm_unreachable("NYI"); + } + } + + assert(EHStack.hasNormalCleanups() || EHStack.getNumBranchFixups() == 0); + + // Emit the EH cleanup if required. + if (RequiresEHCleanup) { + mlir::cir::TryOp tryOp = + ehEntry->getParentOp()->getParentOfType(); + assert(tryOp && "expected available cir.try"); + auto *nextAction = getEHDispatchBlock(EHParent, tryOp); + (void)nextAction; + + // Push a terminate scope or cleanupendpad scope around the potentially + // throwing cleanups. For funclet EH personalities, the cleanupendpad models + // program termination when cleanups throw. + bool PushedTerminate = false; + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); + mlir::Operation *CPI = nullptr; + + const EHPersonality &Personality = EHPersonality::get(*this); + if (Personality.usesFuncletPads()) { + llvm_unreachable("NYI"); + } + + // Non-MSVC personalities need to terminate when an EH cleanup throws. + if (!Personality.isMSVCPersonality()) { + EHStack.pushTerminate(); + PushedTerminate = true; + } else if (IsEHa && isInvokeDest()) { + llvm_unreachable("NYI"); + } + + // We only actually emit the cleanup code if the cleanup is either + // active or was used before it was deactivated. + if (EHActiveFlag.isValid() || IsActive) { + cleanupFlags.setIsForEHCleanup(); + mlir::OpBuilder::InsertionGuard guard(builder); + + auto yield = cast(ehEntry->getTerminator()); + builder.setInsertionPoint(yield); + buildCleanup(*this, Fn, cleanupFlags, EHActiveFlag); + } + + if (CPI) + llvm_unreachable("NYI"); + else { + // In LLVM traditional codegen, here's where it branches off to + // nextAction. CIR does not have a flat layout at this point, so + // instead patch all the landing pads that need to run this cleanup + // as well. + mlir::Block *currBlock = ehEntry; + while (currBlock && cleanupsToPatch.contains(currBlock)) { + mlir::OpBuilder::InsertionGuard guard(builder); + mlir::Block *blockToPatch = cleanupsToPatch[currBlock]; + auto currYield = cast(blockToPatch->getTerminator()); + builder.setInsertionPoint(currYield); + buildCleanup(*this, Fn, cleanupFlags, EHActiveFlag); + currBlock = blockToPatch; + } + + // The nextAction is yet to be populated, register that this + // cleanup should also incorporate any cleanup from nextAction + // when available. + cleanupsToPatch[nextAction] = ehEntry; + } + + // Leave the terminate scope. + if (PushedTerminate) + EHStack.popTerminate(); + + // FIXME(cir): LLVM traditional codegen tries to simplify some of the + // codegen here. Once we are further down with EH support revisit whether we + // need to this during lowering. + assert(!MissingFeatures::simplifyCleanupEntry()); + } +} + +/// Pops cleanup blocks until the given savepoint is reached. 
+void CIRGenFunction::PopCleanupBlocks( + EHScopeStack::stable_iterator Old, + std::initializer_list ValuesToReload) { + assert(Old.isValid()); + + bool HadBranches = false; + while (EHStack.stable_begin() != Old) { + EHCleanupScope &Scope = cast(*EHStack.begin()); + HadBranches |= Scope.hasBranches(); + + // As long as Old strictly encloses the scope's enclosing normal + // cleanup, we're going to emit another normal cleanup which + // fallthrough can propagate through. + bool FallThroughIsBranchThrough = + Old.strictlyEncloses(Scope.getEnclosingNormalCleanup()); + + PopCleanupBlock(FallThroughIsBranchThrough); + } + + // If we didn't have any branches, the insertion point before cleanups must + // dominate the current insertion point and we don't need to reload any + // values. + if (!HadBranches) + return; + + llvm_unreachable("NYI"); +} + +/// Pops cleanup blocks until the given savepoint is reached, then add the +/// cleanups from the given savepoint in the lifetime-extended cleanups stack. +void CIRGenFunction::PopCleanupBlocks( + EHScopeStack::stable_iterator Old, size_t OldLifetimeExtendedSize, + std::initializer_list ValuesToReload) { + PopCleanupBlocks(Old, ValuesToReload); + + // Move our deferred cleanups onto the EH stack. + for (size_t I = OldLifetimeExtendedSize, + E = LifetimeExtendedCleanupStack.size(); + I != E; + /**/) { + // Alignment should be guaranteed by the vptrs in the individual cleanups. + assert((I % alignof(LifetimeExtendedCleanupHeader) == 0) && + "misaligned cleanup stack entry"); + + LifetimeExtendedCleanupHeader &Header = + reinterpret_cast( + LifetimeExtendedCleanupStack[I]); + I += sizeof(Header); + + EHStack.pushCopyOfCleanup( + Header.getKind(), &LifetimeExtendedCleanupStack[I], Header.getSize()); + I += Header.getSize(); + + if (Header.isConditional()) { + Address ActiveFlag = + reinterpret_cast
(LifetimeExtendedCleanupStack[I]); + initFullExprCleanupWithFlag(ActiveFlag); + I += sizeof(ActiveFlag); + } + } + LifetimeExtendedCleanupStack.resize(OldLifetimeExtendedSize); +} + +//===----------------------------------------------------------------------===// +// EHScopeStack +//===----------------------------------------------------------------------===// + +void EHScopeStack::Cleanup::anchor() {} + +/// Push an entry of the given size onto this protected-scope stack. +char *EHScopeStack::allocate(size_t Size) { + Size = llvm::alignTo(Size, ScopeStackAlignment); + if (!StartOfBuffer) { + unsigned Capacity = 1024; + while (Capacity < Size) + Capacity *= 2; + StartOfBuffer = new char[Capacity]; + StartOfData = EndOfBuffer = StartOfBuffer + Capacity; + } else if (static_cast(StartOfData - StartOfBuffer) < Size) { + unsigned CurrentCapacity = EndOfBuffer - StartOfBuffer; + unsigned UsedCapacity = CurrentCapacity - (StartOfData - StartOfBuffer); + + unsigned NewCapacity = CurrentCapacity; + do { + NewCapacity *= 2; + } while (NewCapacity < UsedCapacity + Size); + + char *NewStartOfBuffer = new char[NewCapacity]; + char *NewEndOfBuffer = NewStartOfBuffer + NewCapacity; + char *NewStartOfData = NewEndOfBuffer - UsedCapacity; + memcpy(NewStartOfData, StartOfData, UsedCapacity); + delete[] StartOfBuffer; + StartOfBuffer = NewStartOfBuffer; + EndOfBuffer = NewEndOfBuffer; + StartOfData = NewStartOfData; + } + + assert(StartOfBuffer + Size <= StartOfData); + StartOfData -= Size; + return StartOfData; +} + +void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { + char *Buffer = allocate(EHCleanupScope::getSizeForCleanupSize(Size)); + bool IsNormalCleanup = Kind & NormalCleanup; + bool IsEHCleanup = Kind & EHCleanup; + bool IsLifetimeMarker = Kind & LifetimeMarker; + + // Per C++ [except.terminate], it is implementation-defined whether none, + // some, or all cleanups are called before std::terminate. Thus, when + // terminate is the current EH scope, we may skip adding any EH cleanup + // scopes. + if (InnermostEHScope != stable_end() && + find(InnermostEHScope)->getKind() == EHScope::Terminate) + IsEHCleanup = false; + + EHCleanupScope *Scope = new (Buffer) + EHCleanupScope(IsNormalCleanup, IsEHCleanup, Size, BranchFixups.size(), + InnermostNormalCleanup, InnermostEHScope); + if (IsNormalCleanup) + InnermostNormalCleanup = stable_begin(); + if (IsEHCleanup) + InnermostEHScope = stable_begin(); + if (IsLifetimeMarker) + llvm_unreachable("NYI"); + + // With Windows -EHa, Invoke llvm.seh.scope.begin() for EHCleanup + if (CGF->getLangOpts().EHAsynch && IsEHCleanup && !IsLifetimeMarker && + CGF->getTarget().getCXXABI().isMicrosoft()) + llvm_unreachable("NYI"); + + return Scope->getCleanupBuffer(); +} + +void EHScopeStack::popCleanup() { + assert(!empty() && "popping exception stack when not empty"); + + assert(isa(*begin())); + EHCleanupScope &Cleanup = cast(*begin()); + InnermostNormalCleanup = Cleanup.getEnclosingNormalCleanup(); + InnermostEHScope = Cleanup.getEnclosingEHScope(); + deallocate(Cleanup.getAllocatedSize()); + + // Destroy the cleanup. + Cleanup.Destroy(); + + // Check whether we can shrink the branch-fixups stack. + if (!BranchFixups.empty()) { + // If we no longer have any normal cleanups, all the fixups are + // complete. + if (!hasNormalCleanups()) + BranchFixups.clear(); + + // Otherwise we can still trim out unnecessary nulls. 
+ else + popNullFixups(); + } +} + +void EHScopeStack::deallocate(size_t Size) { + StartOfData += llvm::alignTo(Size, ScopeStackAlignment); +} + +/// Remove any 'null' fixups on the stack. However, we can't pop more +/// fixups than the fixup depth on the innermost normal cleanup, or +/// else fixups that we try to add to that cleanup will end up in the +/// wrong place. We *could* try to shrink fixup depths, but that's +/// actually a lot of work for little benefit. +void EHScopeStack::popNullFixups() { + // We expect this to only be called when there's still an innermost + // normal cleanup; otherwise there really shouldn't be any fixups. + llvm_unreachable("NYI"); +} + +bool EHScopeStack::requiresLandingPad() const { + for (stable_iterator si = getInnermostEHScope(); si != stable_end();) { + // Skip lifetime markers. + if (auto *cleanup = dyn_cast(&*find(si))) + if (cleanup->isLifetimeMarker()) { + si = cleanup->getEnclosingEHScope(); + continue; + } + return true; + } + + return false; +} + +EHCatchScope *EHScopeStack::pushCatch(unsigned numHandlers) { + char *buffer = allocate(EHCatchScope::getSizeForNumHandlers(numHandlers)); + EHCatchScope *scope = + new (buffer) EHCatchScope(numHandlers, InnermostEHScope); + InnermostEHScope = stable_begin(); + return scope; +} + +void EHScopeStack::pushTerminate() { + char *Buffer = allocate(EHTerminateScope::getSize()); + new (Buffer) EHTerminateScope(InnermostEHScope); + InnermostEHScope = stable_begin(); +} \ No newline at end of file diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h new file mode 100644 index 000000000000..76547ceebfe4 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h @@ -0,0 +1,670 @@ +//===-- CIRGenCleanup.h - Classes for cleanups CIR generation ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes support the generation of CIR for cleanups, initially based +// on LLVM IR cleanup handling, but ought to change as CIR evolves. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CGCLEANUP_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CGCLEANUP_H + +#include "Address.h" +#include "EHScopeStack.h" +#include "mlir/IR/Value.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang { +class FunctionDecl; +} + +namespace cir { +class CIRGenModule; +class CIRGenFunction; + +/// The MS C++ ABI needs a pointer to RTTI data plus some flags to describe the +/// type of a catch handler, so we use this wrapper. +struct CatchTypeInfo { + mlir::TypedAttr RTTI; + unsigned Flags; +}; + +/// A protected scope for zero-cost EH handling. +class EHScope { + mlir::Operation *CachedLandingPad; + mlir::Block *CachedEHDispatchBlock; + + EHScopeStack::stable_iterator EnclosingEHScope; + + class CommonBitFields { + friend class EHScope; + unsigned Kind : 3; + }; + enum { NumCommonBits = 3 }; + +protected: + class CatchBitFields { + friend class EHCatchScope; + unsigned : NumCommonBits; + + unsigned NumHandlers : 32 - NumCommonBits; + }; + + class CleanupBitFields { + friend class EHCleanupScope; + unsigned : NumCommonBits; + + /// Whether this cleanup needs to be run along normal edges. 
+ unsigned IsNormalCleanup : 1; + + /// Whether this cleanup needs to be run along exception edges. + unsigned IsEHCleanup : 1; + + /// Whether this cleanup is currently active. + unsigned IsActive : 1; + + /// Whether this cleanup is a lifetime marker + unsigned IsLifetimeMarker : 1; + + /// Whether the normal cleanup should test the activation flag. + unsigned TestFlagInNormalCleanup : 1; + + /// Whether the EH cleanup should test the activation flag. + unsigned TestFlagInEHCleanup : 1; + + /// The amount of extra storage needed by the Cleanup. + /// Always a multiple of the scope-stack alignment. + unsigned CleanupSize : 12; + }; + + class FilterBitFields { + friend class EHFilterScope; + unsigned : NumCommonBits; + + unsigned NumFilters : 32 - NumCommonBits; + }; + + union { + CommonBitFields CommonBits; + CatchBitFields CatchBits; + CleanupBitFields CleanupBits; + FilterBitFields FilterBits; + }; + +public: + enum Kind { Cleanup, Catch, Terminate, Filter }; + + EHScope(Kind kind, EHScopeStack::stable_iterator enclosingEHScope) + : CachedLandingPad(nullptr), CachedEHDispatchBlock(nullptr), + EnclosingEHScope(enclosingEHScope) { + CommonBits.Kind = kind; + } + + Kind getKind() const { return static_cast(CommonBits.Kind); } + + mlir::Operation *getCachedLandingPad() const { return CachedLandingPad; } + + void setCachedLandingPad(mlir::Operation *op) { CachedLandingPad = op; } + + mlir::Block *getCachedEHDispatchBlock() const { + return CachedEHDispatchBlock; + } + + void setCachedEHDispatchBlock(mlir::Block *block) { + CachedEHDispatchBlock = block; + } + + bool hasEHBranches() const { + // Traditional LLVM codegen also checks for `!block->use_empty()`, but + // in CIRGen the block content is not important, just used as a way to + // signal `hasEHBranches`. + if (mlir::Block *block = getCachedEHDispatchBlock()) + return true; + return false; + } + + EHScopeStack::stable_iterator getEnclosingEHScope() const { + return EnclosingEHScope; + } +}; + +/// A scope which attempts to handle some, possibly all, types of +/// exceptions. +/// +/// Objective C \@finally blocks are represented using a cleanup scope +/// after the catch scope. +class EHCatchScope : public EHScope { + // In effect, we have a flexible array member + // Handler Handlers[0]; + // But that's only standard in C99, not C++, so we have to do + // annoying pointer arithmetic instead. + +public: + struct Handler { + /// A type info value, or null (C++ null, not an LLVM null pointer) + /// for a catch-all. + CatchTypeInfo Type; + + /// The catch handler for this type. 
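The trailing-storage idiom that EHCatchScope relies on (and that EHCleanupScope and EHFilterScope use later in this header), reduced to a standalone sketch with hypothetical names; the real scope objects are placement-constructed into buffers handed out by EHScopeStack::allocate.

#include <cstddef>
#include <new>

// Illustrative only: a header object immediately followed in its allocation by
// 'count' trailing elements, reached via pointer arithmetic on 'this + 1'.
struct IntBox {
  unsigned count;

  int *elements() { return reinterpret_cast<int *>(this + 1); }

  static std::size_t sizeFor(unsigned n) {
    return sizeof(IntBox) + n * sizeof(int);
  }
};

// 'storage' must be at least IntBox::sizeFor(n) bytes and suitably aligned.
IntBox *makeBox(char *storage, unsigned n) {
  IntBox *box = new (storage) IntBox{n};
  for (unsigned i = 0; i != n; ++i)
    box->elements()[i] = 0;
  return box;
}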
+ mlir::Block *Block; + + bool isCatchAll() const { return Type.RTTI == nullptr; } + }; + +private: + friend class EHScopeStack; + + Handler *getHandlers() { return reinterpret_cast(this + 1); } + + const Handler *getHandlers() const { + return reinterpret_cast(this + 1); + } + +public: + static size_t getSizeForNumHandlers(unsigned N) { + return sizeof(EHCatchScope) + N * sizeof(Handler); + } + + EHCatchScope(unsigned numHandlers, + EHScopeStack::stable_iterator enclosingEHScope) + : EHScope(Catch, enclosingEHScope) { + CatchBits.NumHandlers = numHandlers; + assert(CatchBits.NumHandlers == numHandlers && "NumHandlers overflow?"); + } + + unsigned getNumHandlers() const { return CatchBits.NumHandlers; } + + void setCatchAllHandler(unsigned I, mlir::Block *Block) { + setHandler(I, CatchTypeInfo{nullptr, 0}, Block); + } + + void setHandler(unsigned I, mlir::TypedAttr Type, mlir::Block *Block) { + assert(I < getNumHandlers()); + getHandlers()[I].Type = CatchTypeInfo{Type, 0}; + getHandlers()[I].Block = Block; + } + + void setHandler(unsigned I, CatchTypeInfo Type, mlir::Block *Block) { + assert(I < getNumHandlers()); + getHandlers()[I].Type = Type; + getHandlers()[I].Block = Block; + } + + const Handler &getHandler(unsigned I) const { + assert(I < getNumHandlers()); + return getHandlers()[I]; + } + + // Clear all handler blocks. + // FIXME: it's better to always call clearHandlerBlocks in DTOR and have a + // 'takeHandler' or some such function which removes ownership from the + // EHCatchScope object if the handlers should live longer than EHCatchScope. + void clearHandlerBlocks() { + // The blocks are owned by TryOp, nothing to delete. + } + + typedef const Handler *iterator; + iterator begin() const { return getHandlers(); } + iterator end() const { return getHandlers() + getNumHandlers(); } + + static bool classof(const EHScope *Scope) { + return Scope->getKind() == Catch; + } +}; + +/// A cleanup scope which generates the cleanup blocks lazily. +class alignas(8) EHCleanupScope : public EHScope { + /// The nearest normal cleanup scope enclosing this one. + EHScopeStack::stable_iterator EnclosingNormal; + + /// The nearest EH scope enclosing this one. + EHScopeStack::stable_iterator EnclosingEH; + + /// The dual entry/exit block along the normal edge. This is lazily + /// created if needed before the cleanup is popped. + mlir::Block *NormalBlock; + + /// An optional i1 variable indicating whether this cleanup has been + /// activated yet. + Address ActiveFlag; + + /// Extra information required for cleanups that have resolved + /// branches through them. This has to be allocated on the side + /// because everything on the cleanup stack has be trivially + /// movable. + struct ExtInfo { + /// The destinations of normal branch-afters and branch-throughs. + llvm::SmallPtrSet Branches; + + /// Normal branch-afters. + llvm::SmallVector, 4> BranchAfters; + }; + mutable struct ExtInfo *ExtInfo; + + /// The number of fixups required by enclosing scopes (not including + /// this one). If this is the top cleanup scope, all the fixups + /// from this index onwards belong to this scope. + unsigned FixupDepth; + + struct ExtInfo &getExtInfo() { + if (!ExtInfo) + ExtInfo = new struct ExtInfo(); + return *ExtInfo; + } + + const struct ExtInfo &getExtInfo() const { + if (!ExtInfo) + ExtInfo = new struct ExtInfo(); + return *ExtInfo; + } + +public: + /// Gets the size required for a lazy cleanup scope with the given + /// cleanup-data requirements. 
+ static size_t getSizeForCleanupSize(size_t Size) { + return sizeof(EHCleanupScope) + Size; + } + + size_t getAllocatedSize() const { + return sizeof(EHCleanupScope) + CleanupBits.CleanupSize; + } + + EHCleanupScope(bool isNormal, bool isEH, unsigned cleanupSize, + unsigned fixupDepth, + EHScopeStack::stable_iterator enclosingNormal, + EHScopeStack::stable_iterator enclosingEH) + : EHScope(EHScope::Cleanup, enclosingEH), + EnclosingNormal(enclosingNormal), NormalBlock(nullptr), + ActiveFlag(Address::invalid()), ExtInfo(nullptr), + FixupDepth(fixupDepth) { + CleanupBits.IsNormalCleanup = isNormal; + CleanupBits.IsEHCleanup = isEH; + CleanupBits.IsActive = true; + CleanupBits.IsLifetimeMarker = false; + CleanupBits.TestFlagInNormalCleanup = false; + CleanupBits.TestFlagInEHCleanup = false; + CleanupBits.CleanupSize = cleanupSize; + + assert(CleanupBits.CleanupSize == cleanupSize && "cleanup size overflow"); + } + + void Destroy() { delete ExtInfo; } + // Objects of EHCleanupScope are not destructed. Use Destroy(). + ~EHCleanupScope() = delete; + + bool isNormalCleanup() const { return CleanupBits.IsNormalCleanup; } + mlir::Block *getNormalBlock() const { return NormalBlock; } + void setNormalBlock(mlir::Block *BB) { NormalBlock = BB; } + + bool isEHCleanup() const { return CleanupBits.IsEHCleanup; } + + bool isActive() const { return CleanupBits.IsActive; } + void setActive(bool A) { CleanupBits.IsActive = A; } + + bool isLifetimeMarker() const { return CleanupBits.IsLifetimeMarker; } + void setLifetimeMarker() { CleanupBits.IsLifetimeMarker = true; } + + bool hasActiveFlag() const { return ActiveFlag.isValid(); } + Address getActiveFlag() const { return ActiveFlag; } + void setActiveFlag(Address Var) { + assert(Var.getAlignment().isOne()); + ActiveFlag = Var; + } + + void setTestFlagInNormalCleanup() { + CleanupBits.TestFlagInNormalCleanup = true; + } + bool shouldTestFlagInNormalCleanup() const { + return CleanupBits.TestFlagInNormalCleanup; + } + + void setTestFlagInEHCleanup() { CleanupBits.TestFlagInEHCleanup = true; } + bool shouldTestFlagInEHCleanup() const { + return CleanupBits.TestFlagInEHCleanup; + } + + unsigned getFixupDepth() const { return FixupDepth; } + EHScopeStack::stable_iterator getEnclosingNormalCleanup() const { + return EnclosingNormal; + } + + size_t getCleanupSize() const { return CleanupBits.CleanupSize; } + void *getCleanupBuffer() { return this + 1; } + + EHScopeStack::Cleanup *getCleanup() { + return reinterpret_cast(getCleanupBuffer()); + } + + /// True if this cleanup scope has any branch-afters or branch-throughs. + bool hasBranches() const { return ExtInfo && !ExtInfo->Branches.empty(); } + + /// Add a branch-after to this cleanup scope. A branch-after is a + /// branch from a point protected by this (normal) cleanup to a + /// point in the normal cleanup scope immediately containing it. + /// For example, + /// for (;;) { A a; break; } + /// contains a branch-after. + /// + /// Branch-afters each have their own destination out of the + /// cleanup, guaranteed distinct from anything else threaded through + /// it. Therefore branch-afters usually force a switch after the + /// cleanup. + void addBranchAfter(mlir::Value Index, mlir::Block *Block) { + struct ExtInfo &ExtInfo = getExtInfo(); + if (ExtInfo.Branches.insert(Block).second) + ExtInfo.BranchAfters.push_back(std::make_pair(Block, Index)); + } + + /// Return the number of unique branch-afters on this scope. + unsigned getNumBranchAfters() const { + return ExtInfo ? 
ExtInfo->BranchAfters.size() : 0; + } + + mlir::Block *getBranchAfterBlock(unsigned I) const { + assert(I < getNumBranchAfters()); + return ExtInfo->BranchAfters[I].first; + } + + mlir::Value getBranchAfterIndex(unsigned I) const { + assert(I < getNumBranchAfters()); + return ExtInfo->BranchAfters[I].second; + } + + /// Add a branch-through to this cleanup scope. A branch-through is + /// a branch from a scope protected by this (normal) cleanup to an + /// enclosing scope other than the immediately-enclosing normal + /// cleanup scope. + /// + /// In the following example, the branch through B's scope is a + /// branch-through, while the branch through A's scope is a + /// branch-after: + /// for (;;) { A a; B b; break; } + /// + /// All branch-throughs have a common destination out of the + /// cleanup, one possibly shared with the fall-through. Therefore + /// branch-throughs usually don't force a switch after the cleanup. + /// + /// \return true if the branch-through was new to this scope + bool addBranchThrough(mlir::Block *Block) { + return getExtInfo().Branches.insert(Block).second; + } + + /// Determines if this cleanup scope has any branch throughs. + bool hasBranchThroughs() const { + if (!ExtInfo) + return false; + return (ExtInfo->BranchAfters.size() != ExtInfo->Branches.size()); + } + + static bool classof(const EHScope *Scope) { + return (Scope->getKind() == Cleanup); + } + + /// Erases auxillary allocas and their usages for an unused cleanup. + /// Cleanups should mark these allocas as 'used' if the cleanup is + /// emitted, otherwise these instructions would be erased. + struct AuxillaryAllocas { + llvm::SmallVector auxAllocas; + bool used = false; + + // Records a potentially unused instruction to be erased later. + void add(mlir::cir::AllocaOp allocaOp) { auxAllocas.push_back(allocaOp); } + + // Mark all recorded instructions as used. These will not be erased later. + void markUsed() { + used = true; + auxAllocas.clear(); + } + + ~AuxillaryAllocas() { + if (used) + return; + llvm::SetVector uses; + for (auto *Inst : llvm::reverse(auxAllocas)) + collectuses(Inst, uses); + // Delete uses in the reverse order of insertion. + for (auto *I : llvm::reverse(uses)) + I->erase(); + } + + private: + void collectuses(mlir::Operation *op, + llvm::SetVector &uses) { + if (!op || !uses.insert(op)) + return; + for (auto *User : op->getUsers()) + collectuses(llvm::cast(User), uses); + } + }; + mutable struct AuxillaryAllocas *auxAllocas = nullptr; + + void markEmitted() { + if (!auxAllocas) + return; + getAuxillaryAllocas().markUsed(); + } + + AuxillaryAllocas &getAuxillaryAllocas() { + if (!auxAllocas) { + auxAllocas = new struct AuxillaryAllocas(); + } + return *auxAllocas; + } +}; +// NOTE: there's a bunch of different data classes tacked on after an +// EHCleanupScope. It is asserted (in EHScopeStack::pushCleanup*) that +// they don't require greater alignment than ScopeStackAlignment. So, +// EHCleanupScope ought to have alignment equal to that -- not more +// (would be misaligned by the stack allocator), and not less (would +// break the appended classes). +static_assert(alignof(EHCleanupScope) == EHScopeStack::ScopeStackAlignment, + "EHCleanupScope expected alignment"); + +/// An exceptions scope which filters exceptions thrown through it. +/// Only exceptions matching the filter types will be permitted to be +/// thrown. +/// +/// This is used to implement C++ exception specifications. 
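For context, the (deprecated and since-removed) C++ construct that such a filter implements, with hypothetical exception types:

struct NetError {};
struct DiskError {};

// A dynamic exception specification (removed in C++17): exceptions other than
// NetError or DiskError escaping transfer() are caught by the filter and
// routed to std::unexpected(), which by default terminates the program.
void transfer() throw(NetError, DiskError);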
+class EHFilterScope : public EHScope { + // Essentially ends in a flexible array member: + // mlir::Value FilterTypes[0]; + + mlir::Value *getFilters() { + return reinterpret_cast(this + 1); + } + + mlir::Value const *getFilters() const { + return reinterpret_cast(this + 1); + } + +public: + EHFilterScope(unsigned numFilters) + : EHScope(Filter, EHScopeStack::stable_end()) { + FilterBits.NumFilters = numFilters; + assert(FilterBits.NumFilters == numFilters && "NumFilters overflow"); + } + + static size_t getSizeForNumFilters(unsigned numFilters) { + return sizeof(EHFilterScope) + numFilters * sizeof(mlir::Value); + } + + unsigned getNumFilters() const { return FilterBits.NumFilters; } + + void setFilter(unsigned i, mlir::Value filterValue) { + assert(i < getNumFilters()); + getFilters()[i] = filterValue; + } + + mlir::Value getFilter(unsigned i) const { + assert(i < getNumFilters()); + return getFilters()[i]; + } + + static bool classof(const EHScope *scope) { + return scope->getKind() == Filter; + } +}; + +/// An exceptions scope which calls std::terminate if any exception +/// reaches it. +class EHTerminateScope : public EHScope { +public: + EHTerminateScope(EHScopeStack::stable_iterator enclosingEHScope) + : EHScope(Terminate, enclosingEHScope) {} + static size_t getSize() { return sizeof(EHTerminateScope); } + + static bool classof(const EHScope *scope) { + return scope->getKind() == Terminate; + } +}; + +/// A non-stable pointer into the scope stack. +class EHScopeStack::iterator { + char *Ptr; + + friend class EHScopeStack; + explicit iterator(char *Ptr) : Ptr(Ptr) {} + +public: + iterator() : Ptr(nullptr) {} + + EHScope *get() const { return reinterpret_cast(Ptr); } + + EHScope *operator->() const { return get(); } + EHScope &operator*() const { return *get(); } + + iterator &operator++() { + size_t Size; + switch (get()->getKind()) { + case EHScope::Catch: + Size = EHCatchScope::getSizeForNumHandlers( + static_cast(get())->getNumHandlers()); + break; + + case EHScope::Filter: + Size = EHFilterScope::getSizeForNumFilters( + static_cast(get())->getNumFilters()); + break; + + case EHScope::Cleanup: + Size = static_cast(get())->getAllocatedSize(); + break; + + case EHScope::Terminate: + Size = EHTerminateScope::getSize(); + break; + } + Ptr += llvm::alignTo(Size, ScopeStackAlignment); + return *this; + } + + iterator next() { + iterator copy = *this; + ++copy; + return copy; + } + + iterator operator++(int) { + iterator copy = *this; + operator++(); + return copy; + } + + bool encloses(iterator other) const { return Ptr >= other.Ptr; } + bool strictlyEncloses(iterator other) const { return Ptr > other.Ptr; } + + bool operator==(iterator other) const { return Ptr == other.Ptr; } + bool operator!=(iterator other) const { return Ptr != other.Ptr; } +}; + +inline EHScopeStack::iterator EHScopeStack::begin() const { + return iterator(StartOfData); +} + +inline EHScopeStack::iterator EHScopeStack::end() const { + return iterator(EndOfBuffer); +} + +inline void EHScopeStack::popCatch() { + assert(!empty() && "popping exception stack when not empty"); + + EHCatchScope &scope = llvm::cast(*begin()); + InnermostEHScope = scope.getEnclosingEHScope(); + deallocate(EHCatchScope::getSizeForNumHandlers(scope.getNumHandlers())); +} + +inline void EHScopeStack::popTerminate() { + assert(!empty() && "popping exception stack when not empty"); + + EHTerminateScope &scope = llvm::cast(*begin()); + InnermostEHScope = scope.getEnclosingEHScope(); + deallocate(EHTerminateScope::getSize()); +} + +inline 
EHScopeStack::iterator EHScopeStack::find(stable_iterator sp) const { + assert(sp.isValid() && "finding invalid savepoint"); + assert(sp.Size <= stable_begin().Size && "finding savepoint after pop"); + return iterator(EndOfBuffer - sp.Size); +} + +inline EHScopeStack::stable_iterator +EHScopeStack::stabilize(iterator ir) const { + assert(StartOfData <= ir.Ptr && ir.Ptr <= EndOfBuffer); + return stable_iterator(EndOfBuffer - ir.Ptr); +} + +/// The exceptions personality for a function. +struct EHPersonality { + const char *PersonalityFn = nullptr; + + // If this is non-null, this personality requires a non-standard + // function for rethrowing an exception after a catchall cleanup. + // This function must have prototype void(void*). + const char *CatchallRethrowFn = nullptr; + + static const EHPersonality &get(CIRGenModule &CGM, + const clang::FunctionDecl *FD); + static const EHPersonality &get(CIRGenFunction &CGF); + + static const EHPersonality GNU_C; + static const EHPersonality GNU_C_SJLJ; + static const EHPersonality GNU_C_SEH; + static const EHPersonality GNU_ObjC; + static const EHPersonality GNU_ObjC_SJLJ; + static const EHPersonality GNU_ObjC_SEH; + static const EHPersonality GNUstep_ObjC; + static const EHPersonality GNU_ObjCXX; + static const EHPersonality NeXT_ObjC; + static const EHPersonality GNU_CPlusPlus; + static const EHPersonality GNU_CPlusPlus_SJLJ; + static const EHPersonality GNU_CPlusPlus_SEH; + static const EHPersonality MSVC_except_handler; + static const EHPersonality MSVC_C_specific_handler; + static const EHPersonality MSVC_CxxFrameHandler3; + static const EHPersonality GNU_Wasm_CPlusPlus; + static const EHPersonality XL_CPlusPlus; + + /// Does this personality use landingpads or the family of pad instructions + /// designed to form funclets? + bool usesFuncletPads() const { + return isMSVCPersonality() || isWasmPersonality(); + } + + bool isMSVCPersonality() const { + return this == &MSVC_except_handler || this == &MSVC_C_specific_handler || + this == &MSVC_CxxFrameHandler3; + } + + bool isWasmPersonality() const { return this == &GNU_Wasm_CPlusPlus; } + + bool isMSVCXXPersonality() const { return this == &MSVC_CxxFrameHandler3; } +}; +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp new file mode 100644 index 000000000000..5aece0476abd --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp @@ -0,0 +1,576 @@ +//===----- CGCoroutine.cpp - Emit CIR Code for C++ coroutines -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ code generation of coroutines. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/ADT/ScopeExit.h" + +using namespace clang; +using namespace cir; + +struct cir::CGCoroData { + // What is the current await expression kind and how many + // await/yield expressions were encountered so far. + // These are used to generate pretty labels for await expressions in LLVM IR. 
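  // The kind progresses as the coroutine is emitted (see buildCoroutineBody
  // below): `init` while emitting the initial_suspend, `user` for co_await
  // expressions in the user-written body, `final` for the final_suspend, and
  // `yield` is used directly by buildCoyieldExpr for co_yield.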
+ mlir::cir::AwaitKind CurrentAwaitKind = mlir::cir::AwaitKind::init; + + // Stores the __builtin_coro_id emitted in the function so that we can supply + // it as the first argument to other builtins. + mlir::cir::CallOp CoroId = nullptr; + + // Stores the result of __builtin_coro_begin call. + mlir::Value CoroBegin = nullptr; + + // Stores the insertion point for final suspend, this happens after the + // promise call (return_xxx promise member) but before a cir.br to the return + // block. + mlir::Operation *FinalSuspendInsPoint; + + // How many co_return statements are in the coroutine. Used to decide whether + // we need to add co_return; equivalent at the end of the user authored body. + unsigned CoreturnCount = 0; + + // The promise type's 'unhandled_exception' handler, if it defines one. + Stmt *ExceptionHandler = nullptr; +}; + +// Defining these here allows to keep CGCoroData private to this file. +CIRGenFunction::CGCoroInfo::CGCoroInfo() {} +CIRGenFunction::CGCoroInfo::~CGCoroInfo() {} + +static void createCoroData(CIRGenFunction &CGF, + CIRGenFunction::CGCoroInfo &CurCoro, + mlir::cir::CallOp CoroId) { + if (CurCoro.Data) { + llvm_unreachable("EmitCoroutineBodyStatement called twice?"); + + return; + } + + CurCoro.Data = std::unique_ptr(new CGCoroData); + CurCoro.Data->CoroId = CoroId; +} + +namespace { +// FIXME: both GetParamRef and ParamReferenceReplacerRAII are good template +// candidates to be shared among LLVM / CIR codegen. + +// Hunts for the parameter reference in the parameter copy/move declaration. +struct GetParamRef : public StmtVisitor { +public: + DeclRefExpr *Expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *E) { + assert(Expr == nullptr && "multilple declref in param move"); + Expr = E; + } + void VisitStmt(Stmt *S) { + for (auto *C : S->children()) { + if (C) + Visit(C); + } + } +}; + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. +struct ParamReferenceReplacerRAII { + CIRGenFunction::DeclMapTy SavedLocals; + CIRGenFunction::DeclMapTy &LocalDeclMap; + + ParamReferenceReplacerRAII(CIRGenFunction::DeclMapTy &LocalDeclMap) + : LocalDeclMap(LocalDeclMap) {} + + void addCopy(DeclStmt const *PM) { + // Figure out what param it refers to. + + assert(PM->isSingleDecl()); + VarDecl const *VD = static_cast(PM->getSingleDecl()); + Expr const *InitExpr = VD->getInit(); + GetParamRef Visitor; + Visitor.Visit(const_cast(InitExpr)); + assert(Visitor.Expr); + DeclRefExpr *DREOrig = Visitor.Expr; + auto *PD = DREOrig->getDecl(); + + auto it = LocalDeclMap.find(PD); + assert(it != LocalDeclMap.end() && "parameter is not found"); + SavedLocals.insert({PD, it->second}); + + auto copyIt = LocalDeclMap.find(VD); + assert(copyIt != LocalDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto &&SavedLocal : SavedLocals) { + LocalDeclMap.insert({SavedLocal.first, SavedLocal.second}); + } + } +}; +} // namespace + +// Emit coroutine intrinsic and patch up arguments of the token type. 
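// Illustration of the parameter-copy machinery above (hypothetical names):
// for a coroutine such as
//
//   task f(Widget w) { co_return; }
//
// Sema records a parameter move roughly of the form
//
//   Widget w_copy = /* move-constructed from */ w;  // DeclStmt seen by addCopy()
//
// GetParamRef finds the DeclRefExpr to `w` inside that initializer, and
// ParamReferenceReplacerRAII remaps `w` in LocalDeclMap to the copy's address
// while the coroutine body is emitted, restoring the original mapping in its
// destructor.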
+RValue CIRGenFunction::buildCoroutineIntrinsic(const CallExpr *E, + unsigned int IID) { + llvm_unreachable("NYI"); +} + +RValue CIRGenFunction::buildCoroutineFrame() { + if (CurCoro.Data && CurCoro.Data->CoroBegin) { + return RValue::get(CurCoro.Data->CoroBegin); + } + llvm_unreachable("NYI"); +} + +static mlir::LogicalResult +buildBodyAndFallthrough(CIRGenFunction &CGF, const CoroutineBodyStmt &S, + Stmt *Body, + const CIRGenFunction::LexicalScope *currLexScope) { + if (CGF.buildStmt(Body, /*useCurrentScope=*/true).failed()) + return mlir::failure(); + // Note that LLVM checks CanFallthrough by looking into the availability + // of the insert block which is kinda brittle and unintuitive, seems to be + // related with how landing pads are handled. + // + // CIRGen handles this by checking pre-existing co_returns in the current + // scope instead. Are we missing anything? + // + // From LLVM IR Gen: const bool CanFallthrough = Builder.GetInsertBlock(); + const bool CanFallthrough = !currLexScope->hasCoreturn(); + if (CanFallthrough) + if (Stmt *OnFallthrough = S.getFallthroughHandler()) + if (CGF.buildStmt(OnFallthrough, /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + return mlir::success(); +} + +mlir::cir::CallOp CIRGenFunction::buildCoroIDBuiltinCall(mlir::Location loc, + mlir::Value nullPtr) { + auto int32Ty = builder.getUInt32Ty(); + + auto &TI = CGM.getASTContext().getTargetInfo(); + unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + + mlir::Operation *builtin = CGM.getGlobalValue(CGM.builtinCoroId); + + mlir::cir::FuncOp fnOp; + if (!builtin) { + fnOp = CGM.createCIRFunction( + loc, CGM.builtinCoroId, + mlir::cir::FuncType::get({int32Ty, VoidPtrTy, VoidPtrTy, VoidPtrTy}, + int32Ty), + /*FD=*/nullptr); + assert(fnOp && "should always succeed"); + fnOp.setBuiltinAttr(mlir::UnitAttr::get(builder.getContext())); + } else + fnOp = cast(builtin); + + return builder.createCallOp(loc, fnOp, + mlir::ValueRange{builder.getUInt32(NewAlign, loc), + nullPtr, nullPtr, nullPtr}); +} + +mlir::cir::CallOp +CIRGenFunction::buildCoroAllocBuiltinCall(mlir::Location loc) { + auto boolTy = builder.getBoolTy(); + auto int32Ty = builder.getUInt32Ty(); + + mlir::Operation *builtin = CGM.getGlobalValue(CGM.builtinCoroAlloc); + + mlir::cir::FuncOp fnOp; + if (!builtin) { + fnOp = CGM.createCIRFunction(loc, CGM.builtinCoroAlloc, + mlir::cir::FuncType::get({int32Ty}, boolTy), + /*FD=*/nullptr); + assert(fnOp && "should always succeed"); + fnOp.setBuiltinAttr(mlir::UnitAttr::get(builder.getContext())); + } else + fnOp = cast(builtin); + + return builder.createCallOp( + loc, fnOp, mlir::ValueRange{CurCoro.Data->CoroId.getResult()}); +} + +mlir::cir::CallOp +CIRGenFunction::buildCoroBeginBuiltinCall(mlir::Location loc, + mlir::Value coroframeAddr) { + auto int32Ty = builder.getUInt32Ty(); + mlir::Operation *builtin = CGM.getGlobalValue(CGM.builtinCoroBegin); + + mlir::cir::FuncOp fnOp; + if (!builtin) { + fnOp = CGM.createCIRFunction( + loc, CGM.builtinCoroBegin, + mlir::cir::FuncType::get({int32Ty, VoidPtrTy}, VoidPtrTy), + /*FD=*/nullptr); + assert(fnOp && "should always succeed"); + fnOp.setBuiltinAttr(mlir::UnitAttr::get(builder.getContext())); + } else + fnOp = cast(builtin); + + return builder.createCallOp( + loc, fnOp, + mlir::ValueRange{CurCoro.Data->CoroId.getResult(), coroframeAddr}); +} + +mlir::cir::CallOp CIRGenFunction::buildCoroEndBuiltinCall(mlir::Location loc, + mlir::Value nullPtr) { + auto boolTy = builder.getBoolTy(); + mlir::Operation *builtin = 
CGM.getGlobalValue(CGM.builtinCoroEnd); + + mlir::cir::FuncOp fnOp; + if (!builtin) { + fnOp = CGM.createCIRFunction( + loc, CGM.builtinCoroEnd, + mlir::cir::FuncType::get({VoidPtrTy, boolTy}, boolTy), + /*FD=*/nullptr); + assert(fnOp && "should always succeed"); + fnOp.setBuiltinAttr(mlir::UnitAttr::get(builder.getContext())); + } else + fnOp = cast(builtin); + + return builder.createCallOp( + loc, fnOp, mlir::ValueRange{nullPtr, builder.getBool(false, loc)}); +} + +mlir::LogicalResult +CIRGenFunction::buildCoroutineBody(const CoroutineBodyStmt &S) { + auto openCurlyLoc = getLoc(S.getBeginLoc()); + auto nullPtrCst = builder.getNullPtr(VoidPtrTy, openCurlyLoc); + + auto Fn = dyn_cast(CurFn); + assert(Fn && "other callables NYI"); + Fn.setCoroutineAttr(mlir::UnitAttr::get(builder.getContext())); + auto coroId = buildCoroIDBuiltinCall(openCurlyLoc, nullPtrCst); + createCoroData(*this, CurCoro, coroId); + + // Backend is allowed to elide memory allocations, to help it, emit + // auto mem = coro.alloc() ? 0 : ... allocation code ...; + auto coroAlloc = buildCoroAllocBuiltinCall(openCurlyLoc); + + // Initialize address of coroutine frame to null + auto astVoidPtrTy = CGM.getASTContext().VoidPtrTy; + auto allocaTy = getTypes().convertTypeForMem(astVoidPtrTy); + Address coroFrame = + CreateTempAlloca(allocaTy, getContext().getTypeAlignInChars(astVoidPtrTy), + openCurlyLoc, "__coro_frame_addr", + /*ArraySize=*/nullptr); + + auto storeAddr = coroFrame.getPointer(); + builder.CIRBaseBuilderTy::createStore(openCurlyLoc, nullPtrCst, storeAddr); + builder.create(openCurlyLoc, coroAlloc.getResult(), + /*withElseRegion=*/false, + /*thenBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + builder.CIRBaseBuilderTy::createStore( + loc, buildScalarExpr(S.getAllocate()), + storeAddr); + builder.create(loc); + }); + + CurCoro.Data->CoroBegin = + buildCoroBeginBuiltinCall( + openCurlyLoc, + builder.create(openCurlyLoc, allocaTy, storeAddr)) + .getResult(); + + // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. + if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) + llvm_unreachable("NYI"); + + { + // FIXME(cir): create a new scope to copy out the params? + // LLVM create scope cleanups here, but might be due to the use + // of many basic blocks? + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap); + + // Create mapping between parameters and copy-params for coroutine + // function. + llvm::ArrayRef ParamMoves = S.getParamMoves(); + assert((ParamMoves.size() == 0 || (ParamMoves.size() == FnArgs.size())) && + "ParamMoves and FnArgs should be the same size for coroutine " + "function"); + // For zipping the arg map into debug info. + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + + // Create parameter copies. We do it before creating a promise, since an + // evolution of coroutine TS may allow promise constructor to observe + // parameter copies. + for (auto *PM : S.getParamMoves()) { + if (buildStmt(PM, /*useCurrentScope=*/true).failed()) + return mlir::failure(); + ParamReplacer.addCopy(cast(PM)); + } + + if (buildStmt(S.getPromiseDeclStmt(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + // ReturnValue should be valid as long as the coroutine's return type + // is not void. The assertion could help us to reduce the check later. + assert(ReturnValue.isValid() == (bool)S.getReturnStmt()); + // Now we have the promise, initialize the GRO. 
+ // We need to emit `get_return_object` first. According to: + // [dcl.fct.def.coroutine]p7 + // The call to get_return_­object is sequenced before the call to + // initial_suspend and is invoked at most once. + // + // So we couldn't emit return value when we emit return statment, + // otherwise the call to get_return_object wouldn't be in front + // of initial_suspend. + if (ReturnValue.isValid()) { + buildAnyExprToMem(S.getReturnValue(), ReturnValue, + S.getReturnValue()->getType().getQualifiers(), + /*IsInit*/ true); + } + + // FIXME(cir): EHStack.pushCleanup(EHCleanup); + CurCoro.Data->CurrentAwaitKind = mlir::cir::AwaitKind::init; + if (buildStmt(S.getInitSuspendStmt(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + CurCoro.Data->CurrentAwaitKind = mlir::cir::AwaitKind::user; + + // FIXME(cir): wrap buildBodyAndFallthrough with try/catch bits. + if (S.getExceptionHandler()) + assert(!MissingFeatures::unhandledException() && "NYI"); + if (buildBodyAndFallthrough(*this, S, S.getBody(), currLexScope).failed()) + return mlir::failure(); + + // Note that LLVM checks CanFallthrough by looking into the availability + // of the insert block which is kinda brittle and unintuitive, seems to be + // related with how landing pads are handled. + // + // CIRGen handles this by checking pre-existing co_returns in the current + // scope instead. Are we missing anything? + // + // From LLVM IR Gen: const bool CanFallthrough = Builder.GetInsertBlock(); + const bool CanFallthrough = currLexScope->hasCoreturn(); + const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; + if (CanFallthrough || HasCoreturns) { + CurCoro.Data->CurrentAwaitKind = mlir::cir::AwaitKind::final; + { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(CurCoro.Data->FinalSuspendInsPoint); + if (buildStmt(S.getFinalSuspendStmt(), /*useCurrentScope=*/true) + .failed()) + return mlir::failure(); + } + } + } + return mlir::success(); +} + +static bool memberCallExpressionCanThrow(const Expr *E) { + if (const auto *CE = dyn_cast(E)) + if (const auto *Proto = + CE->getMethodDecl()->getType()->getAs()) + if (isNoexceptExceptionSpec(Proto->getExceptionSpecType()) && + Proto->canThrow() == CT_Cannot) + return false; + return true; +} + +// Given a suspend expression which roughly looks like: +// +// auto && x = CommonExpr(); +// if (!x.await_ready()) { +// x.await_suspend(...); (*) +// } +// x.await_resume(); +// +// where the result of the entire expression is the result of x.await_resume() +// +// (*) If x.await_suspend return type is bool, it allows to veto a suspend: +// if (x.await_suspend(...)) +// llvm_coro_suspend(); +// +// This is more higher level than LLVM codegen, for that one see llvm's +// docs/Coroutines.rst for more details. 
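// As a concrete source-level illustration of the shape above (hypothetical
// names; only the void-returning await_suspend form is handled below):
//
//   struct quick_await {
//     bool await_ready() const noexcept { return false; }
//     void await_suspend(std::coroutine_handle<>) noexcept {}
//     int await_resume() const noexcept { return 42; }
//   };
//   // ... inside a coroutine ...
//   int v = co_await quick_await{};
//
// await_ready() is evaluated in the ready region of the cir.await built
// below, await_suspend() in the suspend region, and await_resume() in the
// resume region, whose scalar result is spilled to a temporary alloca (see
// buildSuspendExpr) so it can be reloaded once the enclosing scope closes.
// The actual save/suspend intrinsics are only materialized when lowering CIR
// further.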
+namespace { +struct LValueOrRValue { + LValue LV; + RValue RV; +}; +} // namespace +static LValueOrRValue +buildSuspendExpression(CIRGenFunction &CGF, CGCoroData &Coro, + CoroutineSuspendExpr const &S, mlir::cir::AwaitKind Kind, + AggValueSlot aggSlot, bool ignoreResult, + mlir::Block *scopeParentBlock, + mlir::Value &tmpResumeRValAddr, bool forLValue) { + auto *E = S.getCommonExpr(); + + auto awaitBuild = mlir::success(); + LValueOrRValue awaitRes; + + auto Binder = + CIRGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); + auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); + auto &builder = CGF.getBuilder(); + + [[maybe_unused]] auto awaitOp = builder.create( + CGF.getLoc(S.getSourceRange()), Kind, + /*readyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + Expr *condExpr = S.getReadyExpr()->IgnoreParens(); + builder.createCondition(CGF.evaluateExprAsBool(condExpr)); + }, + /*suspendBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // Note that differently from LLVM codegen we do not emit coro.save + // and coro.suspend here, that should be done as part of lowering this + // to LLVM dialect (or some other MLIR dialect) + + // A invalid suspendRet indicates "void returning await_suspend" + auto suspendRet = CGF.buildScalarExpr(S.getSuspendExpr()); + + // Veto suspension if requested by bool returning await_suspend. + if (suspendRet) { + // From LLVM codegen: + // if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) + llvm_unreachable("NYI"); + } + + // Signals the parent that execution flows to next region. + builder.create(loc); + }, + /*resumeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // Exception handling requires additional IR. If the 'await_resume' + // function is marked as 'noexcept', we avoid generating this additional + // IR. + CXXTryStmt *TryStmt = nullptr; + if (Coro.ExceptionHandler && Kind == mlir::cir::AwaitKind::init && + memberCallExpressionCanThrow(S.getResumeExpr())) { + llvm_unreachable("NYI"); + } + + // FIXME(cir): the alloca for the resume expr should be placed in the + // enclosing cir.scope instead. + if (forLValue) + awaitRes.LV = CGF.buildLValue(S.getResumeExpr()); + else { + awaitRes.RV = + CGF.buildAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); + if (!awaitRes.RV.isIgnored()) { + // Create the alloca in the block before the scope wrapping + // cir.await. + tmpResumeRValAddr = CGF.buildAlloca( + "__coawait_resume_rval", awaitRes.RV.getScalarVal().getType(), + loc, CharUnits::One(), + builder.getBestAllocaInsertPoint(scopeParentBlock)); + // Store the rvalue so we can reload it before the promise call. + builder.CIRBaseBuilderTy::createStore( + loc, awaitRes.RV.getScalarVal(), tmpResumeRValAddr); + } + } + + if (TryStmt) { + llvm_unreachable("NYI"); + } + + // Returns control back to parent. + builder.create(loc); + }); + + assert(awaitBuild.succeeded() && "Should know how to codegen"); + return awaitRes; +} + +static RValue buildSuspendExpr(CIRGenFunction &CGF, + const CoroutineSuspendExpr &E, + mlir::cir::AwaitKind kind, AggValueSlot aggSlot, + bool ignoreResult) { + RValue rval; + auto scopeLoc = CGF.getLoc(E.getSourceRange()); + + // Since we model suspend / resume as an inner region, we must store + // resume scalar results in a tmp alloca, and load it after we build the + // suspend expression. 
An alternative way to do this would be to make + // every region return a value when promise.return_value() is used, but + // it's a bit awkward given that resume is the only region that actually + // returns a value. + mlir::Block *currEntryBlock = CGF.currLexScope->getEntryBlock(); + [[maybe_unused]] mlir::Value tmpResumeRValAddr; + + // No need to explicitly wrap this into a scope since the AST already uses a + // ExprWithCleanups, which will wrap this into a cir.scope anyways. + rval = buildSuspendExpression(CGF, *CGF.CurCoro.Data, E, kind, aggSlot, + ignoreResult, currEntryBlock, tmpResumeRValAddr, + /*forLValue*/ false) + .RV; + + if (ignoreResult || rval.isIgnored()) + return rval; + + if (rval.isScalar()) { + rval = RValue::get(CGF.getBuilder().create( + scopeLoc, rval.getScalarVal().getType(), tmpResumeRValAddr)); + } else if (rval.isAggregate()) { + // This is probably already handled via AggSlot, remove this assertion + // once we have a testcase and prove all pieces work. + llvm_unreachable("NYI"); + } else { // complex + llvm_unreachable("NYI"); + } + return rval; +} + +RValue CIRGenFunction::buildCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return buildSuspendExpr(*this, E, CurCoro.Data->CurrentAwaitKind, aggSlot, + ignoreResult); +} + +RValue CIRGenFunction::buildCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot, + bool ignoreResult) { + return buildSuspendExpr(*this, E, mlir::cir::AwaitKind::yield, aggSlot, + ignoreResult); +} + +mlir::LogicalResult CIRGenFunction::buildCoreturnStmt(CoreturnStmt const &S) { + ++CurCoro.Data->CoreturnCount; + currLexScope->setCoreturn(); + + const Expr *RV = S.getOperand(); + if (RV && RV->getType()->isVoidType() && !isa(RV)) { + // Make sure to evaluate the non initlist expression of a co_return + // with a void expression for side effects. + // FIXME(cir): add scope + // RunCleanupsScope cleanupScope(*this); + buildIgnoredExpr(RV); + } + if (buildStmt(S.getPromiseCall(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + // Create a new return block (if not existent) and add a branch to + // it. The actual return instruction is only inserted during current + // scope cleanup handling. + auto loc = getLoc(S.getSourceRange()); + auto *retBlock = currLexScope->getOrCreateRetBlock(*this, loc); + CurCoro.Data->FinalSuspendInsPoint = + builder.create(loc, retBlock); + + // Insert the new block to continue codegen after branch to ret block, + // this will likely be an empty block. + builder.createBlock(builder.getBlock()->getParent()); + + // TODO(cir): LLVM codegen for a cleanup on cleanupScope here. + return mlir::success(); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenCstEmitter.h b/clang/lib/CIR/CodeGen/CIRGenCstEmitter.h new file mode 100644 index 000000000000..d95529e50f4a --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenCstEmitter.h @@ -0,0 +1,167 @@ +//===--- CIRGenCstEmitter.h - CIR constant emission -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A helper class for emitting expressions and values as mlir::cir::ConstantOp +// and as initializers for global variables. 
+// +// Note: this is based on LLVM's codegen in ConstantEmitter.h, reusing this +// class interface makes it easier move forward with bringing CIR codegen +// to completion. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CIRGEN_CONSTANTEMITTER_H +#define LLVM_CLANG_LIB_CODEGEN_CIRGEN_CONSTANTEMITTER_H + +#include "CIRGenFunction.h" +#include "CIRGenModule.h" + +namespace cir { + +class ConstantEmitter { +public: + CIRGenModule &CGM; + CIRGenFunction *const CGF; + +private: + bool Abstract = false; + + /// Whether non-abstract components of the emitter have been initialized. + bool InitializedNonAbstract = false; + + /// Whether the emitter has been finalized. + bool Finalized = false; + + /// Whether the constant-emission failed. + bool Failed = false; + + /// Whether we're in a constant context. + bool InConstantContext = false; + + /// The AST address space where this (non-abstract) initializer is going. + /// Used for generating appropriate placeholders. + clang::LangAS DestAddressSpace; + + llvm::SmallVector, 4> + PlaceholderAddresses; + +public: + ConstantEmitter(CIRGenModule &CGM, CIRGenFunction *CGF = nullptr) + : CGM(CGM), CGF(CGF) {} + + /// Initialize this emission in the context of the given function. + /// Use this if the expression might contain contextual references like + /// block addresses or PredefinedExprs. + ConstantEmitter(CIRGenFunction &CGF) : CGM(CGF.CGM), CGF(&CGF) {} + + ConstantEmitter(const ConstantEmitter &other) = delete; + ConstantEmitter &operator=(const ConstantEmitter &other) = delete; + + ~ConstantEmitter(); + + /// Is the current emission context abstract? + bool isAbstract() const { return Abstract; } + + bool isInConstantContext() const { return InConstantContext; } + void setInConstantContext(bool var) { InConstantContext = var; } + + /// Try to emit the initiaizer of the given declaration as an abstract + /// constant. If this succeeds, the emission must be finalized. + mlir::Attribute tryEmitForInitializer(const VarDecl &D); + mlir::Attribute tryEmitForInitializer(const Expr *E, LangAS destAddrSpace, + QualType destType); + + mlir::Attribute emitForInitializer(const APValue &value, LangAS destAddrSpace, + QualType destType); + + void finalize(mlir::cir::GlobalOp global); + + // All of the "abstract" emission methods below permit the emission to + // be immediately discarded without finalizing anything. Therefore, they + // must also promise not to do anything that will, in the future, require + // finalization: + // + // - using the CGF (if present) for anything other than establishing + // semantic context; for example, an expression with ignored + // side-effects must not be emitted as an abstract expression + // + // - doing anything that would not be safe to duplicate within an + // initializer or to propagate to another context; for example, + // side effects, or emitting an initialization that requires a + // reference to its current location. + mlir::Attribute emitForMemory(mlir::Attribute C, QualType T) { + return emitForMemory(CGM, C, T); + } + + mlir::Attribute emitNullForMemory(mlir::Location loc, QualType T) { + return emitNullForMemory(loc, CGM, T); + } + static mlir::Attribute emitNullForMemory(mlir::Location loc, + CIRGenModule &CGM, QualType T); + static mlir::Attribute emitForMemory(CIRGenModule &CGM, mlir::Attribute C, + clang::QualType T); + + /// Try to emit the initializer of the given declaration as an abstract + /// constant. 
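  // A typical non-abstract use of this class (see
  // CIRGenFunction::addInitializerToStaticVarDecl in CIRGenDecl.cpp) is
  // roughly:
  //
  //   ConstantEmitter emitter(CGF);
  //   mlir::Attribute init = emitter.tryEmitForInitializer(D);
  //   if (init) {
  //     GV.setInitialValueAttr(init);
  //     emitter.finalize(GV);  // required once a non-abstract emission is used
  //   }
  //
  // The tryEmitAbstract* variants below may instead be discarded without
  // finalization, subject to the restrictions listed above.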
+ mlir::Attribute tryEmitAbstractForInitializer(const VarDecl &D); + + /// Emit the result of the given expression as an abstract constant, + /// asserting that it succeeded. This is only safe to do when the + /// expression is known to be a constant expression with either a fairly + /// simple type or a known simple form. + mlir::Attribute emitAbstract(const Expr *E, QualType T); + mlir::Attribute emitAbstract(SourceLocation loc, const APValue &value, + QualType T); + + mlir::Attribute tryEmitConstantExpr(const ConstantExpr *CE); + + // These are private helper routines of the constant emitter that + // can't actually be private because things are split out into helper + // functions and classes. + + mlir::Attribute tryEmitPrivateForVarInit(const VarDecl &D); + mlir::TypedAttr tryEmitPrivate(const Expr *E, QualType T); + mlir::TypedAttr tryEmitPrivateForMemory(const Expr *E, QualType T); + + mlir::Attribute tryEmitPrivate(const APValue &value, QualType T); + mlir::Attribute tryEmitPrivateForMemory(const APValue &value, QualType T); + + mlir::Attribute tryEmitAbstract(const Expr *E, QualType destType); + mlir::Attribute tryEmitAbstractForMemory(const Expr *E, QualType destType); + + mlir::Attribute tryEmitAbstract(const APValue &value, QualType destType); + mlir::Attribute tryEmitAbstractForMemory(const APValue &value, + QualType destType); + +private: + void initializeNonAbstract(clang::LangAS destAS) { + assert(!InitializedNonAbstract); + InitializedNonAbstract = true; + DestAddressSpace = destAS; + } + mlir::Attribute markIfFailed(mlir::Attribute init) { + if (!init) + Failed = true; + return init; + } + + struct AbstractState { + bool OldValue; + size_t OldPlaceholdersSize; + }; + AbstractState pushAbstract() { + AbstractState saved = {Abstract, PlaceholderAddresses.size()}; + Abstract = true; + return saved; + } + mlir::Attribute validateAndPopAbstract(mlir::Attribute C, AbstractState save); +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenDebugInfo.h b/clang/lib/CIR/CodeGen/CIRGenDebugInfo.h new file mode 100644 index 000000000000..9aa503bf07e5 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenDebugInfo.h @@ -0,0 +1,20 @@ +//===--- CIRGenDebugInfo.h - DebugInfo for CIRGen ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the source-level debug info generator for CIR translation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENDEBUGINFO_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENDEBUGINFO_H + +namespace cir { +class CIRGenDebugInfo {}; +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENDEBUGINFO_H diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp new file mode 100644 index 000000000000..30e4019a24d0 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -0,0 +1,1269 @@ +//===--- CIRGenDecl.cpp - Emit CIR Code for declarations ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Decl nodes as CIR code. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenBuilder.h" +#include "CIRGenCstEmitter.h" +#include "CIRGenFunction.h" +#include "CIRGenOpenMPRuntime.h" +#include "EHScopeStack.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/SymbolTable.h" + +#include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using namespace cir; +using namespace clang; + +CIRGenFunction::AutoVarEmission +CIRGenFunction::buildAutoVarAlloca(const VarDecl &D, + mlir::OpBuilder::InsertPoint ip) { + QualType Ty = D.getType(); + assert(!MissingFeatures::openCL()); + assert( + Ty.getAddressSpace() == LangAS::Default || + (Ty.getAddressSpace() == LangAS::opencl_private && getLangOpts().OpenCL)); + assert(!D.hasAttr() && "not implemented"); + + auto loc = getLoc(D.getSourceRange()); + bool NRVO = + getContext().getLangOpts().ElideConstructors && D.isNRVOVariable(); + AutoVarEmission emission(D); + bool isEscapingByRef = D.isEscapingByref(); + emission.IsEscapingByRef = isEscapingByRef; + + CharUnits alignment = getContext().getDeclAlign(&D); + + // If the type is variably-modified, emit all the VLA sizes for it. + if (Ty->isVariablyModifiedType()) + buildVariablyModifiedType(Ty); + + assert(!MissingFeatures::generateDebugInfo()); + assert(!MissingFeatures::cxxABI()); + + Address address = Address::invalid(); + Address allocaAddr = Address::invalid(); + Address openMPLocalAddr = + getCIRGenModule().getOpenMPRuntime().getAddressOfLocalVariable(*this, &D); + assert(!getLangOpts().OpenMPIsTargetDevice && "NYI"); + if (getLangOpts().OpenMP && openMPLocalAddr.isValid()) { + llvm_unreachable("NYI"); + } else if (Ty->isConstantSizeType()) { + // If this value is an array, struct, or vector with a statically + // determinable constant initializer, there are optimizations we can do. + // + // TODO: We should constant-evaluate the initializer of any variable, + // as long as it is initialized by a constant expression. Currently, + // isConstantInitializer produces wrong answers for structs with + // reference or bitfield members, and a few other cases, and checking + // for POD-ness protects us from some of these. + if (D.getInit() && + (Ty->isArrayType() || Ty->isRecordType() || Ty->isVectorType()) && + (D.isConstexpr() || + ((Ty.isPODType(getContext()) || + getContext().getBaseElementType(Ty)->isObjCObjectPointerType()) && + D.getInit()->isConstantInitializer(getContext(), false)))) { + + // If the variable's a const type, and it's neither an NRVO + // candidate nor a __block variable and has no mutable members, + // emit it as a global instead. + // Exception is if a variable is located in non-constant address space + // in OpenCL. + // TODO: deal with CGM.getCodeGenOpts().MergeAllConstants + // TODO: perhaps we don't need this at all at CIR since this can + // be done as part of lowering down to LLVM. 
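      // Illustrative case for the check below (hypothetical names):
      //
      //   void use(const int *);
      //   void f() { const int table[4] = {1, 2, 3, 4}; use(table); }
      //
      // `table` is a constant-size, const-qualified POD aggregate with a
      // constant initializer, is not an NRVO candidate and does not escape by
      // reference, so instead of an alloca it is emitted through
      // buildStaticVarDecl as an internal global.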
+ if ((!getContext().getLangOpts().OpenCL || + Ty.getAddressSpace() == LangAS::opencl_constant) && + (!NRVO && !D.isEscapingByref() && + CGM.isTypeConstant(Ty, /*ExcludeCtor=*/true, + /*ExcludeDtor=*/false))) { + buildStaticVarDecl(D, mlir::cir::GlobalLinkageKind::InternalLinkage); + + // Signal this condition to later callbacks. + emission.Addr = Address::invalid(); + assert(emission.wasEmittedAsGlobal()); + return emission; + } + // Otherwise, tell the initialization code that we're in this case. + emission.IsConstantAggregate = true; + } + + // A normal fixed sized variable becomes an alloca in the entry block, + // unless: + // - it's an NRVO variable. + // - we are compiling OpenMP and it's an OpenMP local variable. + if (NRVO) { + // The named return value optimization: allocate this variable in the + // return slot, so that we can elide the copy when returning this + // variable (C++0x [class.copy]p34). + address = ReturnValue; + allocaAddr = ReturnValue; + + if (const RecordType *RecordTy = Ty->getAs()) { + const auto *RD = RecordTy->getDecl(); + const auto *CXXRD = dyn_cast(RD); + if ((CXXRD && !CXXRD->hasTrivialDestructor()) || + RD->isNonTrivialToPrimitiveDestroy()) { + // In LLVM: Create a flag that is used to indicate when the NRVO was + // applied to this variable. Set it to zero to indicate that NRVO was + // not applied. For now, use the same approach for CIRGen until we can + // be sure it's worth doing something more aggressive. + auto falseNVRO = builder.getFalse(loc); + Address NRVOFlag = CreateTempAlloca( + falseNVRO.getType(), CharUnits::One(), loc, "nrvo", + /*ArraySize=*/nullptr, &allocaAddr); + assert(builder.getInsertionBlock()); + builder.createStore(loc, falseNVRO, NRVOFlag); + + // Record the NRVO flag for this variable. + NRVOFlags[&D] = NRVOFlag.getPointer(); + emission.NRVOFlag = NRVOFlag.getPointer(); + } + } + } else { + if (isEscapingByRef) + llvm_unreachable("NYI"); + + mlir::Type allocaTy = getTypes().convertTypeForMem(Ty); + CharUnits allocaAlignment = alignment; + // Create the temp alloca and declare variable using it. + mlir::Value addrVal; + address = CreateTempAlloca(allocaTy, allocaAlignment, loc, D.getName(), + /*ArraySize=*/nullptr, &allocaAddr, ip); + if (failed(declare(address, &D, Ty, getLoc(D.getSourceRange()), alignment, + addrVal))) { + CGM.emitError("Cannot declare variable"); + return emission; + } + // TODO: what about emitting lifetime markers for MSVC catch parameters? + // TODO: something like @llvm.lifetime.start/end here? revisit this later. + assert(!MissingFeatures::shouldEmitLifetimeMarkers()); + } + } else { // not openmp nor constant sized type + bool VarAllocated = false; + if (getLangOpts().OpenMPIsTargetDevice) + llvm_unreachable("NYI"); + + if (!VarAllocated) { + if (!DidCallStackSave) { + // Save the stack. + auto defaultTy = AllocaInt8PtrTy; + CharUnits Align = CharUnits::fromQuantity( + CGM.getDataLayout().getAlignment(defaultTy, false)); + Address Stack = CreateTempAlloca(defaultTy, Align, loc, "saved_stack"); + + mlir::Value V = builder.createStackSave(loc, defaultTy); + assert(V.getType() == AllocaInt8PtrTy); + builder.createStore(loc, V, Stack); + + DidCallStackSave = true; + + // Push a cleanup block and restore the stack there. + // FIXME: in general circumstances, this should be an EH cleanup. + pushStackRestore(NormalCleanup, Stack); + } + + auto VlaSize = getVLASize(Ty); + mlir::Type mTy = convertTypeForMem(VlaSize.Type); + + // Allocate memory for the array. 
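      // Illustrative: for a variably-modified local such as
      //
      //   void g(int n) { int vla[n]; /* ... */ }
      //
      // the first VLA in the function triggers the stack-save sequence above,
      // the array storage is created below with a runtime element count
      // (VlaSize.NumElts), and the matching stack-restore is emitted by the
      // CallStackRestore cleanup pushed via pushStackRestore once the scope
      // ends.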
+ address = CreateTempAlloca(mTy, alignment, loc, "vla", VlaSize.NumElts, + &allocaAddr, builder.saveInsertionPoint()); + } + + // If we have debug info enabled, properly describe the VLA dimensions for + // this type by registering the vla size expression for each of the + // dimensions. + assert(!MissingFeatures::generateDebugInfo()); + } + + emission.Addr = address; + setAddrOfLocalVar(&D, emission.Addr); + return emission; +} + +/// Determine whether the given initializer is trivial in the sense +/// that it requires no code to be generated. +bool CIRGenFunction::isTrivialInitializer(const Expr *Init) { + if (!Init) + return true; + + if (const CXXConstructExpr *Construct = dyn_cast(Init)) + if (CXXConstructorDecl *Constructor = Construct->getConstructor()) + if (Constructor->isTrivial() && Constructor->isDefaultConstructor() && + !Construct->requiresZeroInitialization()) + return true; + + return false; +} + +static void emitStoresForConstant(CIRGenModule &CGM, const VarDecl &D, + Address addr, bool isVolatile, + CIRGenBuilderTy &builder, + mlir::TypedAttr constant, bool IsAutoInit) { + auto Ty = constant.getType(); + cir::CIRDataLayout layout{CGM.getModule()}; + uint64_t ConstantSize = layout.getTypeAllocSize(Ty); + if (!ConstantSize) + return; + assert(!MissingFeatures::addAutoInitAnnotation()); + assert(!MissingFeatures::vectorConstants()); + assert(!MissingFeatures::shouldUseBZeroPlusStoresToInitialize()); + assert(!MissingFeatures::shouldUseMemSetToInitialize()); + assert(!MissingFeatures::shouldSplitConstantStore()); + assert(!MissingFeatures::shouldCreateMemCpyFromGlobal()); + // In CIR we want to emit a store for the whole thing, later lowering + // prepare to LLVM should unwrap this into the best policy (see asserts + // above). + // + // FIXME(cir): This is closer to memcpy behavior but less optimal, instead of + // copy from a global, we just create a cir.const out of it. + + if (addr.getElementType() != Ty) { + auto ptr = addr.getPointer(); + ptr = builder.createBitcast(ptr.getLoc(), ptr, builder.getPointerTo(Ty)); + addr = addr.withPointer(ptr, addr.isKnownNonNull()); + } + + auto loc = CGM.getLoc(D.getSourceRange()); + builder.createStore(loc, builder.getConstant(loc, constant), addr); +} + +void CIRGenFunction::buildAutoVarInit(const AutoVarEmission &emission) { + assert(emission.Variable && "emission was not valid!"); + + // If this was emitted as a global constant, we're done. + if (emission.wasEmittedAsGlobal()) + return; + + const VarDecl &D = *emission.Variable; + QualType type = D.getType(); + + // If this local has an initializer, emit it now. + const Expr *Init = D.getInit(); + + // TODO: in LLVM codegen if we are at an unreachable point, the initializer + // isn't emitted unless it contains a label. What we want for CIR? + assert(builder.getInsertionBlock()); + + // Initialize the variable here if it doesn't have a initializer and it is a + // C struct that is non-trivial to initialize or an array containing such a + // struct. + if (!Init && type.isNonTrivialToPrimitiveDefaultInitialize() == + QualType::PDIK_Struct) { + assert(0 && "not implemented"); + return; + } + + const Address Loc = emission.Addr; + // Check whether this is a byref variable that's potentially + // captured and moved by its own initializer. If so, we'll need to + // emit the initializer first, then copy into the variable. + assert(!MissingFeatures::capturedByInit() && "NYI"); + + // Note: constexpr already initializes everything correctly. 
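  // Illustrative: under -ftrivial-auto-var-init=zero or =pattern,
  //
  //   int x;                            // would receive a synthetic init
  //   [[clang::uninitialized]] int y;   // explicitly opts out
  //
  // The computation below keeps constexpr variables and explicit opt-outs in
  // the Uninitialized kind, so initializeWhatIsTechnicallyUninitialized is a
  // no-op for them; the non-trivial kinds are still unimplemented here.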
+ LangOptions::TrivialAutoVarInitKind trivialAutoVarInit = + (D.isConstexpr() + ? LangOptions::TrivialAutoVarInitKind::Uninitialized + : (D.getAttr() + ? LangOptions::TrivialAutoVarInitKind::Uninitialized + : getContext().getLangOpts().getTrivialAutoVarInit())); + + auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) { + if (trivialAutoVarInit == + LangOptions::TrivialAutoVarInitKind::Uninitialized) + return; + + assert(0 && "unimplemented"); + }; + + if (isTrivialInitializer(Init)) + return initializeWhatIsTechnicallyUninitialized(Loc); + + mlir::Attribute constant; + if (emission.IsConstantAggregate || + D.mightBeUsableInConstantExpressions(getContext())) { + // FIXME: Differently from LLVM we try not to emit / lower too much + // here for CIR since we are interesting in seeing the ctor in some + // analysis later on. So CIR's implementation of ConstantEmitter will + // frequently return an empty Attribute, to signal we want to codegen + // some trivial ctor calls and whatnots. + constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); + if (constant && !mlir::isa(constant) && + (trivialAutoVarInit != + LangOptions::TrivialAutoVarInitKind::Uninitialized)) { + llvm_unreachable("NYI"); + } + } + + // NOTE(cir): In case we have a constant initializer, we can just emit a + // store. But, in CIR, we wish to retain any ctor calls, so if it is a + // CXX temporary object creation, we ensure the ctor call is used deferring + // its removal/optimization to the CIR lowering. + if (!constant || isa(Init)) { + initializeWhatIsTechnicallyUninitialized(Loc); + LValue lv = LValue::makeAddr(Loc, type, AlignmentSource::Decl); + buildExprAsInit(Init, &D, lv); + // In case lv has uses it means we indeed initialized something + // out of it while trying to build the expression, mark it as such. + auto addr = lv.getAddress().getPointer(); + assert(addr && "Should have an address"); + auto allocaOp = dyn_cast_or_null(addr.getDefiningOp()); + assert(allocaOp && "Address should come straight out of the alloca"); + + if (!allocaOp.use_empty()) + allocaOp.setInitAttr(mlir::UnitAttr::get(builder.getContext())); + return; + } + + // FIXME(cir): migrate most of this file to use mlir::TypedAttr directly. + auto typedConstant = mlir::dyn_cast(constant); + assert(typedConstant && "expected typed attribute"); + if (!emission.IsConstantAggregate) { + // For simple scalar/complex initialization, store the value directly. + LValue lv = makeAddrLValue(Loc, type); + assert(Init && "expected initializer"); + auto initLoc = getLoc(Init->getSourceRange()); + lv.setNonGC(true); + return buildStoreThroughLValue( + RValue::get(builder.getConstant(initLoc, typedConstant)), lv); + } + + emitStoresForConstant(CGM, D, Loc, type.isVolatileQualified(), builder, + typedConstant, /*IsAutoInit=*/false); +} + +void CIRGenFunction::buildAutoVarCleanups(const AutoVarEmission &emission) { + assert(emission.Variable && "emission was not valid!"); + + // If this was emitted as a global constant, we're done. + if (emission.wasEmittedAsGlobal()) + return; + + // TODO: in LLVM codegen if we are at an unreachable point codgen + // is ignored. What we want for CIR? + assert(builder.getInsertionBlock()); + const VarDecl &D = *emission.Variable; + + // Check the type for a cleanup. + if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext())) + buildAutoVarTypeCleanup(emission, dtorKind); + + // In GC mode, honor objc_precise_lifetime. 
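  // Illustrative: for a local with a non-trivial destructor, e.g.
  //
  //   struct S { ~S(); };
  //   void h() { S s; }
  //
  // needsDestruction() above reports a C++ destructor kind, and
  // buildAutoVarTypeCleanup pushes a DestroyObject cleanup (defined later in
  // this file) whose Emit() runs the destruction when the scope unwinds.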
+ if (getContext().getLangOpts().getGC() != LangOptions::NonGC && + D.hasAttr()) + assert(0 && "not implemented"); + + // Handle the cleanup attribute. + if (const CleanupAttr *CA = D.getAttr()) + assert(0 && "not implemented"); + + // TODO: handle block variable +} + +/// Emit code and set up symbol table for a variable declaration with auto, +/// register, or no storage class specifier. These turn into simple stack +/// objects, globals depending on target. +void CIRGenFunction::buildAutoVarDecl(const VarDecl &D) { + AutoVarEmission emission = buildAutoVarAlloca(D); + buildAutoVarInit(emission); + buildAutoVarCleanups(emission); +} + +void CIRGenFunction::buildVarDecl(const VarDecl &D) { + if (D.hasExternalStorage()) { + // Don't emit it now, allow it to be emitted lazily on its first use. + return; + } + + // Some function-scope variable does not have static storage but still + // needs to be emitted like a static variable, e.g. a function-scope + // variable in constant address space in OpenCL. + if (D.getStorageDuration() != SD_Automatic) { + // Static sampler variables translated to function calls. + if (D.getType()->isSamplerT()) + return; + + auto Linkage = CGM.getCIRLinkageVarDefinition(&D, /*IsConstant=*/false); + + // FIXME: We need to force the emission/use of a guard variable for + // some variables even if we can constant-evaluate them because + // we can't guarantee every translation unit will constant-evaluate them. + + return buildStaticVarDecl(D, Linkage); + } + + if (D.getType().getAddressSpace() == LangAS::opencl_local) + return CGM.getOpenCLRuntime().buildWorkGroupLocalVarDecl(*this, D); + + assert(D.hasLocalStorage()); + + CIRGenFunction::VarDeclContext varDeclCtx{*this, &D}; + return buildAutoVarDecl(D); +} + +static std::string getStaticDeclName(CIRGenModule &CGM, const VarDecl &D) { + if (CGM.getLangOpts().CPlusPlus) + return CGM.getMangledName(&D).str(); + + // If this isn't C++, we don't need a mangled name, just a pretty one. + assert(!D.isExternallyVisible() && "name shouldn't matter"); + std::string ContextName; + const DeclContext *DC = D.getDeclContext(); + if (auto *CD = dyn_cast(DC)) + DC = cast(CD->getNonClosureContext()); + if (const auto *FD = dyn_cast(DC)) + ContextName = std::string(CGM.getMangledName(FD)); + else if (const auto *BD = dyn_cast(DC)) + llvm_unreachable("block decl context for static var is NYI"); + else if (const auto *OMD = dyn_cast(DC)) + llvm_unreachable("ObjC decl context for static var is NYI"); + else + llvm_unreachable("Unknown context for static var decl"); + + ContextName += "." + D.getNameAsString(); + return ContextName; +} + +// TODO(cir): LLVM uses a Constant base class. Maybe CIR could leverage an +// interface for all constants? +mlir::cir::GlobalOp +CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &D, + mlir::cir::GlobalLinkageKind Linkage) { + // In general, we don't always emit static var decls once before we reference + // them. It is possible to reference them before emitting the function that + // contains them, and it is possible to emit the containing function multiple + // times. + if (mlir::cir::GlobalOp ExistingGV = StaticLocalDeclMap[&D]) + return ExistingGV; + + QualType Ty = D.getType(); + assert(Ty->isConstantSizeType() && "VLAs can't be static"); + + // Use the label if the variable is renamed with the asm-label extension. 
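  // Illustrative: for
  //
  //   void f() { static int counter; }
  //
  // C++ translation units use the variable's mangled name (Itanium:
  // _ZZ1fvE7counter), while C falls back to getStaticDeclName() above and
  // gets a pretty name such as "f.counter".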
+ std::string Name; + if (D.hasAttr()) + llvm_unreachable("asm label is NYI"); + else + Name = getStaticDeclName(*this, D); + + mlir::Type LTy = getTypes().convertTypeForMem(Ty); + mlir::cir::AddressSpaceAttr AS = + builder.getAddrSpaceAttr(getGlobalVarAddressSpace(&D)); + + // OpenCL variables in local address space and CUDA shared + // variables cannot have an initializer. + mlir::Attribute Init = nullptr; + if (D.hasAttr() || D.hasAttr()) + llvm_unreachable("CUDA is NYI"); + else if (Ty.getAddressSpace() != LangAS::opencl_local) + Init = builder.getZeroInitAttr(getTypes().ConvertType(Ty)); + + mlir::cir::GlobalOp GV = builder.createVersionedGlobal( + getModule(), getLoc(D.getLocation()), Name, LTy, false, Linkage, AS); + // TODO(cir): infer visibility from linkage in global op builder. + GV.setVisibility(getMLIRVisibilityFromCIRLinkage(Linkage)); + GV.setInitialValueAttr(Init); + GV.setAlignment(getASTContext().getDeclAlign(&D).getAsAlign().value()); + + if (supportsCOMDAT() && GV.isWeakForLinker()) + llvm_unreachable("COMDAT globals are NYI"); + + if (D.getTLSKind()) + llvm_unreachable("TLS mode is NYI"); + + setGVProperties(GV, &D); + + // Make sure the result is of the correct type. + if (AS != builder.getAddrSpaceAttr(Ty.getAddressSpace())) + llvm_unreachable("address space cast NYI"); + + // Ensure that the static local gets initialized by making sure the parent + // function gets emitted eventually. + const Decl *DC = cast(D.getDeclContext()); + + // We can't name blocks or captured statements directly, so try to emit their + // parents. + if (isa(DC) || isa(DC)) { + DC = DC->getNonClosureContext(); + // FIXME: Ensure that global blocks get emitted. + if (!DC) + llvm_unreachable("address space is NYI"); + } + + GlobalDecl GD; + if (const auto *CD = dyn_cast(DC)) + llvm_unreachable("C++ constructors static var context is NYI"); + else if (const auto *DD = dyn_cast(DC)) + llvm_unreachable("C++ destructors static var context is NYI"); + else if (const auto *FD = dyn_cast(DC)) + GD = GlobalDecl(FD); + else { + // Don't do anything for Obj-C method decls or global closures. We should + // never defer them. + assert(isa(DC) && "unexpected parent code decl"); + } + if (GD.getDecl() && MissingFeatures::openMP()) { + // Disable emission of the parent function for the OpenMP device codegen. + llvm_unreachable("OpenMP is NYI"); + } + + return GV; +} + +/// Add the initializer for 'D' to the global variable that has already been +/// created for it. If the initializer has a different type than GV does, this +/// may free GV and return a different one. Otherwise it just returns GV. +mlir::cir::GlobalOp CIRGenFunction::addInitializerToStaticVarDecl( + const VarDecl &D, mlir::cir::GlobalOp GV, mlir::cir::GetGlobalOp GVAddr) { + ConstantEmitter emitter(*this); + mlir::TypedAttr Init = + mlir::dyn_cast(emitter.tryEmitForInitializer(D)); + assert(Init && "Expected typed attribute"); + + // If constant emission failed, then this should be a C++ static + // initializer. + if (!Init) { + if (!getLangOpts().CPlusPlus) + CGM.ErrorUnsupported(D.getInit(), "constant l-value expression"); + else if (D.hasFlexibleArrayInit(getContext())) + CGM.ErrorUnsupported(D.getInit(), "flexible array initializer"); + else { + // Since we have a static initializer, this global variable can't + // be constant. 
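      // Illustrative: an initializer that is not a constant expression, e.g.
      //
      //   int next_id();
      //   void f() { static int id = next_id(); }
      //
      // takes this branch; the guarded runtime initialization it requires
      // (__cxa_guard_acquire/__cxa_guard_release in the Itanium ABI) is still
      // NYI below.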
+ GV.setConstant(false); + llvm_unreachable("C++ guarded init it NYI"); + } + return GV; + } + +#ifndef NDEBUG + CharUnits VarSize = CGM.getASTContext().getTypeSizeInChars(D.getType()) + + D.getFlexibleArrayInitChars(getContext()); + CharUnits CstSize = CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(Init.getType())); + assert(VarSize == CstSize && "Emitted constant has unexpected size"); +#endif + + // The initializer may differ in type from the global. Rewrite + // the global to match the initializer. (We have to do this + // because some types, like unions, can't be completely represented + // in the LLVM type system.) + if (GV.getSymType() != Init.getType()) { + mlir::cir::GlobalOp OldGV = GV; + GV = builder.createGlobal(CGM.getModule(), getLoc(D.getSourceRange()), + OldGV.getName(), Init.getType(), + OldGV.getConstant(), GV.getLinkage()); + // FIXME(cir): OG codegen inserts new GV before old one, we probably don't + // need that? + GV.setVisibility(OldGV.getVisibility()); + GV.setGlobalVisibilityAttr(OldGV.getGlobalVisibilityAttr()); + GV.setInitialValueAttr(Init); + GV.setTlsModelAttr(OldGV.getTlsModelAttr()); + assert(!MissingFeatures::setDSOLocal()); + assert(!MissingFeatures::setComdat()); + assert(!MissingFeatures::addressSpaceInGlobalVar()); + + // Normally this should be done with a call to CGM.replaceGlobal(OldGV, GV), + // but since at this point the current block hasn't been really attached, + // there's no visibility into the GetGlobalOp corresponding to this Global. + // Given those constraints, thread in the GetGlobalOp and update it + // directly. + GVAddr.getAddr().setType( + mlir::cir::PointerType::get(builder.getContext(), Init.getType())); + OldGV->erase(); + } + + bool NeedsDtor = + D.needsDestruction(getContext()) == QualType::DK_cxx_destructor; + + GV.setConstant( + CGM.isTypeConstant(D.getType(), /*ExcludeCtor=*/true, !NeedsDtor)); + GV.setInitialValueAttr(Init); + + emitter.finalize(GV); + + if (NeedsDtor) { + // We have a constant initializer, but a nontrivial destructor. We still + // need to perform a guarded "initialization" in order to register the + // destructor. + llvm_unreachable("C++ guarded init is NYI"); + } + + return GV; +} + +void CIRGenFunction::buildStaticVarDecl(const VarDecl &D, + mlir::cir::GlobalLinkageKind Linkage) { + // Check to see if we already have a global variable for this + // declaration. This can happen when double-emitting function + // bodies, e.g. with complete and base constructors. + auto globalOp = CGM.getOrCreateStaticVarDecl(D, Linkage); + // TODO(cir): we should have a way to represent global ops as values without + // having to emit a get global op. Sometimes these emissions are not used. + auto addr = getBuilder().createGetGlobal(globalOp); + auto getAddrOp = mlir::cast(addr.getDefiningOp()); + + CharUnits alignment = getContext().getDeclAlign(&D); + + // Store into LocalDeclMap before generating initializer to handle + // circular references. + mlir::Type elemTy = getTypes().convertTypeForMem(D.getType()); + setAddrOfLocalVar(&D, Address(addr, elemTy, alignment)); + + // We can't have a VLA here, but we can have a pointer to a VLA, + // even though that doesn't really make any sense. + // Make sure to evaluate VLA bounds now so that we have them for later. + if (D.getType()->isVariablyModifiedType()) + llvm_unreachable("VLAs are NYI"); + + // Save the type in case adding the initializer forces a type change. 
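  // Illustrative: the classic case is a union whose active member does not
  // match the declared memory type, e.g.
  //
  //   union U { int i; float f; };
  //   void f() { static U u = {.f = 1.0f}; }
  //
  // addInitializerToStaticVarDecl above may then recreate the GlobalOp with
  // the initializer's type, and the expectedType saved below is used to
  // bitcast the GetGlobalOp result back to the declared pointer type.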
+ auto expectedType = addr.getType(); + + auto var = globalOp; + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers. This is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D.hasAttr(); + // If this value has an initializer, emit it. + if (D.getInit() && !isCudaSharedVar) + var = addInitializerToStaticVarDecl(D, var, getAddrOp); + + var.setAlignment(alignment.getAsAlign().value()); + + if (D.hasAttr()) + llvm_unreachable("Global annotations are NYI"); + + if (auto *SA = D.getAttr()) + llvm_unreachable("CIR global BSS section attribute is NYI"); + if (auto *SA = D.getAttr()) + llvm_unreachable("CIR global Data section attribute is NYI"); + if (auto *SA = D.getAttr()) + llvm_unreachable("CIR global Rodata section attribute is NYI"); + if (auto *SA = D.getAttr()) + llvm_unreachable("CIR global Relro section attribute is NYI"); + + if (const SectionAttr *SA = D.getAttr()) + llvm_unreachable("CIR global object file section attribute is NYI"); + + if (D.hasAttr()) + llvm_unreachable("llvm.used metadata is NYI"); + else if (D.hasAttr()) + llvm_unreachable("llvm.compiler.used metadata is NYI"); + + // From traditional codegen: + // We may have to cast the constant because of the initializer + // mismatch above. + // + // FIXME: It is really dangerous to store this in the map; if anyone + // RAUW's the GV uses of this constant will be invalid. + auto castedAddr = builder.createBitcast(getAddrOp.getAddr(), expectedType); + LocalDeclMap.find(&D)->second = Address(castedAddr, elemTy, alignment); + CGM.setStaticLocalDeclAddress(&D, var); + + assert(!MissingFeatures::reportGlobalToASan()); + + // Emit global variable debug descriptor for static vars. 
+ auto *DI = getDebugInfo(); + if (DI && CGM.getCodeGenOpts().hasReducedDebugInfo()) { + llvm_unreachable("Debug info is NYI"); + } +} + +void CIRGenFunction::buildNullabilityCheck(LValue LHS, mlir::Value RHS, + SourceLocation Loc) { + if (!SanOpts.has(SanitizerKind::NullabilityAssign)) + return; + + llvm_unreachable("NYI"); +} + +void CIRGenFunction::buildScalarInit(const Expr *init, mlir::Location loc, + LValue lvalue, bool capturedByInit) { + Qualifiers::ObjCLifetime lifetime = Qualifiers::ObjCLifetime::OCL_None; + assert(!MissingFeatures::objCLifetime()); + + if (!lifetime) { + SourceLocRAIIObject Loc{*this, loc}; + mlir::Value value = buildScalarExpr(init); + if (capturedByInit) + llvm_unreachable("NYI"); + assert(!MissingFeatures::emitNullabilityCheck()); + buildStoreThroughLValue(RValue::get(value), lvalue, true); + return; + } + + llvm_unreachable("NYI"); +} + +void CIRGenFunction::buildExprAsInit(const Expr *init, const ValueDecl *D, + LValue lvalue, bool capturedByInit) { + SourceLocRAIIObject Loc{*this, getLoc(init->getSourceRange())}; + if (capturedByInit) + llvm_unreachable("NYI"); + + QualType type = D->getType(); + + if (type->isReferenceType()) { + RValue rvalue = buildReferenceBindingToExpr(init); + if (capturedByInit) + llvm_unreachable("NYI"); + buildStoreThroughLValue(rvalue, lvalue); + return; + } + switch (CIRGenFunction::getEvaluationKind(type)) { + case TEK_Scalar: + buildScalarInit(init, getLoc(D->getSourceRange()), lvalue); + return; + case TEK_Complex: { + mlir::Value complex = buildComplexExpr(init); + if (capturedByInit) + llvm_unreachable("NYI"); + buildStoreOfComplex(getLoc(init->getExprLoc()), complex, lvalue, + /*init*/ true); + return; + } + case TEK_Aggregate: + assert(!type->isAtomicType() && "NYI"); + AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap; + if (isa(D)) + Overlap = AggValueSlot::DoesNotOverlap; + else if (auto *FD = dyn_cast(D)) + assert(false && "Field decl NYI"); + else + assert(false && "Only VarDecl implemented so far"); + // TODO: how can we delay here if D is captured by its initializer? 
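    // Illustrative: an aggregate initializer such as
    //
    //   struct P { int x, y; };
    //   void f() { P p = {1, 2}; }
    //
    // reaches this TEK_Aggregate path and is emitted in place via buildAggExpr
    // below, while `int i = 3;` and `_Complex float c = 1.0f;` take the
    // TEK_Scalar and TEK_Complex paths handled above.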
+ buildAggExpr(init, + AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, Overlap)); + return; + } + llvm_unreachable("bad evaluation kind"); +} + +void CIRGenFunction::buildDecl(const Decl &D) { + switch (D.getKind()) { + case Decl::ImplicitConceptSpecialization: + case Decl::HLSLBuffer: + case Decl::TopLevelStmt: + llvm_unreachable("NYI"); + case Decl::BuiltinTemplate: + case Decl::TranslationUnit: + case Decl::ExternCContext: + case Decl::Namespace: + case Decl::UnresolvedUsingTypename: + case Decl::ClassTemplateSpecialization: + case Decl::ClassTemplatePartialSpecialization: + case Decl::VarTemplateSpecialization: + case Decl::VarTemplatePartialSpecialization: + case Decl::TemplateTypeParm: + case Decl::UnresolvedUsingValue: + case Decl::NonTypeTemplateParm: + case Decl::CXXDeductionGuide: + case Decl::CXXMethod: + case Decl::CXXConstructor: + case Decl::CXXDestructor: + case Decl::CXXConversion: + case Decl::Field: + case Decl::MSProperty: + case Decl::IndirectField: + case Decl::ObjCIvar: + case Decl::ObjCAtDefsField: + case Decl::ParmVar: + case Decl::ImplicitParam: + case Decl::ClassTemplate: + case Decl::VarTemplate: + case Decl::FunctionTemplate: + case Decl::TypeAliasTemplate: + case Decl::TemplateTemplateParm: + case Decl::ObjCMethod: + case Decl::ObjCCategory: + case Decl::ObjCProtocol: + case Decl::ObjCInterface: + case Decl::ObjCCategoryImpl: + case Decl::ObjCImplementation: + case Decl::ObjCProperty: + case Decl::ObjCCompatibleAlias: + case Decl::PragmaComment: + case Decl::PragmaDetectMismatch: + case Decl::AccessSpec: + case Decl::LinkageSpec: + case Decl::Export: + case Decl::ObjCPropertyImpl: + case Decl::FileScopeAsm: + case Decl::Friend: + case Decl::FriendTemplate: + case Decl::Block: + case Decl::Captured: + case Decl::UsingShadow: + case Decl::ConstructorUsingShadow: + case Decl::ObjCTypeParam: + case Decl::Binding: + case Decl::UnresolvedUsingIfExists: + llvm_unreachable("Declaration should not be in declstmts!"); + case Decl::Record: // struct/union/class X; + case Decl::CXXRecord: // struct/union/class X; [C++] + if (auto *DI = getDebugInfo()) + llvm_unreachable("NYI"); + return; + case Decl::Enum: // enum X; + if (auto *DI = getDebugInfo()) + llvm_unreachable("NYI"); + return; + case Decl::Function: // void X(); + case Decl::EnumConstant: // enum ? { X = ? } + case Decl::StaticAssert: // static_assert(X, ""); [C++0x] + case Decl::Label: // __label__ x; + case Decl::Import: + case Decl::MSGuid: // __declspec(uuid("...")) + case Decl::TemplateParamObject: + case Decl::OMPThreadPrivate: + case Decl::OMPAllocate: + case Decl::OMPCapturedExpr: + case Decl::OMPRequires: + case Decl::Empty: + case Decl::Concept: + case Decl::LifetimeExtendedTemporary: + case Decl::RequiresExprBody: + case Decl::UnnamedGlobalConstant: + // None of these decls require codegen support. 
+ return; + + case Decl::NamespaceAlias: + case Decl::Using: // using X; [C++] + case Decl::UsingEnum: // using enum X; [C++] + case Decl::UsingDirective: // using namespace X; [C++] + assert(!MissingFeatures::generateDebugInfo()); + return; + case Decl::UsingPack: + assert(0 && "Not implemented"); + return; + case Decl::Var: + case Decl::Decomposition: { + const VarDecl &VD = cast(D); + assert(VD.isLocalVarDecl() && + "Should not see file-scope variables inside a function!"); + buildVarDecl(VD); + if (auto *DD = dyn_cast(&VD)) + for (auto *B : DD->bindings()) + if (auto *HD = B->getHoldingVar()) + buildVarDecl(*HD); + return; + } + + case Decl::OMPDeclareReduction: + case Decl::OMPDeclareMapper: + assert(0 && "Not implemented"); + + case Decl::Typedef: // typedef int X; + case Decl::TypeAlias: { // using X = int; [C++0x] + QualType Ty = cast(D).getUnderlyingType(); + if (auto *DI = getDebugInfo()) + assert(!MissingFeatures::generateDebugInfo()); + if (Ty->isVariablyModifiedType()) + buildVariablyModifiedType(Ty); + return; + } + } +} + +namespace { +struct DestroyObject final : EHScopeStack::Cleanup { + DestroyObject(Address addr, QualType type, + CIRGenFunction::Destroyer *destroyer, bool useEHCleanupForArray) + : addr(addr), type(type), destroyer(destroyer), + useEHCleanupForArray(useEHCleanupForArray) {} + + Address addr; + QualType type; + CIRGenFunction::Destroyer *destroyer; + bool useEHCleanupForArray; + + void Emit(CIRGenFunction &CGF, Flags flags) override { + // Don't use an EH cleanup recursively from an EH cleanup. + [[maybe_unused]] bool useEHCleanupForArray = + flags.isForNormalCleanup() && this->useEHCleanupForArray; + + CGF.emitDestroy(addr, type, destroyer, useEHCleanupForArray); + } +}; + +template struct DestroyNRVOVariable : EHScopeStack::Cleanup { + DestroyNRVOVariable(Address addr, QualType type, mlir::Value NRVOFlag) + : NRVOFlag(NRVOFlag), Loc(addr), Ty(type) {} + + mlir::Value NRVOFlag; + Address Loc; + QualType Ty; + + void Emit(CIRGenFunction &CGF, Flags flags) override { + llvm_unreachable("NYI"); + } + + virtual ~DestroyNRVOVariable() = default; +}; + +struct DestroyNRVOVariableCXX final + : DestroyNRVOVariable { + DestroyNRVOVariableCXX(Address addr, QualType type, + const CXXDestructorDecl *Dtor, mlir::Value NRVOFlag) + : DestroyNRVOVariable(addr, type, NRVOFlag), + Dtor(Dtor) {} + + const CXXDestructorDecl *Dtor; + + void emitDestructorCall(CIRGenFunction &CGF) { llvm_unreachable("NYI"); } +}; + +struct DestroyNRVOVariableC final : DestroyNRVOVariable { + DestroyNRVOVariableC(Address addr, mlir::Value NRVOFlag, QualType Ty) + : DestroyNRVOVariable(addr, Ty, NRVOFlag) {} + + void emitDestructorCall(CIRGenFunction &CGF) { llvm_unreachable("NYI"); } +}; + +struct CallStackRestore final : EHScopeStack::Cleanup { + Address Stack; + CallStackRestore(Address Stack) : Stack(Stack) {} + bool isRedundantBeforeReturn() override { return true; } + void Emit(CIRGenFunction &CGF, Flags flags) override { + auto loc = Stack.getPointer().getLoc(); + mlir::Value V = CGF.getBuilder().createLoad(loc, Stack); + CGF.getBuilder().createStackRestore(loc, V); + } +}; + +struct ExtendGCLifetime final : EHScopeStack::Cleanup { + const VarDecl &Var; + ExtendGCLifetime(const VarDecl *var) : Var(*var) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + llvm_unreachable("NYI"); + } +}; + +struct CallCleanupFunction final : EHScopeStack::Cleanup { + // FIXME: mlir::Value used as placeholder, check options before implementing + // Emit below. 
+ mlir::Value CleanupFn; + const CIRGenFunctionInfo &FnInfo; + const VarDecl &Var; + + CallCleanupFunction(mlir::Value CleanupFn, const CIRGenFunctionInfo *Info, + const VarDecl *Var) + : CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + llvm_unreachable("NYI"); + } +}; +} // end anonymous namespace + +/// Push the standard destructor for the given type as +/// at least a normal cleanup. +void CIRGenFunction::pushDestroy(QualType::DestructionKind dtorKind, + Address addr, QualType type) { + assert(dtorKind && "cannot push destructor for trivial type"); + + CleanupKind cleanupKind = getCleanupKind(dtorKind); + pushDestroy(cleanupKind, addr, type, getDestroyer(dtorKind), + cleanupKind & EHCleanup); +} + +void CIRGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, + QualType type, Destroyer *destroyer, + bool useEHCleanupForArray) { + pushFullExprCleanup(cleanupKind, addr, type, destroyer, + useEHCleanupForArray); +} + +namespace { +/// A cleanup which performs a partial array destroy where the end pointer is +/// regularly determined and does not need to be loaded from a local. +class RegularPartialArrayDestroy final : public EHScopeStack::Cleanup { + mlir::Value ArrayBegin; + mlir::Value ArrayEnd; + QualType ElementType; + [[maybe_unused]] CIRGenFunction::Destroyer *Destroyer; + CharUnits ElementAlign; + +public: + RegularPartialArrayDestroy(mlir::Value arrayBegin, mlir::Value arrayEnd, + QualType elementType, CharUnits elementAlign, + CIRGenFunction::Destroyer *destroyer) + : ArrayBegin(arrayBegin), ArrayEnd(arrayEnd), ElementType(elementType), + Destroyer(destroyer), ElementAlign(elementAlign) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + llvm_unreachable("NYI"); + } +}; + +/// A cleanup which performs a partial array destroy where the end pointer is +/// irregularly determined and must be loaded from a local. +class IrregularPartialArrayDestroy final : public EHScopeStack::Cleanup { + mlir::Value ArrayBegin; + Address ArrayEndPointer; + QualType ElementType; + [[maybe_unused]] CIRGenFunction::Destroyer *Destroyer; + CharUnits ElementAlign; + +public: + IrregularPartialArrayDestroy(mlir::Value arrayBegin, Address arrayEndPointer, + QualType elementType, CharUnits elementAlign, + CIRGenFunction::Destroyer *destroyer) + : ArrayBegin(arrayBegin), ArrayEndPointer(arrayEndPointer), + ElementType(elementType), Destroyer(destroyer), + ElementAlign(elementAlign) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + llvm_unreachable("NYI"); + } +}; +} // end anonymous namespace + +/// Push an EH cleanup to destroy already-constructed elements of the given +/// array. The cleanup may be popped with DeactivateCleanupBlock or +/// PopCleanupBlock. +/// +/// \param elementType - the immediate element type of the array; +/// possibly still an array type +void CIRGenFunction::pushIrregularPartialArrayCleanup(mlir::Value arrayBegin, + Address arrayEndPointer, + QualType elementType, + CharUnits elementAlign, + Destroyer *destroyer) { + pushFullExprCleanup( + EHCleanup, arrayBegin, arrayEndPointer, elementType, elementAlign, + destroyer); +} + +/// Push an EH cleanup to destroy already-constructed elements of the given +/// array. The cleanup may be popped with DeactivateCleanupBlock or +/// PopCleanupBlock. 
+/// +/// \param elementType - the immediate element type of the array; +/// possibly still an array type +void CIRGenFunction::pushRegularPartialArrayCleanup(mlir::Value arrayBegin, + mlir::Value arrayEnd, + QualType elementType, + CharUnits elementAlign, + Destroyer *destroyer) { + pushFullExprCleanup( + EHCleanup, arrayBegin, arrayEnd, elementType, elementAlign, destroyer); +} + +/// Destroys all the elements of the given array, beginning from last to first. +/// The array cannot be zero-length. +/// +/// \param begin - a type* denoting the first element of the array +/// \param end - a type* denoting one past the end of the array +/// \param elementType - the element type of the array +/// \param destroyer - the function to call to destroy elements +/// \param useEHCleanup - whether to push an EH cleanup to destroy +/// the remaining elements in case the destruction of a single +/// element throws +void CIRGenFunction::buildArrayDestroy(mlir::Value begin, mlir::Value end, + QualType elementType, + CharUnits elementAlign, + Destroyer *destroyer, + bool checkZeroLength, + bool useEHCleanup) { + assert(!elementType->isArrayType()); + if (checkZeroLength) { + llvm_unreachable("NYI"); + } + + // Differently from LLVM traditional codegen, use a higher level + // representation instead of lowering directly to a loop. + mlir::Type cirElementType = convertTypeForMem(elementType); + auto ptrToElmType = builder.getPointerTo(cirElementType); + + // Emit the dtor call that will execute for every array element. + builder.create( + *currSrcLoc, begin, [&](mlir::OpBuilder &b, mlir::Location loc) { + auto arg = b.getInsertionBlock()->addArgument(ptrToElmType, loc); + Address curAddr = Address(arg, ptrToElmType, elementAlign); + if (useEHCleanup) { + pushRegularPartialArrayCleanup(arg, arg, elementType, elementAlign, + destroyer); + } + + // Perform the actual destruction there. + destroyer(*this, curAddr, elementType); + + if (useEHCleanup) + PopCleanupBlock(); + + builder.create(loc); + }); +} + +/// Immediately perform the destruction of the given object. +/// +/// \param addr - the address of the object; a type* +/// \param type - the type of the object; if an array type, all +/// objects are destroyed in reverse order +/// \param destroyer - the function to call to destroy individual +/// elements +/// \param useEHCleanupForArray - whether an EH cleanup should be +/// used when destroying array elements, in case one of the +/// destructions throws an exception +void CIRGenFunction::emitDestroy(Address addr, QualType type, + Destroyer *destroyer, + bool useEHCleanupForArray) { + const ArrayType *arrayType = getContext().getAsArrayType(type); + if (!arrayType) + return destroyer(*this, addr, type); + + auto length = buildArrayLength(arrayType, type, addr); + + CharUnits elementAlign = addr.getAlignment().alignmentOfArrayElement( + getContext().getTypeSizeInChars(type)); + + // Normally we have to check whether the array is zero-length. + bool checkZeroLength = true; + + // But if the array length is constant, we can suppress that. + auto constantCount = dyn_cast(length.getDefiningOp()); + if (constantCount) { + auto constIntAttr = + mlir::dyn_cast(constantCount.getValue()); + // ...and if it's constant zero, we can just skip the entire thing. + if (constIntAttr && constIntAttr.getUInt() == 0) + return; + checkZeroLength = false; + } else { + llvm_unreachable("NYI"); + } + + auto begin = addr.getPointer(); + mlir::Value end; // Use this for future non-constant counts. 
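As an illustration of the constant-count path, assuming a type along these lines:

    struct S { ~S() {} };
    void f() { S arr[4]; }

A constant array length like this arrives as a CIR constant, so checkZeroLength is cleared and the call below emits one high-level destroy construct for the elements rather than an explicit loop.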
+ buildArrayDestroy(begin, end, type, elementAlign, destroyer, checkZeroLength, + useEHCleanupForArray); + if (constantCount.use_empty()) + constantCount.erase(); +} + +CIRGenFunction::Destroyer * +CIRGenFunction::getDestroyer(QualType::DestructionKind kind) { + switch (kind) { + case QualType::DK_none: + llvm_unreachable("no destroyer for trivial dtor"); + case QualType::DK_cxx_destructor: + return destroyCXXObject; + case QualType::DK_objc_strong_lifetime: + case QualType::DK_objc_weak_lifetime: + case QualType::DK_nontrivial_c_struct: + llvm_unreachable("NYI"); + } + llvm_unreachable("Unknown DestructionKind"); +} + +void CIRGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) { + EHStack.pushCleanup(Kind, SPMem); +} + +/// Enter a destroy cleanup for the given local variable. +void CIRGenFunction::buildAutoVarTypeCleanup( + const CIRGenFunction::AutoVarEmission &emission, + QualType::DestructionKind dtorKind) { + assert(dtorKind != QualType::DK_none); + + // Note that for __block variables, we want to destroy the + // original stack object, not the possibly forwarded object. + Address addr = emission.getObjectAddress(*this); + + const VarDecl *var = emission.Variable; + QualType type = var->getType(); + + CleanupKind cleanupKind = NormalAndEHCleanup; + CIRGenFunction::Destroyer *destroyer = nullptr; + + switch (dtorKind) { + case QualType::DK_none: + llvm_unreachable("no cleanup for trivially-destructible variable"); + + case QualType::DK_cxx_destructor: + // If there's an NRVO flag on the emission, we need a different + // cleanup. + if (emission.NRVOFlag) { + assert(!type->isArrayType()); + CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); + EHStack.pushCleanup(cleanupKind, addr, type, dtor, + emission.NRVOFlag); + return; + } + break; + + case QualType::DK_objc_strong_lifetime: + llvm_unreachable("NYI"); + break; + + case QualType::DK_objc_weak_lifetime: + break; + + case QualType::DK_nontrivial_c_struct: + llvm_unreachable("NYI"); + } + + // If we haven't chosen a more specific destroyer, use the default. + if (!destroyer) + destroyer = getDestroyer(dtorKind); + + // Use an EH cleanup in array destructors iff the destructor itself + // is being pushed as an EH cleanup. + bool useEHCleanup = (cleanupKind & EHCleanup); + EHStack.pushCleanup(cleanupKind, addr, type, destroyer, + useEHCleanup); +} + +/// Push the standard destructor for the given type as an EH-only cleanup. +void CIRGenFunction::pushEHDestroy(QualType::DestructionKind dtorKind, + Address addr, QualType type) { + assert(dtorKind && "cannot push destructor for trivial type"); + assert(needsEHCleanup(dtorKind)); + + pushDestroy(EHCleanup, addr, type, getDestroyer(dtorKind), true); +} + +// Pushes a destroy and defers its deactivation until its +// CleanupDeactivationScope is exited. 
+void CIRGenFunction::pushDestroyAndDeferDeactivation( + QualType::DestructionKind dtorKind, Address addr, QualType type) { + assert(dtorKind && "cannot push destructor for trivial type"); + + CleanupKind cleanupKind = getCleanupKind(dtorKind); + pushDestroyAndDeferDeactivation( + cleanupKind, addr, type, getDestroyer(dtorKind), cleanupKind & EHCleanup); +} + +void CIRGenFunction::pushDestroyAndDeferDeactivation( + CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, + bool useEHCleanupForArray) { + mlir::Operation *flag = + builder.create(builder.getUnknownLoc()); + pushDestroy(cleanupKind, addr, type, destroyer, useEHCleanupForArray); + DeferredDeactivationCleanupStack.push_back({EHStack.stable_begin(), flag}); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp new file mode 100644 index 000000000000..682eddbe9581 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp @@ -0,0 +1,98 @@ +//===--- CIRGenDeclCXX.cpp - Build CIR Code for C++ declarations ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with code generation of C++ declarations +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "TargetInfo.h" +#include "clang/AST/Attr.h" +#include "clang/Basic/LangOptions.h" + +using namespace clang; +using namespace mlir::cir; +using namespace cir; + +void CIRGenModule::buildCXXGlobalInitFunc() { + while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) + CXXGlobalInits.pop_back(); + + if (CXXGlobalInits.empty()) // TODO(cir): && + // PrioritizedCXXGlobalInits.empty()) + return; + + assert(0 && "NYE"); +} + +void CIRGenModule::buildCXXGlobalVarDeclInitFunc(const VarDecl *D, + mlir::cir::GlobalOp Addr, + bool PerformInit) { + // According to E.2.3.1 in CUDA-7.5 Programming guide: __device__, + // __constant__ and __shared__ variables defined in namespace scope, + // that are of class type, cannot have a non-empty constructor. All + // the checks have been done in Sema by now. Whatever initializers + // are allowed are empty and we just need to ignore them here. + if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit && + (D->hasAttr() || D->hasAttr() || + D->hasAttr())) + return; + + assert(!getLangOpts().OpenMP && "OpenMP global var init not implemented"); + + // Check if we've already initialized this decl. + auto I = DelayedCXXInitPosition.find(D); + if (I != DelayedCXXInitPosition.end() && I->second == ~0U) + return; + + buildCXXGlobalVarDeclInit(D, Addr, PerformInit); +} + +void CIRGenModule::buildCXXGlobalVarDeclInit(const VarDecl *D, + mlir::cir::GlobalOp Addr, + bool PerformInit) { + QualType T = D->getType(); + + // TODO: handle address space + // The address space of a static local variable (DeclPtr) may be different + // from the address space of the "this" argument of the constructor. In that + // case, we need an addrspacecast before calling the constructor. + // + // struct StructWithCtor { + // __device__ StructWithCtor() {...} + // }; + // __device__ void foo() { + // __shared__ StructWithCtor s; + // ... 
+ // } + // + // For example, in the above CUDA code, the static local variable s has a + // "shared" address space qualifier, but the constructor of StructWithCtor + // expects "this" in the "generic" address space. + assert(!MissingFeatures::addressSpace()); + + if (!T->isReferenceType()) { + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && + D->hasAttr()) { + llvm_unreachable("NYI"); + } + bool NeedsDtor = + D->needsDestruction(getASTContext()) == QualType::DK_cxx_destructor; + // PerformInit, constant store invariant / destroy handled below. + bool isCstStorage = + D->getType().isConstantStorage(getASTContext(), true, !NeedsDtor); + codegenGlobalInitCxxStructor(D, Addr, PerformInit, NeedsDtor, isCstStorage); + return; + } + + assert(PerformInit && "cannot have constant initializer which needs " + "destruction for reference"); + // TODO(cir): buildReferenceBindingToExpr + llvm_unreachable("NYI"); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp new file mode 100644 index 000000000000..d7cea55dd462 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp @@ -0,0 +1,873 @@ +//===--- CIRGenException.cpp - Emit CIR Code for C++ exceptions -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ exception related code generation. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenCleanup.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" + +#include "clang/AST/StmtVisitor.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" +#include + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Value.h" +#include "llvm/Support/SaveAndRestore.h" + +using namespace cir; +using namespace clang; + +const EHPersonality EHPersonality::GNU_C = {"__gcc_personality_v0", nullptr}; +const EHPersonality EHPersonality::GNU_C_SJLJ = {"__gcc_personality_sj0", + nullptr}; +const EHPersonality EHPersonality::GNU_C_SEH = {"__gcc_personality_seh0", + nullptr}; +const EHPersonality EHPersonality::NeXT_ObjC = {"__objc_personality_v0", + nullptr}; +const EHPersonality EHPersonality::GNU_CPlusPlus = {"__gxx_personality_v0", + nullptr}; +const EHPersonality EHPersonality::GNU_CPlusPlus_SJLJ = { + "__gxx_personality_sj0", nullptr}; +const EHPersonality EHPersonality::GNU_CPlusPlus_SEH = { + "__gxx_personality_seh0", nullptr}; +const EHPersonality EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", + "objc_exception_throw"}; +const EHPersonality EHPersonality::GNU_ObjC_SJLJ = { + "__gnu_objc_personality_sj0", "objc_exception_throw"}; +const EHPersonality EHPersonality::GNU_ObjC_SEH = { + "__gnu_objc_personality_seh0", "objc_exception_throw"}; +const EHPersonality EHPersonality::GNU_ObjCXX = { + "__gnustep_objcxx_personality_v0", nullptr}; +const EHPersonality EHPersonality::GNUstep_ObjC = { + "__gnustep_objc_personality_v0", nullptr}; +const EHPersonality EHPersonality::MSVC_except_handler = {"_except_handler3", + 
nullptr}; +const EHPersonality EHPersonality::MSVC_C_specific_handler = { + "__C_specific_handler", nullptr}; +const EHPersonality EHPersonality::MSVC_CxxFrameHandler3 = { + "__CxxFrameHandler3", nullptr}; +const EHPersonality EHPersonality::GNU_Wasm_CPlusPlus = { + "__gxx_wasm_personality_v0", nullptr}; +const EHPersonality EHPersonality::XL_CPlusPlus = {"__xlcxx_personality_v1", + nullptr}; + +static const EHPersonality &getCPersonality(const TargetInfo &Target, + const LangOptions &L) { + const llvm::Triple &T = Target.getTriple(); + if (T.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + if (L.hasSjLjExceptions()) + return EHPersonality::GNU_C_SJLJ; + if (L.hasDWARFExceptions()) + return EHPersonality::GNU_C; + if (L.hasSEHExceptions()) + return EHPersonality::GNU_C_SEH; + return EHPersonality::GNU_C; +} + +static const EHPersonality &getObjCPersonality(const TargetInfo &Target, + const LangOptions &L) { + const llvm::Triple &T = Target.getTriple(); + if (T.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + + switch (L.ObjCRuntime.getKind()) { + case ObjCRuntime::FragileMacOSX: + return getCPersonality(Target, L); + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + case ObjCRuntime::WatchOS: + return EHPersonality::NeXT_ObjC; + case ObjCRuntime::GNUstep: + if (L.ObjCRuntime.getVersion() >= VersionTuple(1, 7)) + return EHPersonality::GNUstep_ObjC; + [[fallthrough]]; + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: + if (L.hasSjLjExceptions()) + return EHPersonality::GNU_ObjC_SJLJ; + if (L.hasSEHExceptions()) + return EHPersonality::GNU_ObjC_SEH; + return EHPersonality::GNU_ObjC; + } + llvm_unreachable("bad runtime kind"); +} + +static const EHPersonality &getCXXPersonality(const TargetInfo &Target, + const LangOptions &L) { + const llvm::Triple &T = Target.getTriple(); + if (T.isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + if (T.isOSAIX()) + return EHPersonality::XL_CPlusPlus; + if (L.hasSjLjExceptions()) + return EHPersonality::GNU_CPlusPlus_SJLJ; + if (L.hasDWARFExceptions()) + return EHPersonality::GNU_CPlusPlus; + if (L.hasSEHExceptions()) + return EHPersonality::GNU_CPlusPlus_SEH; + if (L.hasWasmExceptions()) + return EHPersonality::GNU_Wasm_CPlusPlus; + return EHPersonality::GNU_CPlusPlus; +} + +/// Determines the personality function to use when both C++ +/// and Objective-C exceptions are being caught. +static const EHPersonality &getObjCXXPersonality(const TargetInfo &Target, + const LangOptions &L) { + if (Target.getTriple().isWindowsMSVCEnvironment()) + return EHPersonality::MSVC_CxxFrameHandler3; + + switch (L.ObjCRuntime.getKind()) { + // In the fragile ABI, just use C++ exception handling and hope + // they're not doing crazy exception mixing. + case ObjCRuntime::FragileMacOSX: + return getCXXPersonality(Target, L); + + // The ObjC personality defers to the C++ personality for non-ObjC + // handlers. Unlike the C++ case, we use the same personality + // function on targets using (backend-driven) SJLJ EH. + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + case ObjCRuntime::WatchOS: + return getObjCPersonality(Target, L); + + case ObjCRuntime::GNUstep: + return EHPersonality::GNU_ObjCXX; + + // The GCC runtime's personality function inherently doesn't support + // mixed EH. Use the ObjC personality just to avoid returning null. 
+ case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: + return getObjCPersonality(Target, L); + } + llvm_unreachable("bad runtime kind"); +} + +static const EHPersonality &getSEHPersonalityMSVC(const llvm::Triple &T) { + if (T.getArch() == llvm::Triple::x86) + return EHPersonality::MSVC_except_handler; + return EHPersonality::MSVC_C_specific_handler; +} + +const EHPersonality &EHPersonality::get(CIRGenModule &CGM, + const FunctionDecl *FD) { + const llvm::Triple &T = CGM.getTarget().getTriple(); + const LangOptions &L = CGM.getLangOpts(); + const TargetInfo &Target = CGM.getTarget(); + + // Functions using SEH get an SEH personality. + if (FD && FD->usesSEHTry()) + return getSEHPersonalityMSVC(T); + + if (L.ObjC) + return L.CPlusPlus ? getObjCXXPersonality(Target, L) + : getObjCPersonality(Target, L); + return L.CPlusPlus ? getCXXPersonality(Target, L) + : getCPersonality(Target, L); +} + +const EHPersonality &EHPersonality::get(CIRGenFunction &CGF) { + const auto *FD = CGF.CurCodeDecl; + // For outlined finallys and filters, use the SEH personality in case they + // contain more SEH. This mostly only affects finallys. Filters could + // hypothetically use gnu statement expressions to sneak in nested SEH. + FD = FD ? FD : CGF.CurSEHParent.getDecl(); + return get(CGF.CGM, dyn_cast_or_null(FD)); +} + +void CIRGenFunction::buildCXXThrowExpr(const CXXThrowExpr *E) { + if (const Expr *SubExpr = E->getSubExpr()) { + QualType ThrowType = SubExpr->getType(); + if (ThrowType->isObjCObjectPointerType()) { + llvm_unreachable("NYI"); + } else { + CGM.getCXXABI().buildThrow(*this, E); + } + } else { + CGM.getCXXABI().buildRethrow(*this, /*isNoReturn=*/true); + } + + // In LLVM codegen the expression emitters expect to leave this + // path by starting a new basic block. We do not need that in CIR. +} + +namespace { +/// A cleanup to free the exception object if its initialization +/// throws. +struct FreeException final : EHScopeStack::Cleanup { + mlir::Value exn; + FreeException(mlir::Value exn) : exn(exn) {} + void Emit(CIRGenFunction &CGF, Flags flags) override { + llvm_unreachable("call to cxa_free or equivalent op NYI"); + } +}; +} // end anonymous namespace + +// Emits an exception expression into the given location. This +// differs from buildAnyExprToMem only in that, if a final copy-ctor +// call is required, an exception within that copy ctor causes +// std::terminate to be invoked. +void CIRGenFunction::buildAnyExprToExn(const Expr *e, Address addr) { + // Make sure the exception object is cleaned up if there's an + // exception during initialization. + pushFullExprCleanup(EHCleanup, addr.getPointer()); + EHScopeStack::stable_iterator cleanup = EHStack.stable_begin(); + + // __cxa_allocate_exception returns a void*; we need to cast this + // to the appropriate type for the object. + auto ty = convertTypeForMem(e->getType()); + Address typedAddr = addr.withElementType(ty); + + // From LLVM's codegen: + // FIXME: this isn't quite right! If there's a final unelided call + // to a copy constructor, then according to [except.terminate]p1 we + // must call std::terminate() if that constructor throws, because + // technically that copy occurs after the exception expression is + // evaluated but before the exception is caught. But the best way + // to handle that is to teach EmitAggExpr to do the final copy + // differently if it can't be elided. + buildAnyExprToMem(e, typedAddr, e->getType().getQualifiers(), + /*IsInit*/ true); + + // Deactivate the cleanup block. 
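An illustrative source pattern for this function; the type name is invented:

    struct Widget { Widget(const Widget &other) {} };
    void g(Widget w) { throw w; }

The copy that initializes the in-flight exception object is emitted by buildAnyExprToMem above into the storage handed back by __cxa_allocate_exception; the FreeException cleanup stays armed only while that initializer runs and is deactivated just below.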
+ auto op = typedAddr.getPointer().getDefiningOp(); + assert(op && + "expected valid Operation *, block arguments are not meaningful here"); + DeactivateCleanupBlock(cleanup, op); +} + +static mlir::Block *getResumeBlockFromCatch(mlir::cir::TryOp &tryOp, + mlir::cir::GlobalOp globalParent) { + assert(tryOp && "cir.try expected"); + unsigned numCatchRegions = tryOp.getCatchRegions().size(); + assert(numCatchRegions && "expected at least one region"); + auto &fallbackRegion = tryOp.getCatchRegions()[numCatchRegions - 1]; + return &fallbackRegion.getBlocks().back(); + return nullptr; +} + +mlir::Block *CIRGenFunction::getEHResumeBlock(bool isCleanup, + mlir::cir::TryOp tryOp) { + + if (ehResumeBlock) + return ehResumeBlock; + // Just like some other try/catch related logic: return the basic block + // pointer but only use it to denote we're tracking things, but there + // shouldn't be any changes to that block after work done in this function. + ehResumeBlock = getResumeBlockFromCatch(tryOp, CGM.globalOpContext); + if (!ehResumeBlock->empty()) + return ehResumeBlock; + + auto ip = getBuilder().saveInsertionPoint(); + getBuilder().setInsertionPointToStart(ehResumeBlock); + + const EHPersonality &Personality = EHPersonality::get(*this); + + // This can always be a call + // because we necessarily didn't + // find anything on the EH stack + // which needs our help. + const char *RethrowName = Personality.CatchallRethrowFn; + if (RethrowName != nullptr && !isCleanup) { + // FIXME(cir): upon testcase + // this should just add the + // 'rethrow' attribute to + // mlir::cir::ResumeOp below. + llvm_unreachable("NYI"); + } + + getBuilder().create(tryOp.getLoc(), mlir::Value{}, + mlir::Value{}); + getBuilder().restoreInsertionPoint(ip); + return ehResumeBlock; +} + +mlir::LogicalResult CIRGenFunction::buildCXXTryStmt(const CXXTryStmt &S) { + auto loc = getLoc(S.getSourceRange()); + mlir::OpBuilder::InsertPoint scopeIP; + + // Create a scope to hold try local storage for catch params. + [[maybe_unused]] auto s = builder.create( + loc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + scopeIP = getBuilder().saveInsertionPoint(); + }); + + auto r = mlir::success(); + { + mlir::OpBuilder::InsertionGuard guard(getBuilder()); + getBuilder().restoreInsertionPoint(scopeIP); + r = buildCXXTryStmtUnderScope(S); + getBuilder().create(loc); + } + return r; +} + +mlir::LogicalResult +CIRGenFunction::buildCXXTryStmtUnderScope(const CXXTryStmt &S) { + const llvm::Triple &T = getTarget().getTriple(); + // If we encounter a try statement on in an OpenMP target region offloaded to + // a GPU, we treat it as a basic block. + const bool IsTargetDevice = + (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())); + assert(!IsTargetDevice && "NYI"); + + auto hasCatchAll = [&]() { + if (!S.getNumHandlers()) + return false; + unsigned lastHandler = S.getNumHandlers() - 1; + if (!S.getHandler(lastHandler)->getExceptionDecl()) + return true; + return false; + }; + + auto numHandlers = S.getNumHandlers(); + auto tryLoc = getLoc(S.getBeginLoc()); + + mlir::OpBuilder::InsertPoint beginInsertTryBody; + + // Create the scope to represent only the C/C++ `try {}` part. However, + // don't populate right away. Reserve some space to store the exception + // info but don't emit the bulk right away, for now only make sure the + // scope returns the exception information. 
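As a rough sketch of how handler regions are counted, assuming handlers like these:

    struct Err {};
    void mayThrow();
    void handle(const Err &);

    void f() {
      try { mayThrow(); }
      catch (const Err &e) { handle(e); }
      catch (...) { }
    }

The last handler is already a catch-all, so hasCatchAll() is true and the region callback below creates exactly numHandlers regions; only a handler list without a catch-all gets the extra unwind region.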
+ auto tryOp = builder.create( + tryLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + beginInsertTryBody = getBuilder().saveInsertionPoint(); + }, + // Don't emit the code right away for catch clauses, for + // now create the regions and consume the try scope result. + // Note that clauses are later populated in + // CIRGenFunction::buildLandingPad. + [&](mlir::OpBuilder &b, mlir::Location loc, + mlir::OperationState &result) { + mlir::OpBuilder::InsertionGuard guard(b); + auto numRegionsToCreate = numHandlers; + if (!hasCatchAll()) + numRegionsToCreate++; + // Once for each handler + (catch_all or unwind). + for (int i = 0, e = numRegionsToCreate; i != e; ++i) { + auto *r = result.addRegion(); + builder.createBlock(r); + } + }); + + // Finally emit the body for try/catch. + auto emitTryCatchBody = [&]() -> mlir::LogicalResult { + auto loc = tryOp.getLoc(); + mlir::OpBuilder::InsertionGuard guard(getBuilder()); + getBuilder().restoreInsertionPoint(beginInsertTryBody); + CIRGenFunction::LexicalScope tryScope{*this, loc, + getBuilder().getInsertionBlock()}; + + { + tryScope.setAsTry(tryOp); + // Attach the basic blocks for the catch regions. + enterCXXTryStmt(S, tryOp); + // Emit the body for the `try {}` part. + { + CIRGenFunction::LexicalScope tryBodyScope{ + *this, loc, getBuilder().getInsertionBlock()}; + if (buildStmt(S.getTryBlock(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + } + } + + { + // Emit catch clauses. + exitCXXTryStmt(S); + } + + return mlir::success(); + }; + + return emitTryCatchBody(); +} + +/// Emit the structure of the dispatch block for the given catch scope. +/// It is an invariant that the dispatch block already exists. +static void buildCatchDispatchBlock(CIRGenFunction &CGF, + EHCatchScope &catchScope, + mlir::cir::TryOp tryOp) { + if (EHPersonality::get(CGF).isWasmPersonality()) + llvm_unreachable("NYI"); + if (EHPersonality::get(CGF).usesFuncletPads()) + llvm_unreachable("NYI"); + + auto *dispatchBlock = catchScope.getCachedEHDispatchBlock(); + assert(dispatchBlock); + + // If there's only a single catch-all, getEHDispatchBlock returned + // that catch-all as the dispatch block. + if (catchScope.getNumHandlers() == 1 && + catchScope.getHandler(0).isCatchAll()) { + // assert(dispatchBlock == catchScope.getHandler(0).Block); + return; + } + + // In traditional LLVM codegen, the right handler is selected (with + // calls to eh_typeid_for) and the selector value is loaded. After that, + // blocks get connected for later codegen. In CIR, these are all + // implicit behaviors of cir.catch - not a lot of work to do. + // + // Test against each of the exception types we claim to catch. + for (unsigned i = 0, e = catchScope.getNumHandlers();; ++i) { + assert(i < e && "ran off end of handlers!"); + const EHCatchScope::Handler &handler = catchScope.getHandler(i); + + auto typeValue = handler.Type.RTTI; + assert(handler.Type.Flags == 0 && "catch handler flags not supported"); + assert(typeValue && "fell into catch-all case!"); + // Check for address space mismatch: if (typeValue->getType() != + // argTy) + assert(!MissingFeatures::addressSpace()); + + bool nextIsEnd = false; + // If this is the last handler, we're at the end, and the next + // block is the block for the enclosing EH scope. Make sure to call + // getEHDispatchBlock for caching it. 
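Continuing with an illustrative handler list:

    void mayThrow();
    void f() {
      try { mayThrow(); }
      catch (int e) { }  // handler 0: typed
      catch (...) { }    // handler 1: catch-all
    }

The loop below runs once for the `int` handler, sees that the next handler is a catch-all, and returns; only when the last handler is typed does it ask for the enclosing scope's dispatch block so that it gets cached.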
+ if (i + 1 == e) { + (void)CGF.getEHDispatchBlock(catchScope.getEnclosingEHScope(), tryOp); + nextIsEnd = true; + + // If the next handler is a catch-all, we're at the end, and the + // next block is that handler. + } else if (catchScope.getHandler(i + 1).isCatchAll()) { + // Block already created when creating catch regions, just mark this + // is the end. + nextIsEnd = true; + } + + // If the next handler is a catch-all, we're completely done. + if (nextIsEnd) + return; + } +} + +void CIRGenFunction::enterCXXTryStmt(const CXXTryStmt &S, + mlir::cir::TryOp tryOp, + bool IsFnTryBlock) { + unsigned NumHandlers = S.getNumHandlers(); + EHCatchScope *CatchScope = EHStack.pushCatch(NumHandlers); + for (unsigned I = 0; I != NumHandlers; ++I) { + const CXXCatchStmt *C = S.getHandler(I); + + mlir::Block *Handler = &tryOp.getCatchRegions()[I].getBlocks().front(); + if (C->getExceptionDecl()) { + // FIXME: Dropping the reference type on the type into makes it + // impossible to correctly implement catch-by-reference + // semantics for pointers. Unfortunately, this is what all + // existing compilers do, and it's not clear that the standard + // personality routine is capable of doing this right. See C++ DR 388 : + // http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#388 + Qualifiers CaughtTypeQuals; + QualType CaughtType = CGM.getASTContext().getUnqualifiedArrayType( + C->getCaughtType().getNonReferenceType(), CaughtTypeQuals); + + CatchTypeInfo TypeInfo{nullptr, 0}; + if (CaughtType->isObjCObjectPointerType()) + llvm_unreachable("NYI"); + else + TypeInfo = CGM.getCXXABI().getAddrOfCXXCatchHandlerType( + getLoc(S.getSourceRange()), CaughtType, C->getCaughtType()); + CatchScope->setHandler(I, TypeInfo, Handler); + } else { + // No exception decl indicates '...', a catch-all. + CatchScope->setHandler(I, CGM.getCXXABI().getCatchAllTypeInfo(), Handler); + // Under async exceptions, catch(...) need to catch HW exception too + // Mark scope with SehTryBegin as a SEH __try scope + if (getLangOpts().EHAsynch) + llvm_unreachable("NYI"); + } + } +} + +void CIRGenFunction::exitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { + unsigned NumHandlers = S.getNumHandlers(); + EHCatchScope &CatchScope = cast(*EHStack.begin()); + assert(CatchScope.getNumHandlers() == NumHandlers); + mlir::cir::TryOp tryOp = currLexScope->getTry(); + + // If the catch was not required, bail out now. + if (!CatchScope.hasEHBranches()) { + CatchScope.clearHandlerBlocks(); + EHStack.popCatch(); + // Drop all basic block from all catch regions. + SmallVector eraseBlocks; + for (mlir::Region &r : tryOp.getCatchRegions()) { + if (r.empty()) + continue; + for (mlir::Block &b : r.getBlocks()) + eraseBlocks.push_back(&b); + } + for (mlir::Block *b : eraseBlocks) + b->erase(); + tryOp.setCatchTypesAttr({}); + return; + } + + // Emit the structure of the EH dispatch for this catch. + buildCatchDispatchBlock(*this, CatchScope, tryOp); + + // Copy the handler blocks off before we pop the EH stack. Emitting + // the handlers might scribble on this memory. + SmallVector Handlers( + CatchScope.begin(), CatchScope.begin() + NumHandlers); + + EHStack.popCatch(); + + // Determine if we need an implicit rethrow for all these catch handlers; + // see the comment below. + bool doImplicitRethrow = false; + if (IsFnTryBlock) + doImplicitRethrow = isa(CurCodeDecl) || + isa(CurCodeDecl); + + // Wasm uses Windows-style EH instructions, but merges all catch clauses into + // one big catchpad. 
So we save the old funclet pad here before we traverse + // each catch handler. + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); + mlir::Block *WasmCatchStartBlock = nullptr; + if (EHPersonality::get(*this).isWasmPersonality()) { + llvm_unreachable("NYI"); + } + + bool HasCatchAll = false; + for (unsigned I = NumHandlers; I != 0; --I) { + HasCatchAll |= Handlers[I - 1].isCatchAll(); + mlir::Block *CatchBlock = Handlers[I - 1].Block; + mlir::OpBuilder::InsertionGuard guard(getBuilder()); + getBuilder().setInsertionPointToStart(CatchBlock); + + // Catch the exception if this isn't a catch-all. + const CXXCatchStmt *C = S.getHandler(I - 1); + + // Enter a cleanup scope, including the catch variable and the + // end-catch. + RunCleanupsScope CatchScope(*this); + + // Initialize the catch variable and set up the cleanups. + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); + CGM.getCXXABI().emitBeginCatch(*this, C); + + // Emit the PGO counter increment. + assert(!MissingFeatures::incrementProfileCounter()); + + // Perform the body of the catch. + (void)buildStmt(C->getHandlerBlock(), /*useCurrentScope=*/true); + + // [except.handle]p11: + // The currently handled exception is rethrown if control + // reaches the end of a handler of the function-try-block of a + // constructor or destructor. + + // It is important that we only do this on fallthrough and not on + // return. Note that it's illegal to put a return in a + // constructor function-try-block's catch handler (p14), so this + // really only applies to destructors. + if (doImplicitRethrow && HaveInsertPoint()) { + llvm_unreachable("NYI"); + } + + // Fall out through the catch cleanups. + CatchScope.ForceCleanup(); + } + + // Because in wasm we merge all catch clauses into one big catchpad, in case + // none of the types in catch handlers matches after we test against each of + // them, we should unwind to the next EH enclosing scope. We generate a call + // to rethrow function here to do that. + if (EHPersonality::get(*this).isWasmPersonality() && !HasCatchAll) { + assert(WasmCatchStartBlock); + // Navigate for the "rethrow" block we created in emitWasmCatchPadBlock(). + // Wasm uses landingpad-style conditional branches to compare selectors, so + // we follow the false destination for each of the cond branches to reach + // the rethrow block. + llvm_unreachable("NYI"); + } + + assert(!MissingFeatures::incrementProfileCounter()); +} + +/// Check whether this is a non-EH scope, i.e. a scope which doesn't +/// affect exception handling. Currently, the only non-EH scopes are +/// normal-only cleanup scopes. 
+static bool isNonEHScope(const EHScope &S) { + switch (S.getKind()) { + case EHScope::Cleanup: + return !cast(S).isEHCleanup(); + case EHScope::Filter: + case EHScope::Catch: + case EHScope::Terminate: + return false; + } + + llvm_unreachable("Invalid EHScope Kind!"); +} + +mlir::Operation *CIRGenFunction::buildLandingPad(mlir::cir::TryOp tryOp) { + assert(EHStack.requiresLandingPad()); + assert(!CGM.getLangOpts().IgnoreExceptions && + "LandingPad should not be emitted when -fignore-exceptions are in " + "effect."); + EHScope &innermostEHScope = *EHStack.find(EHStack.getInnermostEHScope()); + switch (innermostEHScope.getKind()) { + case EHScope::Terminate: + return getTerminateLandingPad(); + + case EHScope::Catch: + case EHScope::Cleanup: + case EHScope::Filter: + if (auto *lpad = innermostEHScope.getCachedLandingPad()) + return lpad; + } + + // If there's an existing TryOp, it means we got a `cir.try` scope + // that leads to this "landing pad" creation site. Otherwise, exceptions + // are enabled but a throwing function is called anyways (common pattern + // with function local static initializers). + { + // Save the current CIR generation state. + mlir::OpBuilder::InsertionGuard guard(builder); + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + + // Traditional LLVM codegen creates the lpad basic block, extract + // values, landing pad instructions, etc. + + // Accumulate all the handlers in scope. + bool hasCatchAll = false; + bool hasCleanup = false; + bool hasFilter = false; + SmallVector filterTypes; + llvm::SmallPtrSet catchTypes; + SmallVector clauses; + + for (EHScopeStack::iterator I = EHStack.begin(), E = EHStack.end(); I != E; + ++I) { + + switch (I->getKind()) { + case EHScope::Cleanup: + // If we have a cleanup, remember that. + hasCleanup = (hasCleanup || cast(*I).isEHCleanup()); + continue; + + case EHScope::Filter: { + llvm_unreachable("NYI"); + } + + case EHScope::Terminate: + // Terminate scopes are basically catch-alls. + // assert(!hasCatchAll); + // hasCatchAll = true; + // goto done; + llvm_unreachable("NYI"); + + case EHScope::Catch: + break; + } + + EHCatchScope &catchScope = cast(*I); + for (unsigned hi = 0, he = catchScope.getNumHandlers(); hi != he; ++hi) { + EHCatchScope::Handler handler = catchScope.getHandler(hi); + assert(handler.Type.Flags == 0 && + "landingpads do not support catch handler flags"); + + // If this is a catch-all, register that and abort. + if (!handler.Type.RTTI) { + assert(!hasCatchAll); + hasCatchAll = true; + goto done; + } + + // Check whether we already have a handler for this type. + if (catchTypes.insert(handler.Type.RTTI).second) { + // If not, keep track to later add to catch op. + clauses.push_back(handler.Type.RTTI); + } + } + } + + done: + // If we have a catch-all, add null to the landingpad. + assert(!(hasCatchAll && hasFilter)); + if (hasCatchAll) { + // Attach the catch_all region. Can't coexist with an unwind one. + auto catchAll = mlir::cir::CatchAllAttr::get(builder.getContext()); + clauses.push_back(catchAll); + + // If we have an EH filter, we need to add those handlers in the + // right place in the landingpad, which is to say, at the end. + } else if (hasFilter) { + // Create a filter expression: a constant array indicating which filter + // types there are. The personality routine only lands here if the filter + // doesn't match. + llvm_unreachable("NYI"); + + // Otherwise, signal that we at least have cleanups. 
+ } else if (hasCleanup) { + tryOp.setCleanup(true); + } + + assert((clauses.size() > 0 || hasCleanup) && "no catch clauses!"); + + // If there's no catch_all, attach the unwind region. This needs to be the + // last region in the TryOp operation catch list. + if (!hasCatchAll) { + auto catchUnwind = mlir::cir::CatchUnwindAttr::get(builder.getContext()); + clauses.push_back(catchUnwind); + } + + // Add final array of clauses into TryOp. + tryOp.setCatchTypesAttr( + mlir::ArrayAttr::get(builder.getContext(), clauses)); + + // In traditional LLVM codegen. this tells the backend how to generate the + // landing pad by generating a branch to the dispatch block. + mlir::Block *dispatch = + getEHDispatchBlock(EHStack.getInnermostEHScope(), tryOp); + (void)dispatch; + } + + return tryOp; +} + +// Differently from LLVM traditional codegen, there are no dispatch blocks +// to look at given cir.try_call does not jump to blocks like invoke does. +// However, we keep this around since other parts of CIRGen use +// getCachedEHDispatchBlock to infer state. +mlir::Block * +CIRGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si, + mlir::cir::TryOp tryOp) { + if (EHPersonality::get(*this).usesFuncletPads()) + llvm_unreachable("NYI"); + + // The dispatch block for the end of the scope chain is a block that + // just resumes unwinding. + if (si == EHStack.stable_end()) + return getEHResumeBlock(true, tryOp); + + // Otherwise, we should look at the actual scope. + EHScope &scope = *EHStack.find(si); + + auto *dispatchBlock = scope.getCachedEHDispatchBlock(); + if (!dispatchBlock) { + switch (scope.getKind()) { + case EHScope::Catch: { + // LLVM does some optimization with branches here, CIR just keep track of + // the corresponding calls. + assert(callWithExceptionCtx && "expected call information"); + { + mlir::OpBuilder::InsertionGuard guard(getBuilder()); + assert(callWithExceptionCtx.getCleanup().empty() && + "one per call: expected empty region at this point"); + dispatchBlock = builder.createBlock(&callWithExceptionCtx.getCleanup()); + builder.createYield(callWithExceptionCtx.getLoc()); + } + break; + } + + case EHScope::Cleanup: { + assert(callWithExceptionCtx && "expected call information"); + { + mlir::OpBuilder::InsertionGuard guard(getBuilder()); + assert(callWithExceptionCtx.getCleanup().empty() && + "one per call: expected empty region at this point"); + dispatchBlock = builder.createBlock(&callWithExceptionCtx.getCleanup()); + builder.createYield(callWithExceptionCtx.getLoc()); + } + break; + } + + case EHScope::Filter: + llvm_unreachable("NYI"); + break; + + case EHScope::Terminate: + llvm_unreachable("NYI"); + break; + } + scope.setCachedEHDispatchBlock(dispatchBlock); + } + return dispatchBlock; +} + +bool CIRGenFunction::isInvokeDest() { + if (!EHStack.requiresLandingPad()) + return false; + + // If exceptions are disabled/ignored and SEH is not in use, then there is no + // invoke destination. SEH "works" even if exceptions are off. In practice, + // this means that C++ destructors and other EH cleanups don't run, which is + // consistent with MSVC's behavior, except in the presence of -EHa + const LangOptions &LO = CGM.getLangOpts(); + if (!LO.Exceptions || LO.IgnoreExceptions) { + if (!LO.Borland && !LO.MicrosoftExt) + return false; + if (!currentFunctionUsesSEHTry()) + return false; + } + + // CUDA device code doesn't have exceptions. 
+ if (LO.CUDA && LO.CUDAIsDevice) + return false; + + return true; +} + +mlir::Operation *CIRGenFunction::getInvokeDestImpl(mlir::cir::TryOp tryOp) { + assert(EHStack.requiresLandingPad()); + assert(!EHStack.empty()); + assert(isInvokeDest()); + + // Check the innermost scope for a cached landing pad. If this is + // a non-EH cleanup, we'll check enclosing scopes in EmitLandingPad. + auto *LP = EHStack.begin()->getCachedLandingPad(); + if (LP) + return LP; + + const EHPersonality &Personality = EHPersonality::get(*this); + + // FIXME(cir): add personality function + // if (!CurFn->hasPersonalityFn()) + // CurFn->setPersonalityFn(getOpaquePersonalityFn(CGM, Personality)); + + if (Personality.usesFuncletPads()) { + // We don't need separate landing pads in the funclet model. + llvm::errs() << "PersonalityFn: " << Personality.PersonalityFn << "\n"; + llvm_unreachable("NYI"); + } else { + LP = buildLandingPad(tryOp); + } + + assert(LP); + + // Cache the landing pad on the innermost scope. If this is a + // non-EH scope, cache the landing pad on the enclosing scope, too. + for (EHScopeStack::iterator ir = EHStack.begin(); true; ++ir) { + ir->setCachedLandingPad(LP); + if (!isNonEHScope(*ir)) + break; + } + + return LP; +} + +mlir::Operation *CIRGenFunction::getTerminateLandingPad() { + llvm_unreachable("NYI"); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp new file mode 100644 index 000000000000..db2d82c00dfb --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -0,0 +1,3248 @@ +//===--- CIRGenExpr.cpp - Emit LLVM Code from Expressions -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes as CIR code. +// +//===----------------------------------------------------------------------===// +#include "CIRGenCXXABI.h" +#include "CIRGenCall.h" +#include "CIRGenCstEmitter.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "CIRGenOpenMPRuntime.h" +#include "CIRGenTBAA.h" +#include "CIRGenValue.h" +#include "EHScopeStack.h" +#include "TargetInfo.h" + +#include "clang/AST/ExprCXX.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/Basic/Builtins.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +#include "llvm/ADT/StringExtras.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; + +static mlir::cir::FuncOp buildFunctionDeclPointer(CIRGenModule &CGM, + GlobalDecl GD) { + const auto *FD = cast(GD.getDecl()); + + if (FD->hasAttr()) { + mlir::Operation *aliasee = CGM.getWeakRefReference(FD); + return dyn_cast(aliasee); + } + + auto V = CGM.GetAddrOfFunction(GD); + + return V; +} + +static Address buildPreserveStructAccess(CIRGenFunction &CGF, LValue base, + Address addr, const FieldDecl *field) { + llvm_unreachable("NYI"); +} + +/// Get the address of a zero-sized field within a record. The resulting address +/// doesn't necessarily have the right type. 
+static Address buildAddrOfFieldStorage(CIRGenFunction &CGF, Address Base, + const FieldDecl *field, + llvm::StringRef fieldName, + unsigned fieldIndex) { + if (field->isZeroSize(CGF.getContext())) + llvm_unreachable("NYI"); + + auto loc = CGF.getLoc(field->getLocation()); + + auto fieldType = CGF.convertType(field->getType()); + auto fieldPtr = + mlir::cir::PointerType::get(CGF.getBuilder().getContext(), fieldType); + // For most cases fieldName is the same as field->getName() but for lambdas, + // which do not currently carry the name, so it can be passed down from the + // CaptureStmt. + auto memberAddr = CGF.getBuilder().createGetMember( + loc, fieldPtr, Base.getPointer(), fieldName, fieldIndex); + + // Retrieve layout information, compute alignment and return the final + // address. + const RecordDecl *rec = field->getParent(); + auto &layout = CGF.CGM.getTypes().getCIRGenRecordLayout(rec); + unsigned idx = layout.getCIRFieldNo(field); + auto offset = CharUnits::fromQuantity(layout.getCIRType().getElementOffset( + CGF.CGM.getDataLayout().layout, idx)); + auto addr = + Address(memberAddr, Base.getAlignment().alignmentAtOffset(offset)); + return addr; +} + +static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { + const auto *RD = Type.getTypePtr()->getAsCXXRecordDecl(); + if (!RD) + return false; + + if (RD->isDynamicClass()) + return true; + + for (const auto &Base : RD->bases()) + if (hasAnyVptr(Base.getType(), Context)) + return true; + + for (const FieldDecl *Field : RD->fields()) + if (hasAnyVptr(Field->getType(), Context)) + return true; + + return false; +} + +static Address buildPointerWithAlignment(const Expr *expr, + LValueBaseInfo *baseInfo, + TBAAAccessInfo *tbaaInfo, + KnownNonNull_t isKnownNonNull, + CIRGenFunction &cgf) { + // We allow this with ObjC object pointers because of fragile ABIs. + assert(expr->getType()->isPointerType() || + expr->getType()->isObjCObjectPointerType()); + expr = expr->IgnoreParens(); + + // Casts: + if (const CastExpr *CE = dyn_cast(expr)) { + if (const auto *ECE = dyn_cast(CE)) + cgf.CGM.buildExplicitCastExprType(ECE, &cgf); + + switch (CE->getCastKind()) { + default: { + llvm::errs() << CE->getCastKindName() << "\n"; + assert(0 && "not implemented"); + } + // Non-converting casts (but not C's implicit conversion from void*). + case CK_BitCast: + case CK_NoOp: + case CK_AddressSpaceConversion: + if (auto PtrTy = + CE->getSubExpr()->getType()->getAs()) { + if (PtrTy->getPointeeType()->isVoidType()) + break; + assert(!MissingFeatures::tbaa()); + + LValueBaseInfo innerBaseInfo; + Address addr = cgf.buildPointerWithAlignment( + CE->getSubExpr(), &innerBaseInfo, tbaaInfo, isKnownNonNull); + if (baseInfo) + *baseInfo = innerBaseInfo; + + if (isa(CE)) { + assert(!MissingFeatures::tbaa()); + LValueBaseInfo TargetTypeBaseInfo; + + CharUnits Align = cgf.CGM.getNaturalPointeeTypeAlignment( + expr->getType(), &TargetTypeBaseInfo); + + // If the source l-value is opaque, honor the alignment of the + // casted-to type. 
+ if (innerBaseInfo.getAlignmentSource() != AlignmentSource::Decl) { + if (baseInfo) + baseInfo->mergeForCast(TargetTypeBaseInfo); + addr = Address(addr.getPointer(), addr.getElementType(), Align, + isKnownNonNull); + } + } + + if (cgf.SanOpts.has(SanitizerKind::CFIUnrelatedCast) && + CE->getCastKind() == CK_BitCast) { + if (auto PT = expr->getType()->getAs()) + llvm_unreachable("NYI"); + } + + auto ElemTy = + cgf.getTypes().convertTypeForMem(expr->getType()->getPointeeType()); + addr = cgf.getBuilder().createElementBitCast( + cgf.getLoc(expr->getSourceRange()), addr, ElemTy); + if (CE->getCastKind() == CK_AddressSpaceConversion) { + assert(!MissingFeatures::addressSpace()); + llvm_unreachable("NYI"); + } + return addr; + } + break; + + // Nothing to do here... + case CK_LValueToRValue: + case CK_NullToPointer: + case CK_IntegralToPointer: + break; + + // Array-to-pointer decay. TODO(cir): BaseInfo and TBAAInfo. + case CK_ArrayToPointerDecay: + return cgf.buildArrayToPointerDecay(CE->getSubExpr()); + + case CK_UncheckedDerivedToBase: + case CK_DerivedToBase: { + // TODO: Support accesses to members of base classes in TBAA. For now, we + // conservatively pretend that the complete object is of the base class + // type. + assert(!MissingFeatures::tbaa()); + Address Addr = cgf.buildPointerWithAlignment(CE->getSubExpr(), baseInfo); + auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); + return cgf.getAddressOfBaseClass( + Addr, Derived, CE->path_begin(), CE->path_end(), + cgf.shouldNullCheckClassCastValue(CE), CE->getExprLoc()); + } + } + } + + // Unary &. + if (const UnaryOperator *UO = dyn_cast(expr)) { + // TODO(cir): maybe we should use cir.unary for pointers here instead. + if (UO->getOpcode() == UO_AddrOf) { + LValue LV = cgf.buildLValue(UO->getSubExpr()); + if (baseInfo) + *baseInfo = LV.getBaseInfo(); + assert(!MissingFeatures::tbaa()); + return LV.getAddress(); + } + } + + // std::addressof and variants. + if (auto *Call = dyn_cast(expr)) { + switch (Call->getBuiltinCallee()) { + default: + break; + case Builtin::BIaddressof: + case Builtin::BI__addressof: + case Builtin::BI__builtin_addressof: { + llvm_unreachable("NYI"); + } + } + } + + // TODO: conditional operators, comma. + + // Otherwise, use the alignment of the type. 
+  return cgf.makeNaturalAddressForPointer(
+      cgf.buildScalarExpr(expr), expr->getType()->getPointeeType(), CharUnits(),
+      /*ForPointeeType=*/true, baseInfo, tbaaInfo, isKnownNonNull);
+}
+
+/// Helper method to check if the underlying ABI is AAPCS.
+static bool isAAPCS(const TargetInfo &TargetInfo) {
+  return TargetInfo.getABI().starts_with("aapcs");
+}
+
+Address CIRGenFunction::getAddrOfBitFieldStorage(LValue base,
+                                                 const FieldDecl *field,
+                                                 mlir::Type fieldType,
+                                                 unsigned index) {
+  if (index == 0)
+    return base.getAddress();
+  auto loc = getLoc(field->getLocation());
+  auto fieldPtr =
+      mlir::cir::PointerType::get(getBuilder().getContext(), fieldType);
+  auto sea = getBuilder().createGetMember(loc, fieldPtr, base.getPointer(),
+                                          field->getName(), index);
+  return Address(sea, CharUnits::One());
+}
+
+static bool useVolatileForBitField(const CIRGenModule &cgm, LValue base,
+                                   const CIRGenBitFieldInfo &info,
+                                   const FieldDecl *field) {
+  return isAAPCS(cgm.getTarget()) && cgm.getCodeGenOpts().AAPCSBitfieldWidth &&
+         info.VolatileStorageSize != 0 &&
+         field->getType()
+             .withCVRQualifiers(base.getVRQualifiers())
+             .isVolatileQualified();
+}
+
+LValue CIRGenFunction::buildLValueForBitField(LValue base,
+                                              const FieldDecl *field) {
+
+  LValueBaseInfo BaseInfo = base.getBaseInfo();
+  const RecordDecl *rec = field->getParent();
+  auto &layout = CGM.getTypes().getCIRGenRecordLayout(field->getParent());
+  auto &info = layout.getBitFieldInfo(field);
+  auto useVolatile = useVolatileForBitField(CGM, base, info, field);
+  unsigned Idx = layout.getCIRFieldNo(field);
+
+  if (useVolatile ||
+      (IsInPreservedAIRegion ||
+       (getDebugInfo() && rec->hasAttr()))) {
+    llvm_unreachable("NYI");
+  }
+
+  Address Addr = getAddrOfBitFieldStorage(base, field, info.StorageType, Idx);
+
+  auto loc = getLoc(field->getLocation());
+  if (Addr.getElementType() != info.StorageType)
+    Addr = builder.createElementBitCast(loc, Addr, info.StorageType);
+
+  QualType fieldType =
+      field->getType().withCVRQualifiers(base.getVRQualifiers());
+  assert(!MissingFeatures::tbaa() && "NYI TBAA for bit fields");
+  LValueBaseInfo fieldBaseInfo(BaseInfo.getAlignmentSource());
+  return LValue::MakeBitfield(Addr, info, fieldType, fieldBaseInfo,
+                              TBAAAccessInfo());
+}
+
+LValue CIRGenFunction::buildLValueForField(LValue base,
+                                           const FieldDecl *field) {
+  LValueBaseInfo BaseInfo = base.getBaseInfo();
+
+  if (field->isBitField())
+    return buildLValueForBitField(base, field);
+
+  // Fields of may-alias structures are may-alias themselves.
+  // FIXME: this should get propagated down through anonymous structs and unions.
+  QualType FieldType = field->getType();
+  const RecordDecl *rec = field->getParent();
+  AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource();
+  LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource));
+  if (MissingFeatures::tbaa() || rec->hasAttr() ||
+      FieldType->isVectorType()) {
+    assert(!MissingFeatures::tbaa() && "NYI");
+  } else if (rec->isUnion()) {
+    assert(!MissingFeatures::tbaa() && "NYI");
+  } else {
+    // If no base type has been assigned for the base access, then try to
+    // generate one for this base lvalue.
+ assert(!MissingFeatures::tbaa() && "NYI"); + } + + Address addr = base.getAddress(); + if (auto *ClassDef = dyn_cast(rec)) { + if (CGM.getCodeGenOpts().StrictVTablePointers && + ClassDef->isDynamicClass()) { + llvm_unreachable("NYI"); + } + } + + unsigned RecordCVR = base.getVRQualifiers(); + if (rec->isUnion()) { + // NOTE(cir): the element to be loaded/stored need to type-match the + // source/destination, so we emit a GetMemberOp here. + llvm::StringRef fieldName = field->getName(); + unsigned fieldIndex = field->getFieldIndex(); + if (CGM.LambdaFieldToName.count(field)) + fieldName = CGM.LambdaFieldToName[field]; + addr = buildAddrOfFieldStorage(*this, addr, field, fieldName, fieldIndex); + + if (CGM.getCodeGenOpts().StrictVTablePointers && + hasAnyVptr(FieldType, getContext())) + // Because unions can easily skip invariant.barriers, we need to add + // a barrier every time CXXRecord field with vptr is referenced. + assert(!MissingFeatures::createInvariantGroup()); + + if (IsInPreservedAIRegion || + (getDebugInfo() && rec->hasAttr())) { + assert(!MissingFeatures::generateDebugInfo()); + } + + if (FieldType->isReferenceType()) + llvm_unreachable("NYI"); + } else { + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr())) { + llvm::StringRef fieldName = field->getName(); + auto &layout = CGM.getTypes().getCIRGenRecordLayout(field->getParent()); + unsigned fieldIndex = layout.getCIRFieldNo(field); + + if (CGM.LambdaFieldToName.count(field)) + fieldName = CGM.LambdaFieldToName[field]; + addr = buildAddrOfFieldStorage(*this, addr, field, fieldName, fieldIndex); + } else + // Remember the original struct field index + addr = buildPreserveStructAccess(*this, base, addr, field); + } + + // If this is a reference field, load the reference right now. + if (FieldType->isReferenceType()) { + assert(!MissingFeatures::tbaa()); + LValue RefLVal = makeAddrLValue(addr, FieldType, FieldBaseInfo); + if (RecordCVR & Qualifiers::Volatile) + RefLVal.getQuals().addVolatile(); + addr = buildLoadOfReference(RefLVal, getLoc(field->getSourceRange()), + &FieldBaseInfo); + + // Qualifiers on the struct don't apply to the referencee. + RecordCVR = 0; + FieldType = FieldType->getPointeeType(); + } + + // Make sure that the address is pointing to the right type. This is critical + // for both unions and structs. A union needs a bitcast, a struct element will + // need a bitcast if the CIR type laid out doesn't match the desired type. + // TODO(CIR): CodeGen requires a bitcast here for unions or for structs where + // the LLVM type doesn't match the desired type. No idea when the latter might + // occur, though. + + if (field->hasAttr()) + llvm_unreachable("NYI"); + + if (MissingFeatures::tbaa()) + // Next line should take a TBAA object + llvm_unreachable("NYI"); + LValue LV = makeAddrLValue(addr, FieldType, FieldBaseInfo); + LV.getQuals().addCVRQualifiers(RecordCVR); + + // __weak attribute on a field is ignored. 
+ if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak) + llvm_unreachable("NYI"); + + return LV; +} + +LValue CIRGenFunction::buildLValueForFieldInitialization( + LValue Base, const clang::FieldDecl *Field, llvm::StringRef FieldName) { + QualType FieldType = Field->getType(); + + if (!FieldType->isReferenceType()) + return buildLValueForField(Base, Field); + + auto &layout = CGM.getTypes().getCIRGenRecordLayout(Field->getParent()); + unsigned FieldIndex = layout.getCIRFieldNo(Field); + + Address V = buildAddrOfFieldStorage(*this, Base.getAddress(), Field, + FieldName, FieldIndex); + + // Make sure that the address is pointing to the right type. + auto memTy = getTypes().convertTypeForMem(FieldType); + V = builder.createElementBitCast(getLoc(Field->getSourceRange()), V, memTy); + + // TODO: Generate TBAA information that describes this access as a structure + // member access and not just an access to an object of the field's type. This + // should be similar to what we do in EmitLValueForField(). + LValueBaseInfo BaseInfo = Base.getBaseInfo(); + AlignmentSource FieldAlignSource = BaseInfo.getAlignmentSource(); + LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(FieldAlignSource)); + assert(!MissingFeatures::tbaa() && "NYI"); + return makeAddrLValue(V, FieldType, FieldBaseInfo); +} + +LValue +CIRGenFunction::buildCompoundLiteralLValue(const CompoundLiteralExpr *E) { + if (E->isFileScope()) { + llvm_unreachable("NYI"); + } + + if (E->getType()->isVariablyModifiedType()) { + llvm_unreachable("NYI"); + } + + Address DeclPtr = CreateMemTemp(E->getType(), getLoc(E->getSourceRange()), + ".compoundliteral"); + const Expr *InitExpr = E->getInitializer(); + LValue Result = makeAddrLValue(DeclPtr, E->getType(), AlignmentSource::Decl); + + buildAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(), + /*Init*/ true); + + // Block-scope compound literals are destroyed at the end of the enclosing + // scope in C. + if (!getLangOpts().CPlusPlus) + if (QualType::DestructionKind DtorKind = E->getType().isDestructedType()) + llvm_unreachable("NYI"); + + return Result; +} + +// Detect the unusual situation where an inline version is shadowed by a +// non-inline version. In that case we should pick the external one +// everywhere. That's GCC behavior too. +static bool onlyHasInlineBuiltinDeclaration(const FunctionDecl *FD) { + for (const FunctionDecl *PD = FD; PD; PD = PD->getPreviousDecl()) + if (!PD->isInlineBuiltinDeclaration()) + return false; + return true; +} + +static CIRGenCallee buildDirectCallee(CIRGenModule &CGM, GlobalDecl GD) { + const auto *FD = cast(GD.getDecl()); + + if (auto builtinID = FD->getBuiltinID()) { + std::string NoBuiltinFD = ("no-builtin-" + FD->getName()).str(); + std::string NoBuiltins = "no-builtins"; + + auto *A = FD->getAttr(); + StringRef Ident = A ? A->getLabel() : FD->getName(); + std::string FDInlineName = (Ident + ".inline").str(); + + auto &CGF = *CGM.getCurrCIRGenFun(); + bool IsPredefinedLibFunction = + CGM.getASTContext().BuiltinInfo.isPredefinedLibFunction(builtinID); + bool HasAttributeNoBuiltin = false; + assert(!MissingFeatures::attributeNoBuiltin() && "NYI"); + // bool HasAttributeNoBuiltin = + // CGF.CurFn->getAttributes().hasFnAttr(NoBuiltinFD) || + // CGF.CurFn->getAttributes().hasFnAttr(NoBuiltins); + + // When directing calling an inline builtin, call it through it's mangled + // name to make it clear it's not the actual builtin. 
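+    // e.g. an `extern inline` wrapper around __builtin_memcpy is emitted under
+    // the suffixed symbol "memcpy.inline", and calls that must use the inline
+    // definition are directed at that symbol rather than at the builtin.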
+ auto Fn = cast(CGF.CurFn); + if (Fn.getName() != FDInlineName && onlyHasInlineBuiltinDeclaration(FD)) { + assert(0 && "NYI"); + } + + // Replaceable builtins provide their own implementation of a builtin. If we + // are in an inline builtin implementation, avoid trivial infinite + // recursion. Honor __attribute__((no_builtin("foo"))) or + // __attribute__((no_builtin)) on the current function unless foo is + // not a predefined library function which means we must generate the + // builtin no matter what. + else if (!IsPredefinedLibFunction || !HasAttributeNoBuiltin) + return CIRGenCallee::forBuiltin(builtinID, FD); + } + + auto CalleePtr = buildFunctionDeclPointer(CGM, GD); + + assert(!CGM.getLangOpts().CUDA && "NYI"); + + return CIRGenCallee::forDirect(CalleePtr, GD); +} + +// TODO: this can also be abstrated into common AST helpers +bool CIRGenFunction::hasBooleanRepresentation(QualType Ty) { + + if (Ty->isBooleanType()) + return true; + + if (const EnumType *ET = Ty->getAs()) + return ET->getDecl()->getIntegerType()->isBooleanType(); + + if (const AtomicType *AT = Ty->getAs()) + return hasBooleanRepresentation(AT->getValueType()); + + return false; +} + +CIRGenCallee CIRGenFunction::buildCallee(const clang::Expr *E) { + E = E->IgnoreParens(); + + // Look through function-to-pointer decay. + if (const auto *ICE = dyn_cast(E)) { + if (ICE->getCastKind() == CK_FunctionToPointerDecay || + ICE->getCastKind() == CK_BuiltinFnToFnPtr) { + return buildCallee(ICE->getSubExpr()); + } + // Resolve direct calls. + } else if (const auto *DRE = dyn_cast(E)) { + const auto *FD = dyn_cast(DRE->getDecl()); + assert(FD && + "DeclRef referring to FunctionDecl only thing supported so far"); + return buildDirectCallee(CGM, FD); + } else if (auto ME = dyn_cast(E)) { + if (auto FD = dyn_cast(ME->getMemberDecl())) { + buildIgnoredExpr(ME->getBase()); + return buildDirectCallee(CGM, FD); + } + } + + assert(!dyn_cast(E) && "NYI"); + assert(!dyn_cast(E) && "NYI"); + + // Otherwise, we have an indirect reference. + mlir::Value calleePtr; + QualType functionType; + if (auto ptrType = E->getType()->getAs()) { + calleePtr = buildScalarExpr(E); + functionType = ptrType->getPointeeType(); + } else { + functionType = E->getType(); + calleePtr = buildLValue(E).getPointer(); + } + assert(functionType->isFunctionType()); + + GlobalDecl GD; + if (const auto *VD = + dyn_cast_or_null(E->getReferencedDeclOfCallee())) + GD = GlobalDecl(VD); + + CIRGenCalleeInfo calleeInfo(functionType->getAs(), GD); + CIRGenCallee callee(calleeInfo, calleePtr.getDefiningOp()); + return callee; + + assert(false && "Nothing else supported yet!"); +} + +mlir::Value CIRGenFunction::buildToMemory(mlir::Value Value, QualType Ty) { + // Bool has a different representation in memory than in registers. 
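+  // Classic LLVM codegen widens the i1 register form to its i8 memory form at
+  // this point; CIR models bool as !cir.bool for both uses, so no conversion
+  // is currently required here.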
+ return Value; +} + +void CIRGenFunction::buildStoreOfScalar(mlir::Value value, LValue lvalue) { + // TODO: constant matrix type, no init, non temporal, TBAA + buildStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), + lvalue.getType(), lvalue.getBaseInfo(), + lvalue.getTBAAInfo(), false, false); +} + +void CIRGenFunction::buildStoreOfScalar(mlir::Value value, Address addr, + bool isVolatile, QualType ty, + LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo, bool isInit, + bool isNontemporal) { + value = buildToMemory(value, ty); + + LValue atomicLValue = + LValue::makeAddr(addr, ty, getContext(), baseInfo, tbaaInfo); + if (ty->isAtomicType() || + (!isInit && LValueIsSuitableForInlineAtomic(atomicLValue))) { + buildAtomicStore(RValue::get(value), atomicLValue, isInit); + return; + } + + mlir::Type SrcTy = value.getType(); + if (const auto *ClangVecTy = ty->getAs()) { + auto VecTy = dyn_cast(SrcTy); + if (!CGM.getCodeGenOpts().PreserveVec3Type && + ClangVecTy->getNumElements() == 3) { + // Handle vec3 special. + if (VecTy && VecTy.getSize() == 3) { + // Our source is a vec3, do a shuffle vector to make it a vec4. + value = builder.createVecShuffle(value.getLoc(), value, + ArrayRef{0, 1, 2, -1}); + SrcTy = mlir::cir::VectorType::get(VecTy.getContext(), + VecTy.getEltType(), 4); + } + if (addr.getElementType() != SrcTy) { + addr = addr.withElementType(SrcTy); + } + } + } + + // Update the alloca with more info on initialization. + assert(addr.getPointer() && "expected pointer to exist"); + auto SrcAlloca = + dyn_cast_or_null(addr.getPointer().getDefiningOp()); + if (currVarDecl && SrcAlloca) { + const VarDecl *VD = currVarDecl; + assert(VD && "VarDecl expected"); + if (VD->hasInit()) + SrcAlloca.setInitAttr(mlir::UnitAttr::get(builder.getContext())); + } + + assert(currSrcLoc && "must pass in source location"); + builder.createStore(*currSrcLoc, value, addr, isVolatile); + + if (isNontemporal) { + llvm_unreachable("NYI"); + } + + if (MissingFeatures::tbaa()) + llvm_unreachable("NYI"); +} + +void CIRGenFunction::buildStoreOfScalar(mlir::Value value, LValue lvalue, + bool isInit) { + if (lvalue.getType()->isConstantMatrixType()) { + llvm_unreachable("NYI"); + } + + buildStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), + lvalue.getType(), lvalue.getBaseInfo(), + lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal()); +} + +/// Given an expression that represents a value lvalue, this +/// method emits the address of the lvalue, then loads the result as an rvalue, +/// returning the rvalue. +RValue CIRGenFunction::buildLoadOfLValue(LValue LV, SourceLocation Loc) { + assert(!LV.getType()->isFunctionType()); + assert(!(LV.getType()->isConstantMatrixType()) && "not implemented"); + + if (LV.isBitField()) + return buildLoadOfBitfieldLValue(LV, Loc); + + if (LV.isSimple()) + return RValue::get(buildLoadOfScalar(LV, Loc)); + + if (LV.isVectorElt()) { + auto load = builder.createLoad(getLoc(Loc), LV.getVectorAddress()); + return RValue::get(builder.create( + getLoc(Loc), load, LV.getVectorIdx())); + } + + if (LV.isExtVectorElt()) { + return buildLoadOfExtVectorElementLValue(LV); + } + + llvm_unreachable("NYI"); +} + +int64_t CIRGenFunction::getAccessedFieldNo(unsigned int idx, + const mlir::ArrayAttr elts) { + auto elt = mlir::dyn_cast(elts[idx]); + assert(elt && "The indices should be integer attributes"); + return elt.getInt(); +} + +// If this is a reference to a subset of the elements of a vector, create an +// appropriate shufflevector. 
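+// e.g. given `typedef float float4 __attribute__((ext_vector_type(4)));`,
+// loading `v.xz` reads elements {0, 2} of `v` with a single vector shuffle
+// rather than two scalar extracts, while `v.x` is a plain element extract.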
+RValue CIRGenFunction::buildLoadOfExtVectorElementLValue(LValue LV) { + mlir::Location loc = LV.getExtVectorPointer().getLoc(); + mlir::Value Vec = builder.createLoad(loc, LV.getExtVectorAddress()); + + // HLSL allows treating scalars as one-element vectors. Converting the scalar + // IR value to a vector here allows the rest of codegen to behave as normal. + if (getLangOpts().HLSL && !mlir::isa(Vec.getType())) { + llvm_unreachable("HLSL NYI"); + } + + const mlir::ArrayAttr Elts = LV.getExtVectorElts(); + + // If the result of the expression is a non-vector type, we must be extracting + // a single element. Just codegen as an extractelement. + const auto *ExprVT = LV.getType()->getAs(); + if (!ExprVT) { + int64_t InIdx = getAccessedFieldNo(0, Elts); + mlir::cir::ConstantOp Elt = + builder.getConstInt(loc, builder.getSInt64Ty(), InIdx); + return RValue::get(builder.create(loc, Vec, Elt)); + } + + // Always use shuffle vector to try to retain the original program structure + unsigned NumResultElts = ExprVT->getNumElements(); + + SmallVector Mask; + for (unsigned i = 0; i != NumResultElts; ++i) + Mask.push_back(getAccessedFieldNo(i, Elts)); + + Vec = builder.createVecShuffle(loc, Vec, Mask); + return RValue::get(Vec); +} + +RValue CIRGenFunction::buildLoadOfBitfieldLValue(LValue LV, + SourceLocation Loc) { + const CIRGenBitFieldInfo &info = LV.getBitFieldInfo(); + + // Get the output type. + mlir::Type resLTy = convertType(LV.getType()); + Address ptr = LV.getBitFieldAddress(); + + bool useVolatile = LV.isVolatileQualified() && + info.VolatileStorageSize != 0 && isAAPCS(CGM.getTarget()); + + auto field = builder.createGetBitfield(getLoc(Loc), resLTy, ptr.getPointer(), + ptr.getElementType(), info, + LV.isVolatile(), useVolatile); + assert(!MissingFeatures::emitScalarRangeCheck() && "NYI"); + return RValue::get(field); +} + +void CIRGenFunction::buildStoreThroughExtVectorComponentLValue(RValue Src, + LValue Dst) { + mlir::Location loc = Dst.getExtVectorPointer().getLoc(); + + // HLSL allows storing to scalar values through ExtVector component LValues. + // To support this we need to handle the case where the destination address is + // a scalar. + Address DstAddr = Dst.getExtVectorAddress(); + if (!mlir::isa(DstAddr.getElementType())) { + llvm_unreachable("HLSL NYI"); + } + + // This access turns into a read/modify/write of the vector. Load the input + // value now. + mlir::Value Vec = builder.createLoad(loc, DstAddr); + const mlir::ArrayAttr Elts = Dst.getExtVectorElts(); + + mlir::Value SrcVal = Src.getScalarVal(); + + if (const clang::VectorType *VTy = + Dst.getType()->getAs()) { + unsigned NumSrcElts = VTy->getNumElements(); + unsigned NumDstElts = cast(Vec.getType()).getSize(); + if (NumDstElts == NumSrcElts) { + // Use shuffle vector is the src and destination are the same number of + // elements and restore the vector mask since it is on the side it will be + // stored. + SmallVector Mask(NumDstElts); + for (unsigned i = 0; i != NumSrcElts; ++i) + Mask[getAccessedFieldNo(i, Elts)] = i; + + Vec = builder.createVecShuffle(loc, SrcVal, Mask); + } else if (NumDstElts > NumSrcElts) { + // Extended the source vector to the same length and then shuffle it + // into the destination. + // FIXME: since we're shuffling with undef, can we just use the indices + // into that? This could be simpler. 
+ SmallVector ExtMask; + for (unsigned i = 0; i != NumSrcElts; ++i) + ExtMask.push_back(i); + ExtMask.resize(NumDstElts, -1); + mlir::Value ExtSrcVal = builder.createVecShuffle(loc, SrcVal, ExtMask); + // build identity + SmallVector Mask; + for (unsigned i = 0; i != NumDstElts; ++i) + Mask.push_back(i); + + // When the vector size is odd and .odd or .hi is used, the last element + // of the Elts constant array will be one past the size of the vector. + // Ignore the last element here, if it is greater than the mask size. + if ((unsigned)getAccessedFieldNo(NumSrcElts - 1, Elts) == Mask.size()) + NumSrcElts--; + + // modify when what gets shuffled in + for (unsigned i = 0; i != NumSrcElts; ++i) + Mask[getAccessedFieldNo(i, Elts)] = i + NumDstElts; + Vec = builder.createVecShuffle(loc, Vec, ExtSrcVal, Mask); + } else { + // We should never shorten the vector + llvm_unreachable("unexpected shorten vector length"); + } + } else { + // If the Src is a scalar (not a vector), and the target is a vector it must + // be updating one element. + unsigned InIdx = getAccessedFieldNo(0, Elts); + auto Elt = builder.getSInt64(InIdx, loc); + + Vec = builder.create(loc, Vec, SrcVal, Elt); + } + + builder.createStore(loc, Vec, Dst.getExtVectorAddress(), + Dst.isVolatileQualified()); +} + +void CIRGenFunction::buildStoreThroughLValue(RValue Src, LValue Dst, + bool isInit) { + if (!Dst.isSimple()) { + if (Dst.isVectorElt()) { + // Read/modify/write the vector, inserting the new element + mlir::Location loc = Dst.getVectorPointer().getLoc(); + mlir::Value Vector = builder.createLoad(loc, Dst.getVectorAddress()); + Vector = builder.create( + loc, Vector, Src.getScalarVal(), Dst.getVectorIdx()); + builder.createStore(loc, Vector, Dst.getVectorAddress()); + return; + } + + if (Dst.isExtVectorElt()) + return buildStoreThroughExtVectorComponentLValue(Src, Dst); + + assert(Dst.isBitField() && "NIY LValue type"); + mlir::Value result; + return buildStoreThroughBitfieldLValue(Src, Dst, result); + } + assert(Dst.isSimple() && "only implemented simple"); + + // There's special magic for assigning into an ARC-qualified l-value. + if (Qualifiers::ObjCLifetime Lifetime = Dst.getQuals().getObjCLifetime()) { + llvm_unreachable("NYI"); + } + + if (Dst.isObjCWeak() && !Dst.isNonGC()) { + llvm_unreachable("NYI"); + } + + if (Dst.isObjCStrong() && !Dst.isNonGC()) { + llvm_unreachable("NYI"); + } + + assert(Src.isScalar() && "Can't emit an agg store with this method"); + buildStoreOfScalar(Src.getScalarVal(), Dst, isInit); +} + +void CIRGenFunction::buildStoreThroughBitfieldLValue(RValue Src, LValue Dst, + mlir::Value &Result) { + // According to the AACPS: + // When a volatile bit-field is written, and its container does not overlap + // with any non-bit-field member, its container must be read exactly once + // and written exactly once using the access width appropriate to the type + // of the container. The two accesses are not atomic. 
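+  // e.g. for `struct S { volatile int f : 3; };`, assigning to `s.f` under
+  // AAPCS performs exactly one 32-bit load and one 32-bit store of the
+  // containing `int`, rather than a narrower read-modify-write.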
+ if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) && + CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad) + llvm_unreachable("volatile bit-field is not implemented for the AACPS"); + + const CIRGenBitFieldInfo &info = Dst.getBitFieldInfo(); + mlir::Type resLTy = getTypes().convertTypeForMem(Dst.getType()); + Address ptr = Dst.getBitFieldAddress(); + + const bool useVolatile = + CGM.getCodeGenOpts().AAPCSBitfieldWidth && Dst.isVolatileQualified() && + info.VolatileStorageSize != 0 && isAAPCS(CGM.getTarget()); + + mlir::Value dstAddr = Dst.getAddress().getPointer(); + + Result = builder.createSetBitfield( + dstAddr.getLoc(), resLTy, dstAddr, ptr.getElementType(), + Src.getScalarVal(), info, Dst.isVolatileQualified(), useVolatile); +} + +static LValue buildGlobalVarDeclLValue(CIRGenFunction &CGF, const Expr *E, + const VarDecl *VD) { + QualType T = E->getType(); + + // If it's thread_local, emit a call to its wrapper function instead. + if (VD->getTLSKind() == VarDecl::TLS_Dynamic && + CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD)) + assert(0 && "not implemented"); + + // Check if the variable is marked as declare target with link clause in + // device codegen. + if (CGF.getLangOpts().OpenMP) + llvm_unreachable("not implemented"); + + // Traditional LLVM codegen handles thread local separately, CIR handles + // as part of getAddrOfGlobalVar. + auto V = CGF.CGM.getAddrOfGlobalVar(VD); + + auto RealVarTy = CGF.getTypes().convertTypeForMem(VD->getType()); + mlir::cir::PointerType realPtrTy = CGF.getBuilder().getPointerTo( + RealVarTy, cast_if_present( + cast(V.getType()).getAddrSpace())); + if (realPtrTy != V.getType()) + V = CGF.getBuilder().createBitcast(V.getLoc(), V, realPtrTy); + + CharUnits Alignment = CGF.getContext().getDeclAlign(VD); + Address Addr(V, RealVarTy, Alignment); + // Emit reference to the private copy of the variable if it is an OpenMP + // threadprivate variable. 
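+  // e.g. a global declared as `int g;` followed by `#pragma omp threadprivate(g)`
+  // must be accessed through the current thread's private copy rather than the
+  // original global storage.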
+ if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd && + VD->hasAttr()) { + assert(0 && "NYI"); + } + LValue LV; + if (VD->getType()->isReferenceType()) + assert(0 && "NYI"); + else + LV = CGF.makeAddrLValue(Addr, T, AlignmentSource::Decl); + assert(!MissingFeatures::setObjCGCLValueClass() && "NYI"); + return LV; +} + +static LValue buildCapturedFieldLValue(CIRGenFunction &CGF, const FieldDecl *FD, + mlir::Value ThisValue) { + QualType TagType = CGF.getContext().getTagDeclType(FD->getParent()); + LValue LV = CGF.MakeNaturalAlignAddrLValue(ThisValue, TagType); + return CGF.buildLValueForField(LV, FD); +} + +static LValue buildFunctionDeclLValue(CIRGenFunction &CGF, const Expr *E, + GlobalDecl GD) { + const FunctionDecl *FD = cast(GD.getDecl()); + auto funcOp = buildFunctionDeclPointer(CGF.CGM, GD); + auto loc = CGF.getLoc(E->getSourceRange()); + CharUnits align = CGF.getContext().getDeclAlign(FD); + + mlir::Type fnTy = funcOp.getFunctionType(); + auto ptrTy = mlir::cir::PointerType::get(CGF.getBuilder().getContext(), fnTy); + mlir::Value addr = CGF.getBuilder().create( + loc, ptrTy, funcOp.getSymName()); + + if (funcOp.getFunctionType() != + CGF.CGM.getTypes().ConvertType(FD->getType())) { + fnTy = CGF.CGM.getTypes().ConvertType(FD->getType()); + ptrTy = mlir::cir::PointerType::get(CGF.getBuilder().getContext(), fnTy); + + addr = CGF.getBuilder().create( + addr.getLoc(), ptrTy, mlir::cir::CastKind::bitcast, addr); + } + + return CGF.makeAddrLValue(Address(addr, fnTy, align), E->getType(), + AlignmentSource::Decl); +} + +LValue CIRGenFunction::buildDeclRefLValue(const DeclRefExpr *E) { + const NamedDecl *ND = E->getDecl(); + QualType T = E->getType(); + + assert(E->isNonOdrUse() != NOUR_Unevaluated && + "should not emit an unevaluated operand"); + + if (const auto *VD = dyn_cast(ND)) { + // Global Named registers access via intrinsics only + if (VD->getStorageClass() == SC_Register && VD->hasAttr() && + !VD->isLocalVarDecl()) + llvm_unreachable("NYI"); + + assert(E->isNonOdrUse() != NOUR_Constant && "not implemented"); + + // Check for captured variables. + if (E->refersToEnclosingVariableOrCapture()) { + VD = VD->getCanonicalDecl(); + if (auto *FD = LambdaCaptureFields.lookup(VD)) + return buildCapturedFieldLValue(*this, FD, CXXABIThisValue); + assert(!MissingFeatures::CGCapturedStmtInfo() && "NYI"); + // TODO[OpenMP]: Find the appropiate captured variable value and return + // it. + // TODO[OpenMP]: Set non-temporal information in the captured LVal. + // LLVM codegen: + assert(!MissingFeatures::openMP()); + // Address addr = GetAddrOfBlockDecl(VD); + // return MakeAddrLValue(addr, T, AlignmentSource::Decl); + } + } + + // FIXME(CIR): We should be able to assert this for FunctionDecls as well! + // FIXME(CIR): We should be able to assert this for all DeclRefExprs, not just + // those with a valid source location. + assert((ND->isUsed(false) || !isa(ND) || E->isNonOdrUse() || + !E->getLocation().isValid()) && + "Should not use decl without marking it used!"); + + if (ND->hasAttr()) { + llvm_unreachable("NYI"); + } + + if (const auto *VD = dyn_cast(ND)) { + // Check if this is a global variable + if (VD->hasLinkage() || VD->isStaticDataMember()) + return buildGlobalVarDeclLValue(*this, E, VD); + + Address addr = Address::invalid(); + + // The variable should generally be present in the local decl map. 
+ auto iter = LocalDeclMap.find(VD); + if (iter != LocalDeclMap.end()) { + addr = iter->second; + } + // Otherwise, it might be static local we haven't emitted yet for some + // reason; most likely, because it's in an outer function. + else if (VD->isStaticLocal()) { + mlir::cir::GlobalOp var = CGM.getOrCreateStaticVarDecl( + *VD, CGM.getCIRLinkageVarDefinition(VD, /*IsConstant=*/false)); + addr = Address(builder.createGetGlobal(var), convertType(VD->getType()), + getContext().getDeclAlign(VD)); + } else { + llvm_unreachable("DeclRefExpr for decl not entered in LocalDeclMap?"); + } + + // Handle threadlocal function locals. + if (VD->getTLSKind() != VarDecl::TLS_None) + llvm_unreachable("thread-local storage is NYI"); + + // Check for OpenMP threadprivate variables. + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && + VD->hasAttr()) { + llvm_unreachable("NYI"); + } + + // Drill into block byref variables. + bool isBlockByref = VD->isEscapingByref(); + if (isBlockByref) { + llvm_unreachable("NYI"); + } + + // Drill into reference types. + LValue LV = + VD->getType()->isReferenceType() + ? buildLoadOfReferenceLValue(addr, getLoc(E->getSourceRange()), + VD->getType(), AlignmentSource::Decl) + : makeAddrLValue(addr, T, AlignmentSource::Decl); + + // Statics are defined as globals, so they are not include in the function's + // symbol table. + assert((VD->isStaticLocal() || symbolTable.count(VD)) && + "non-static locals should be already mapped"); + + bool isLocalStorage = VD->hasLocalStorage(); + + bool NonGCable = + isLocalStorage && !VD->getType()->isReferenceType() && !isBlockByref; + + if (NonGCable && MissingFeatures::setNonGC()) { + llvm_unreachable("garbage collection is NYI"); + } + + bool isImpreciseLifetime = + (isLocalStorage && !VD->hasAttr()); + if (isImpreciseLifetime && MissingFeatures::ARC()) + llvm_unreachable("imprecise lifetime is NYI"); + assert(!MissingFeatures::setObjCGCLValueClass()); + + // Statics are defined as globals, so they are not include in the function's + // symbol table. + assert((VD->isStaticLocal() || symbolTable.lookup(VD)) && + "Name lookup must succeed for non-static local variables"); + + return LV; + } + + if (const auto *FD = dyn_cast(ND)) { + LValue LV = buildFunctionDeclLValue(*this, E, FD); + + // Emit debuginfo for the function declaration if the target wants to. + if (getContext().getTargetInfo().allowDebugInfoForExternalRef()) + assert(!MissingFeatures::generateDebugInfo()); + + return LV; + } + + // FIXME: While we're emitting a binding from an enclosing scope, all other + // DeclRefExprs we see should be implicitly treated as if they also refer to + // an enclosing scope. + if (const auto *BD = dyn_cast(ND)) { + if (E->refersToEnclosingVariableOrCapture()) { + auto *FD = LambdaCaptureFields.lookup(BD); + return buildCapturedFieldLValue(*this, FD, CXXABIThisValue); + } + return buildLValue(BD->getBinding()); + } + + // We can form DeclRefExprs naming GUID declarations when reconstituting + // non-type template parameters into expressions. 
+ if (const auto *GD = dyn_cast(ND)) + llvm_unreachable("NYI"); + + if (const auto *TPO = dyn_cast(ND)) + llvm_unreachable("NYI"); + + llvm_unreachable("Unhandled DeclRefExpr"); +} + +LValue +CIRGenFunction::buildPointerToDataMemberBinaryExpr(const BinaryOperator *E) { + assert((E->getOpcode() == BO_PtrMemD || E->getOpcode() == BO_PtrMemI) && + "unexpected binary operator opcode"); + + auto baseAddr = Address::invalid(); + if (E->getOpcode() == BO_PtrMemD) + baseAddr = buildLValue(E->getLHS()).getAddress(); + else + baseAddr = buildPointerWithAlignment(E->getLHS()); + + const auto *memberPtrTy = E->getRHS()->getType()->castAs(); + + auto memberPtr = buildScalarExpr(E->getRHS()); + + LValueBaseInfo baseInfo; + // TODO(cir): add TBAA + assert(!MissingFeatures::tbaa()); + auto memberAddr = buildCXXMemberDataPointerAddress(E, baseAddr, memberPtr, + memberPtrTy, &baseInfo); + + return makeAddrLValue(memberAddr, memberPtrTy->getPointeeType(), baseInfo); +} + +LValue +CIRGenFunction::buildExtVectorElementExpr(const ExtVectorElementExpr *E) { + // Emit the base vector as an l-value. + LValue base; + + // ExtVectorElementExpr's base can either be a vector or pointer to vector. + if (E->isArrow()) { + // If it is a pointer to a vector, emit the address and form an lvalue with + // it. + LValueBaseInfo BaseInfo; + // TODO(cir): Support TBAA + assert(!MissingFeatures::tbaa()); + Address Ptr = buildPointerWithAlignment(E->getBase(), &BaseInfo); + const auto *PT = E->getBase()->getType()->castAs(); + base = makeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo); + base.getQuals().removeObjCGCAttr(); + } else if (E->getBase()->isGLValue()) { + // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), + // emit the base as an lvalue. + assert(E->getBase()->getType()->isVectorType()); + base = buildLValue(E->getBase()); + } else { + // Otherwise, the base is a normal rvalue (as in (V+V).x), emit it as such. + assert(E->getBase()->getType()->isVectorType() && + "Result must be a vector"); + mlir::Value Vec = buildScalarExpr(E->getBase()); + + // Store the vector to memory (because LValue wants an address). + QualType BaseTy = E->getBase()->getType(); + Address VecMem = CreateMemTemp(BaseTy, Vec.getLoc(), "tmp"); + builder.createStore(Vec.getLoc(), Vec, VecMem); + base = makeAddrLValue(VecMem, BaseTy, AlignmentSource::Decl); + } + + QualType type = + E->getType().withCVRQualifiers(base.getQuals().getCVRQualifiers()); + + // Encode the element access list into a vector of unsigned indices. + SmallVector indices; + E->getEncodedElementAccess(indices); + + if (base.isSimple()) { + SmallVector attrElts; + for (uint32_t i : indices) { + attrElts.push_back(static_cast(i)); + } + auto elts = builder.getI64ArrayAttr(attrElts); + return LValue::MakeExtVectorElt(base.getAddress(), elts, type, + base.getBaseInfo(), base.getTBAAInfo()); + } + assert(base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); + + mlir::ArrayAttr baseElts = base.getExtVectorElts(); + + // Composite the two indices + SmallVector attrElts; + for (uint32_t i : indices) { + attrElts.push_back(getAccessedFieldNo(i, baseElts)); + } + auto elts = builder.getI64ArrayAttr(attrElts); + + return LValue::MakeExtVectorElt(base.getExtVectorAddress(), elts, type, + base.getBaseInfo(), base.getTBAAInfo()); +} + +LValue CIRGenFunction::buildBinaryOperatorLValue(const BinaryOperator *E) { + // Comma expressions just emit their LHS then their RHS as an l-value. 
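+  // e.g. in C++, `(f(), x) = 3;` evaluates `f()` for its side effects and then
+  // performs the assignment through the l-value `x`.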
+ if (E->getOpcode() == BO_Comma) { + buildIgnoredExpr(E->getLHS()); + return buildLValue(E->getRHS()); + } + + if (E->getOpcode() == BO_PtrMemD || E->getOpcode() == BO_PtrMemI) + return buildPointerToDataMemberBinaryExpr(E); + + assert(E->getOpcode() == BO_Assign && "unexpected binary l-value"); + + // Note that in all of these cases, __block variables need the RHS + // evaluated first just in case the variable gets moved by the RHS. + + switch (CIRGenFunction::getEvaluationKind(E->getType())) { + case TEK_Scalar: { + assert(E->getLHS()->getType().getObjCLifetime() == + clang::Qualifiers::ObjCLifetime::OCL_None && + "not implemented"); + + RValue RV = buildAnyExpr(E->getRHS()); + LValue LV = buildLValue(E->getLHS()); + + SourceLocRAIIObject Loc{*this, getLoc(E->getSourceRange())}; + if (LV.isBitField()) { + mlir::Value result; + buildStoreThroughBitfieldLValue(RV, LV, result); + } else { + buildStoreThroughLValue(RV, LV); + } + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); + return LV; + } + + case TEK_Complex: + return buildComplexAssignmentLValue(E); + case TEK_Aggregate: + assert(0 && "not implemented"); + } + llvm_unreachable("bad evaluation kind"); +} + +/// Given an expression of pointer type, try to +/// derive a more accurate bound on the alignment of the pointer. +Address CIRGenFunction::buildPointerWithAlignment( + const Expr *expr, LValueBaseInfo *baseInfo, TBAAAccessInfo *tbaaInfo, + KnownNonNull_t isKnownNonNull) { + Address addr = ::buildPointerWithAlignment(expr, baseInfo, tbaaInfo, + isKnownNonNull, *this); + if (isKnownNonNull && !addr.isKnownNonNull()) + addr.setKnownNonNull(); + return addr; +} + +/// Perform the usual unary conversions on the specified +/// expression and compare the result against zero, returning an Int1Ty value. +mlir::Value CIRGenFunction::evaluateExprAsBool(const Expr *E) { + // TODO(cir): PGO + if (const MemberPointerType *MPT = E->getType()->getAs()) { + assert(0 && "not implemented"); + } + + QualType BoolTy = getContext().BoolTy; + SourceLocation Loc = E->getExprLoc(); + // TODO(cir): CGFPOptionsRAII for FP stuff. + if (!E->getType()->isAnyComplexType()) + return buildScalarConversion(buildScalarExpr(E), E->getType(), BoolTy, Loc); + + llvm_unreachable("complex to scalar not implemented"); +} + +LValue CIRGenFunction::buildUnaryOpLValue(const UnaryOperator *E) { + // __extension__ doesn't affect lvalue-ness. + if (E->getOpcode() == UO_Extension) + return buildLValue(E->getSubExpr()); + + QualType ExprTy = getContext().getCanonicalType(E->getSubExpr()->getType()); + switch (E->getOpcode()) { + default: + llvm_unreachable("Unknown unary operator lvalue!"); + case UO_Deref: { + QualType T = E->getSubExpr()->getType()->getPointeeType(); + assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); + + LValueBaseInfo BaseInfo; + // TODO: add TBAAInfo + Address Addr = buildPointerWithAlignment(E->getSubExpr(), &BaseInfo); + + // Tag 'load' with deref attribute. + if (auto loadOp = + dyn_cast<::mlir::cir::LoadOp>(Addr.getPointer().getDefiningOp())) { + loadOp.setIsDerefAttr(mlir::UnitAttr::get(builder.getContext())); + } + + LValue LV = LValue::makeAddr(Addr, T, BaseInfo); + // TODO: set addr space + // TODO: ObjC/GC/__weak write barrier stuff. + return LV; + } + case UO_Real: + case UO_Imag: { + LValue LV = buildLValue(E->getSubExpr()); + assert(LV.isSimple() && "real/imag on non-ordinary l-value"); + + // __real is valid on scalars. This is a faster way of testing that. 
+ // __imag can only produce an rvalue on scalars. + if (E->getOpcode() == UO_Real && + !mlir::isa(LV.getAddress().getElementType())) { + assert(E->getSubExpr()->getType()->isArithmeticType()); + return LV; + } + + QualType T = ExprTy->castAs()->getElementType(); + + auto Loc = getLoc(E->getExprLoc()); + Address Component = + (E->getOpcode() == UO_Real + ? buildAddrOfRealComponent(Loc, LV.getAddress(), LV.getType()) + : buildAddrOfImagComponent(Loc, LV.getAddress(), LV.getType())); + // TODO(cir): TBAA info. + assert(!MissingFeatures::tbaa()); + LValue ElemLV = makeAddrLValue(Component, T, LV.getBaseInfo()); + ElemLV.getQuals().addQualifiers(LV.getQuals()); + return ElemLV; + } + case UO_PreInc: + case UO_PreDec: { + bool isInc = E->isIncrementOp(); + bool isPre = E->isPrefix(); + LValue LV = buildLValue(E->getSubExpr()); + + if (E->getType()->isAnyComplexType()) { + buildComplexPrePostIncDec(E, LV, isInc, true /*isPre*/); + } else { + buildScalarPrePostIncDec(E, LV, isInc, isPre); + } + + return LV; + } + } +} + +/// Emit code to compute the specified expression which +/// can have any type. The result is returned as an RValue struct. +RValue CIRGenFunction::buildAnyExpr(const Expr *E, AggValueSlot aggSlot, + bool ignoreResult) { + switch (CIRGenFunction::getEvaluationKind(E->getType())) { + case TEK_Scalar: + return RValue::get(buildScalarExpr(E)); + case TEK_Complex: + return RValue::getComplex(buildComplexExpr(E)); + case TEK_Aggregate: { + if (!ignoreResult && aggSlot.isIgnored()) + aggSlot = CreateAggTemp(E->getType(), getLoc(E->getSourceRange()), + getCounterAggTmpAsString()); + buildAggExpr(E, aggSlot); + return aggSlot.asRValue(); + } + } + llvm_unreachable("bad evaluation kind"); +} + +RValue CIRGenFunction::buildCallExpr(const clang::CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(!E->getCallee()->getType()->isBlockPointerType() && "ObjC Blocks NYI"); + + if (const auto *CE = dyn_cast(E)) + return buildCXXMemberCallExpr(CE, ReturnValue); + + assert(!dyn_cast(E) && "CUDA NYI"); + if (const auto *CE = dyn_cast(E)) + if (const CXXMethodDecl *MD = + dyn_cast_or_null(CE->getCalleeDecl())) + return buildCXXOperatorMemberCallExpr(CE, MD, ReturnValue); + + CIRGenCallee callee = buildCallee(E->getCallee()); + + if (callee.isBuiltin()) + return buildBuiltinExpr(callee.getBuiltinDecl(), callee.getBuiltinID(), E, + ReturnValue); + + assert(!callee.isPsuedoDestructor() && "NYI"); + + return buildCall(E->getCallee()->getType(), callee, E, ReturnValue); +} + +RValue CIRGenFunction::GetUndefRValue(QualType ty) { + if (ty->isVoidType()) + return RValue::get(nullptr); + + switch (getEvaluationKind(ty)) { + case TEK_Complex: { + llvm_unreachable("NYI"); + } + + // If this is a use of an undefined aggregate type, the aggregate must have + // an identifiable address. Just because the contents of the value are + // undefined doesn't mean that the address can't be taken and compared. 
+  case TEK_Aggregate: {
+    llvm_unreachable("NYI");
+  }
+
+  case TEK_Scalar:
+    llvm_unreachable("NYI");
+  }
+  llvm_unreachable("bad evaluation kind");
+}
+
+LValue CIRGenFunction::buildStmtExprLValue(const StmtExpr *E) {
+  // Can only get l-value for message expression returning aggregate type
+  RValue RV = buildAnyExprToTemp(E);
+  return makeAddrLValue(RV.getAggregateAddress(), E->getType(),
+                        AlignmentSource::Decl);
+}
+
+RValue CIRGenFunction::buildCall(clang::QualType CalleeType,
+                                 const CIRGenCallee &OrigCallee,
+                                 const clang::CallExpr *E,
+                                 ReturnValueSlot ReturnValue,
+                                 mlir::Value Chain) {
+  // Get the actual function type. The callee type will always be a pointer to
+  // function type or a block pointer type.
+  assert(CalleeType->isFunctionPointerType() &&
+         "Call must have function pointer type!");
+
+  auto *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl().getDecl();
+  (void)TargetDecl;
+
+  CalleeType = getContext().getCanonicalType(CalleeType);
+
+  auto PointeeType = cast(CalleeType)->getPointeeType();
+
+  CIRGenCallee Callee = OrigCallee;
+
+  if (getLangOpts().CPlusPlus)
+    assert(!SanOpts.has(SanitizerKind::Function) && "Sanitizers NYI");
+
+  const auto *FnType = cast(PointeeType);
+
+  assert(!SanOpts.has(SanitizerKind::CFIICall) && "Sanitizers NYI");
+
+  CallArgList Args;
+
+  assert(!Chain && "FIX THIS");
+
+  // C++17 requires that we evaluate arguments to a call using assignment syntax
+  // right-to-left, and that we evaluate arguments to certain other operators
+  // left-to-right. Note that we allow this to override the order dictated by
+  // the calling convention on the MS ABI, which means that parameter
+  // destruction order is not necessarily reverse construction order.
+  // FIXME: Revisit this based on C++ committee response to unimplementability.
+  EvaluationOrder Order = EvaluationOrder::Default;
+  if (auto *OCE = dyn_cast(E)) {
+    if (OCE->isAssignmentOp())
+      Order = EvaluationOrder::ForceRightToLeft;
+    else {
+      switch (OCE->getOperator()) {
+      case OO_LessLess:
+      case OO_GreaterGreater:
+      case OO_AmpAmp:
+      case OO_PipePipe:
+      case OO_Comma:
+      case OO_ArrowStar:
+        Order = EvaluationOrder::ForceLeftToRight;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  buildCallArgs(Args, dyn_cast(FnType), E->arguments(),
+                E->getDirectCallee(), /*ParamsToSkip*/ 0, Order);
+
+  const CIRGenFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
+      Args, FnType, /*ChainCall=*/Chain.getAsOpaquePointer());
+
+  // C99 6.5.2.2p6:
+  //   If the expression that denotes the called function has a type that does
+  //   not include a prototype, [the default argument promotions are performed].
+  //   If the number of arguments does not equal the number of parameters, the
+  //   behavior is undefined. If the function is defined with a type that
+  //   includes a prototype, and either the prototype ends with an ellipsis (,
+  //   ...) or the types of the arguments after promotion are not compatible
+  //   with the types of the parameters, the behavior is undefined. If the
+  //   function is defined with a type that does not include a prototype, and
+  //   the types of the arguments after promotion are not compatible with those
+  //   of the parameters after promotion, the behavior is undefined [except in
+  //   some trivial cases].
+  // That is, in the general case, we should assume that a call through an
+  // unprototyped function type works like a *non-variadic* call. The way we
+  // make this work is to cast to the exact type of the promoted arguments.
+  //
+  // Chain calls use the same code path to add the invisible chain parameter to
+  // the function type.
+  if (isa(FnType) || Chain) {
+    assert(!MissingFeatures::chainCall());
+    assert(!MissingFeatures::addressSpace());
+    auto CalleeTy = getTypes().GetFunctionType(FnInfo);
+    // Get the non-variadic function type.
+    CalleeTy = mlir::cir::FuncType::get(CalleeTy.getInputs(),
+                                        CalleeTy.getReturnType(), false);
+    auto CalleePtrTy =
+        mlir::cir::PointerType::get(builder.getContext(), CalleeTy);
+
+    auto *Fn = Callee.getFunctionPointer();
+    mlir::Value Addr;
+    if (auto funcOp = llvm::dyn_cast(Fn)) {
+      Addr = builder.create(
+          getLoc(E->getSourceRange()),
+          mlir::cir::PointerType::get(builder.getContext(),
+                                      funcOp.getFunctionType()),
+          funcOp.getSymName());
+    } else {
+      Addr = Fn->getResult(0);
+    }
+
+    Fn = builder.createBitcast(Addr, CalleePtrTy).getDefiningOp();
+    Callee.setFunctionPointer(Fn);
+  }
+
+  assert(!CGM.getLangOpts().HIP && "HIP NYI");
+
+  assert(!MustTailCall && "Must tail NYI");
+  mlir::cir::CIRCallOpInterface callOP;
+  RValue Call = buildCall(FnInfo, Callee, ReturnValue, Args, &callOP,
+                          E == MustTailCall, getLoc(E->getExprLoc()), E);
+
+  assert(!getDebugInfo() && "Debug Info NYI");
+
+  return Call;
+}
+
+/// Emit code to compute the specified expression, ignoring the result.
+void CIRGenFunction::buildIgnoredExpr(const Expr *E) {
+  if (E->isPRValue())
+    return (void)buildAnyExpr(E, AggValueSlot::ignored(), true);
+
+  // Just emit it as an l-value and drop the result.
+  buildLValue(E);
+}
+
+Address CIRGenFunction::buildArrayToPointerDecay(const Expr *E,
+                                                 LValueBaseInfo *BaseInfo) {
+  assert(E->getType()->isArrayType() &&
+         "Array to pointer decay must have array source type!");
+
+  // Expressions of array type can't be bitfields or vector elements.
+  LValue LV = buildLValue(E);
+  Address Addr = LV.getAddress();
+
+  // If the array type was an incomplete type, we need to make sure
+  // the decay ends up being the right type.
+  auto lvalueAddrTy =
+      mlir::dyn_cast(Addr.getPointer().getType());
+  assert(lvalueAddrTy && "expected pointer");
+
+  if (E->getType()->isVariableArrayType())
+    return Addr;
+
+  auto pointeeTy =
+      mlir::dyn_cast(lvalueAddrTy.getPointee());
+  assert(pointeeTy && "expected array");
+
+  mlir::Type arrayTy = convertType(E->getType());
+  assert(mlir::isa(arrayTy) && "expected array");
+  assert(pointeeTy == arrayTy);
+
+  // The result of this decay conversion points to an array element within the
+  // base lvalue. However, since TBAA currently does not support representing
+  // accesses to elements of member arrays, we conservatively represent accesses
+  // to the pointee object as if it had no base lvalue specified at all.
+  // TODO: Support TBAA for member arrays.
+  QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType();
+  if (BaseInfo)
+    *BaseInfo = LV.getBaseInfo();
+  assert(!MissingFeatures::tbaa() && "NYI");
+
+  mlir::Value ptr = CGM.getBuilder().maybeBuildArrayDecay(
+      CGM.getLoc(E->getSourceRange()), Addr.getPointer(),
+      getTypes().convertTypeForMem(EltType));
+  return Address(ptr, Addr.getAlignment());
+}
+
+/// If the specified expr is a simple decay from an array to pointer,
+/// return the array subexpression.
+/// FIXME: this could be abstracted into a common AST helper.
+static const Expr *isSimpleArrayDecayOperand(const Expr *E) {
+  // If this isn't just an array->pointer decay, bail out.
+ const auto *CE = dyn_cast(E); + if (!CE || CE->getCastKind() != CK_ArrayToPointerDecay) + return nullptr; + + // If this is a decay from variable width array, bail out. + const Expr *SubExpr = CE->getSubExpr(); + if (SubExpr->getType()->isVariableArrayType()) + return nullptr; + + return SubExpr; +} + +/// Given an array base, check whether its member access belongs to a record +/// with preserve_access_index attribute or not. +/// TODO(cir): don't need to be specific to LLVM's codegen, refactor into common +/// AST helpers. +static bool isPreserveAIArrayBase(CIRGenFunction &CGF, const Expr *ArrayBase) { + if (!ArrayBase || !CGF.getDebugInfo()) + return false; + + // Only support base as either a MemberExpr or DeclRefExpr. + // DeclRefExpr to cover cases like: + // struct s { int a; int b[10]; }; + // struct s *p; + // p[1].a + // p[1] will generate a DeclRefExpr and p[1].a is a MemberExpr. + // p->b[5] is a MemberExpr example. + const Expr *E = ArrayBase->IgnoreImpCasts(); + if (const auto *ME = dyn_cast(E)) + return ME->getMemberDecl()->hasAttr(); + + if (const auto *DRE = dyn_cast(E)) { + const auto *VarDef = dyn_cast(DRE->getDecl()); + if (!VarDef) + return false; + + const auto *PtrT = VarDef->getType()->getAs(); + if (!PtrT) + return false; + + const auto *PointeeT = + PtrT->getPointeeType()->getUnqualifiedDesugaredType(); + if (const auto *RecT = dyn_cast(PointeeT)) + return RecT->getDecl()->hasAttr(); + return false; + } + + return false; +} + +static mlir::IntegerAttr getConstantIndexOrNull(mlir::Value idx) { + // TODO(cir): should we consider using MLIRs IndexType instead of IntegerAttr? + if (auto constantOp = dyn_cast(idx.getDefiningOp())) + return mlir::dyn_cast(constantOp.getValue()); + return {}; +} + +static CharUnits getArrayElementAlign(CharUnits arrayAlign, mlir::Value idx, + CharUnits eltSize) { + // If we have a constant index, we can use the exact offset of the + // element we're accessing. + auto constantIdx = getConstantIndexOrNull(idx); + if (constantIdx) { + CharUnits offset = constantIdx.getValue().getZExtValue() * eltSize; + return arrayAlign.alignmentAtOffset(offset); + // Otherwise, use the worst-case alignment for any element. + } else { + return arrayAlign.alignmentOfArrayElement(eltSize); + } +} + +static mlir::Value +buildArraySubscriptPtr(CIRGenFunction &CGF, mlir::Location beginLoc, + mlir::Location endLoc, mlir::Value ptr, mlir::Type eltTy, + ArrayRef indices, bool inbounds, + bool signedIndices, bool shouldDecay, + const llvm::Twine &name = "arrayidx") { + assert(indices.size() == 1 && "cannot handle multiple indices yet"); + auto idx = indices.back(); + auto &CGM = CGF.getCIRGenModule(); + // TODO(cir): LLVM codegen emits in bound gep check here, is there anything + // that would enhance tracking this later in CIR? 
+ if (inbounds) + assert(!MissingFeatures::emitCheckedInBoundsGEP() && "NYI"); + return CGM.getBuilder().getArrayElement(beginLoc, endLoc, ptr, eltTy, idx, + shouldDecay); +} + +static QualType getFixedSizeElementType(const ASTContext &ctx, + const VariableArrayType *vla) { + QualType eltType; + do { + eltType = vla->getElementType(); + } while ((vla = ctx.getAsVariableArrayType(eltType))); + return eltType; +} + +static Address buildArraySubscriptPtr( + CIRGenFunction &CGF, mlir::Location beginLoc, mlir::Location endLoc, + Address addr, ArrayRef indices, QualType eltType, + bool inbounds, bool signedIndices, mlir::Location loc, bool shouldDecay, + QualType *arrayType = nullptr, const Expr *Base = nullptr, + const llvm::Twine &name = "arrayidx") { + // Determine the element size of the statically-sized base. This is + // the thing that the indices are expressed in terms of. + if (auto vla = CGF.getContext().getAsVariableArrayType(eltType)) { + eltType = getFixedSizeElementType(CGF.getContext(), vla); + } + + // We can use that to compute the best alignment of the element. + CharUnits eltSize = CGF.getContext().getTypeSizeInChars(eltType); + CharUnits eltAlign = + getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); + + mlir::Value eltPtr; + auto LastIndex = getConstantIndexOrNull(indices.back()); + if (!LastIndex || + (!CGF.IsInPreservedAIRegion && !isPreserveAIArrayBase(CGF, Base))) { + eltPtr = buildArraySubscriptPtr(CGF, beginLoc, endLoc, addr.getPointer(), + addr.getElementType(), indices, inbounds, + signedIndices, shouldDecay, name); + } else { + // assert(!UnimplementedFeature::generateDebugInfo() && "NYI"); + // assert(indices.size() == 1 && "cannot handle multiple indices yet"); + // auto idx = indices.back(); + // auto &CGM = CGF.getCIRGenModule(); + // eltPtr = CGM.getBuilder().getArrayElement(beginLoc, endLoc, + // addr.getPointer(), addr.getElementType(), + // idx); + assert(0 && "NYI"); + } + + return Address(eltPtr, CGF.getTypes().convertTypeForMem(eltType), eltAlign); +} + +LValue CIRGenFunction::buildArraySubscriptExpr(const ArraySubscriptExpr *E, + bool Accessed) { + // The index must always be an integer, which is not an aggregate. Emit it + // in lexical order (this complexity is, sadly, required by C++17). + mlir::Value IdxPre = + (E->getLHS() == E->getIdx()) ? buildScalarExpr(E->getIdx()) : nullptr; + bool SignedIndices = false; + auto EmitIdxAfterBase = [&, IdxPre](bool Promote) -> mlir::Value { + mlir::Value Idx = IdxPre; + if (E->getLHS() != E->getIdx()) { + assert(E->getRHS() == E->getIdx() && "index was neither LHS nor RHS"); + Idx = buildScalarExpr(E->getIdx()); + } + + QualType IdxTy = E->getIdx()->getType(); + bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); + SignedIndices |= IdxSigned; + + if (SanOpts.has(SanitizerKind::ArrayBounds)) + llvm_unreachable("array bounds sanitizer is NYI"); + + // Extend or truncate the index type to 32 or 64-bits. + auto ptrTy = mlir::dyn_cast(Idx.getType()); + if (Promote && ptrTy && mlir::isa(ptrTy.getPointee())) + llvm_unreachable("index type cast is NYI"); + + return Idx; + }; + IdxPre = nullptr; + + // If the base is a vector type, then we are forming a vector element + // with this subscript. 
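+  // e.g. given `typedef int v4 __attribute__((vector_size(16))); v4 v;`,
+  // `v[1] = 0;` subscripts the vector value itself and is emitted as a
+  // vector-element l-value instead of an ordinary array access.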
+ if (E->getBase()->getType()->isVectorType() && + !isa(E->getBase())) { + LValue lhs = buildLValue(E->getBase()); + auto index = EmitIdxAfterBase(/*Promote=*/false); + return LValue::MakeVectorElt(lhs.getAddress(), index, + E->getBase()->getType(), lhs.getBaseInfo(), + lhs.getTBAAInfo()); + } + + // All the other cases basically behave like simple offsetting. + + // Handle the extvector case we ignored above. + if (isa(E->getBase())) { + llvm_unreachable("extvector subscript is NYI"); + } + + assert(!MissingFeatures::tbaa() && "TBAA is NYI"); + LValueBaseInfo EltBaseInfo; + Address Addr = Address::invalid(); + if (const VariableArrayType *vla = + getContext().getAsVariableArrayType(E->getType())) { + // The base must be a pointer, which is not an aggregate. Emit + // it. It needs to be emitted first in case it's what captures + // the VLA bounds. + Addr = buildPointerWithAlignment(E->getBase(), &EltBaseInfo); + auto Idx = EmitIdxAfterBase(/*Promote*/ true); + + // The element count here is the total number of non-VLA elements. + mlir::Value numElements = getVLASize(vla).NumElts; + Idx = builder.createCast(mlir::cir::CastKind::integral, Idx, + numElements.getType()); + Idx = builder.createMul(Idx, numElements); + + QualType ptrType = E->getBase()->getType(); + Addr = buildArraySubscriptPtr( + *this, CGM.getLoc(E->getBeginLoc()), CGM.getLoc(E->getEndLoc()), Addr, + {Idx}, E->getType(), !getLangOpts().isSignedOverflowDefined(), + SignedIndices, CGM.getLoc(E->getExprLoc()), /*shouldDecay=*/false, + &ptrType, E->getBase()); + } else if (const ObjCObjectType *OIT = + E->getType()->getAs()) { + llvm_unreachable("ObjC object type subscript is NYI"); + } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { + // If this is A[i] where A is an array, the frontend will have decayed + // the base to be a ArrayToPointerDecay implicit cast. While correct, it is + // inefficient at -O0 to emit a "gep A, 0, 0" when codegen'ing it, then + // a "gep x, i" here. Emit one "gep A, 0, i". + assert(Array->getType()->isArrayType() && + "Array to pointer decay must have array source type!"); + LValue ArrayLV; + // For simple multidimensional array indexing, set the 'accessed' flag + // for better bounds-checking of the base expression. + if (const auto *ASE = dyn_cast(Array)) + ArrayLV = buildArraySubscriptExpr(ASE, /*Accessed=*/true); + else + ArrayLV = buildLValue(Array); + auto Idx = EmitIdxAfterBase(/*Promote=*/true); + + // Propagate the alignment from the array itself to the result. + QualType arrayType = Array->getType(); + Addr = buildArraySubscriptPtr( + *this, CGM.getLoc(Array->getBeginLoc()), CGM.getLoc(Array->getEndLoc()), + ArrayLV.getAddress(), {Idx}, E->getType(), + !getLangOpts().isSignedOverflowDefined(), SignedIndices, + CGM.getLoc(E->getExprLoc()), /*shouldDecay=*/true, &arrayType, + E->getBase()); + EltBaseInfo = ArrayLV.getBaseInfo(); + // TODO(cir): EltTBAAInfo + assert(!MissingFeatures::tbaa() && "TBAA is NYI"); + } else { + // The base must be a pointer; emit it with an estimate of its alignment. 
+ // TODO(cir): EltTBAAInfo + assert(!MissingFeatures::tbaa() && "TBAA is NYI"); + Addr = buildPointerWithAlignment(E->getBase(), &EltBaseInfo); + auto Idx = EmitIdxAfterBase(/*Promote*/ true); + QualType ptrType = E->getBase()->getType(); + Addr = buildArraySubscriptPtr( + *this, CGM.getLoc(E->getBeginLoc()), CGM.getLoc(E->getEndLoc()), Addr, + Idx, E->getType(), !getLangOpts().isSignedOverflowDefined(), + SignedIndices, CGM.getLoc(E->getExprLoc()), /*shouldDecay=*/false, + &ptrType, E->getBase()); + } + + LValue LV = LValue::makeAddr(Addr, E->getType(), EltBaseInfo); + + if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC) { + llvm_unreachable("ObjC is NYI"); + } + + return LV; +} + +LValue CIRGenFunction::buildStringLiteralLValue(const StringLiteral *E) { + auto sym = CGM.getAddrOfConstantStringFromLiteral(E).getSymbol(); + + auto cstGlobal = mlir::SymbolTable::lookupSymbolIn(CGM.getModule(), sym); + assert(cstGlobal && "Expected global"); + + auto g = dyn_cast(cstGlobal); + assert(g && "unaware of other symbol providers"); + + auto ptrTy = mlir::cir::PointerType::get(CGM.getBuilder().getContext(), + g.getSymType()); + assert(g.getAlignment() && "expected alignment for string literal"); + auto align = *g.getAlignment(); + auto addr = builder.create( + getLoc(E->getSourceRange()), ptrTy, g.getSymName()); + return makeAddrLValue( + Address(addr, g.getSymType(), CharUnits::fromQuantity(align)), + E->getType(), AlignmentSource::Decl); +} + +/// Casts are never lvalues unless that cast is to a reference type. If the cast +/// is to a reference, we can have the usual lvalue result, otherwise if a cast +/// is needed by the code generator in an lvalue context, then it must mean that +/// we need the address of an aggregate in order to access one of its members. +/// This can happen for all the reasons that casts are permitted with aggregate +/// result, including noop aggregate casts, and cast from scalar to union. 
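+/// e.g. given `struct B { int x; }; struct D : B {}; D d;`, the access
+/// `d.x = 0;` goes through an implicit DerivedToBase cast of `d` in an
+/// l-value context, so what is really needed is the address of the B subobject.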
+LValue CIRGenFunction::buildCastLValue(const CastExpr *E) { + switch (E->getCastKind()) { + case CK_HLSLArrayRValue: + case CK_HLSLVectorTruncation: + case CK_ToVoid: + case CK_BitCast: + case CK_LValueToRValueBitCast: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: + case CK_NullToMemberPointer: + case CK_NullToPointer: + case CK_IntegralToPointer: + case CK_PointerToIntegral: + case CK_PointerToBoolean: + case CK_VectorSplat: + case CK_IntegralCast: + case CK_BooleanToSignedIntegral: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingToBoolean: + case CK_FloatingCast: + case CK_FloatingRealToComplex: + case CK_FloatingComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_FloatingComplexCast: + case CK_FloatingComplexToIntegralComplex: + case CK_IntegralRealToComplex: + case CK_IntegralComplexToReal: + case CK_IntegralComplexToBoolean: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: + case CK_DerivedToBaseMemberPointer: + case CK_BaseToDerivedMemberPointer: + case CK_MemberPointerToBoolean: + case CK_ReinterpretMemberPointer: + case CK_AnyPointerToBlockPointerCast: + case CK_ARCProduceObject: + case CK_ARCConsumeObject: + case CK_ARCReclaimReturnedObject: + case CK_ARCExtendBlockObject: + case CK_CopyAndAutoreleaseBlockObject: + case CK_IntToOCLSampler: + case CK_FloatingToFixedPoint: + case CK_FixedPointToFloating: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: + case CK_MatrixCast: + llvm_unreachable("NYI"); + + case CK_Dependent: + llvm_unreachable("dependent cast kind in IR gen!"); + + case CK_BuiltinFnToFnPtr: + llvm_unreachable("builtin functions are handled elsewhere"); + + // These are never l-values; just use the aggregate emission code. + case CK_NonAtomicToAtomic: + case CK_AtomicToNonAtomic: + assert(0 && "NYI"); + + case CK_Dynamic: { + LValue LV = buildLValue(E->getSubExpr()); + Address V = LV.getAddress(); + const auto *DCE = cast(E); + return MakeNaturalAlignAddrLValue(buildDynamicCast(V, DCE), E->getType()); + } + + case CK_ConstructorConversion: + case CK_UserDefinedConversion: + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_LValueToRValue: + return buildLValue(E->getSubExpr()); + + case CK_NoOp: { + // CK_NoOp can model a qualification conversion, which can remove an array + // bound and change the IR type. + LValue LV = buildLValue(E->getSubExpr()); + if (LV.isSimple()) { + Address V = LV.getAddress(); + if (V.isValid()) { + auto T = getTypes().convertTypeForMem(E->getType()); + if (V.getElementType() != T) + LV.setAddress( + builder.createElementBitCast(getLoc(E->getSourceRange()), V, T)); + } + } + return LV; + } + + case CK_UncheckedDerivedToBase: + case CK_DerivedToBase: { + const auto *DerivedClassTy = + E->getSubExpr()->getType()->castAs(); + auto *DerivedClassDecl = cast(DerivedClassTy->getDecl()); + + LValue LV = buildLValue(E->getSubExpr()); + Address This = LV.getAddress(); + + // Perform the derived-to-base conversion + Address Base = getAddressOfBaseClass( + This, DerivedClassDecl, E->path_begin(), E->path_end(), + /*NullCheckValue=*/false, E->getExprLoc()); + + // TODO: Support accesses to members of base classes in TBAA. For now, we + // conservatively pretend that the complete object is of the base class + // type. 
+ assert(!MissingFeatures::tbaa()); + return makeAddrLValue(Base, E->getType(), LV.getBaseInfo()); + } + case CK_ToUnion: + assert(0 && "NYI"); + case CK_BaseToDerived: { + assert(0 && "NYI"); + } + case CK_LValueBitCast: { + assert(0 && "NYI"); + } + case CK_AddressSpaceConversion: { + LValue LV = buildLValue(E->getSubExpr()); + QualType DestTy = getContext().getPointerType(E->getType()); + auto SrcAS = + builder.getAddrSpaceAttr(E->getSubExpr()->getType().getAddressSpace()); + auto DestAS = builder.getAddrSpaceAttr(E->getType().getAddressSpace()); + mlir::Value V = getTargetHooks().performAddrSpaceCast( + *this, LV.getPointer(), SrcAS, DestAS, ConvertType(DestTy)); + assert(!MissingFeatures::tbaa()); + return makeAddrLValue(Address(V, getTypes().convertTypeForMem(E->getType()), + LV.getAddress().getAlignment()), + E->getType(), LV.getBaseInfo()); + } + case CK_ObjCObjectLValueCast: { + assert(0 && "NYI"); + } + case CK_ZeroToOCLOpaqueType: + llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid"); + } + + llvm_unreachable("Unhandled lvalue cast kind?"); +} + +// TODO(cir): candidate for common helper between LLVM and CIR codegen. +static DeclRefExpr *tryToConvertMemberExprToDeclRefExpr(CIRGenFunction &CGF, + const MemberExpr *ME) { + if (auto *VD = dyn_cast(ME->getMemberDecl())) { + // Try to emit static variable member expressions as DREs. + return DeclRefExpr::Create( + CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), VD, + /*RefersToEnclosingVariableOrCapture=*/false, ME->getExprLoc(), + ME->getType(), ME->getValueKind(), nullptr, nullptr, ME->isNonOdrUse()); + } + return nullptr; +} + +LValue CIRGenFunction::buildCheckedLValue(const Expr *E, TypeCheckKind TCK) { + LValue LV; + if (SanOpts.has(SanitizerKind::ArrayBounds) && isa(E)) + assert(0 && "not implemented"); + else + LV = buildLValue(E); + if (!isa(E) && !LV.isBitField() && LV.isSimple()) { + SanitizerSet SkippedChecks; + if (const auto *ME = dyn_cast(E)) { + bool IsBaseCXXThis = isWrappedCXXThis(ME->getBase()); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa(ME->getBase())) + SkippedChecks.set(SanitizerKind::Null, true); + } + buildTypeCheck(TCK, E->getExprLoc(), LV.getPointer(), E->getType(), + LV.getAlignment(), SkippedChecks); + } + return LV; +} + +// TODO(cir): candidate for common AST helper for LLVM and CIR codegen +bool CIRGenFunction::isWrappedCXXThis(const Expr *Obj) { + const Expr *Base = Obj; + while (!isa(Base)) { + // The result of a dynamic_cast can be null. + if (isa(Base)) + return false; + + if (const auto *CE = dyn_cast(Base)) { + Base = CE->getSubExpr(); + } else if (const auto *PE = dyn_cast(Base)) { + Base = PE->getSubExpr(); + } else if (const auto *UO = dyn_cast(Base)) { + if (UO->getOpcode() == UO_Extension) + Base = UO->getSubExpr(); + else + return false; + } else { + return false; + } + } + return true; +} + +LValue CIRGenFunction::buildMemberExpr(const MemberExpr *E) { + if (DeclRefExpr *DRE = tryToConvertMemberExprToDeclRefExpr(*this, E)) { + buildIgnoredExpr(E->getBase()); + return buildDeclRefLValue(DRE); + } + + Expr *BaseExpr = E->getBase(); + // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. 
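+  // For illustration:
+  //   struct S { int x; };
+  //   int f(S s, S *p) { return s.x + p->x; }
+  // `s.x` emits `s` as an lvalue, while `p->x` only needs the value of `p`.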
+ LValue BaseLV; + if (E->isArrow()) { + LValueBaseInfo BaseInfo; + Address Addr = buildPointerWithAlignment(BaseExpr, &BaseInfo); + QualType PtrTy = BaseExpr->getType()->getPointeeType(); + SanitizerSet SkippedChecks; + bool IsBaseCXXThis = isWrappedCXXThis(BaseExpr); + if (IsBaseCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (IsBaseCXXThis || isa(BaseExpr)) + SkippedChecks.set(SanitizerKind::Null, true); + buildTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, + /*Alignment=*/CharUnits::Zero(), SkippedChecks); + BaseLV = makeAddrLValue(Addr, PtrTy, BaseInfo); + } else + BaseLV = buildCheckedLValue(BaseExpr, TCK_MemberAccess); + + NamedDecl *ND = E->getMemberDecl(); + if (auto *Field = dyn_cast(ND)) { + LValue LV = buildLValueForField(BaseLV, Field); + assert(!MissingFeatures::setObjCGCLValueClass() && "NYI"); + if (getLangOpts().OpenMP) { + // If the member was explicitly marked as nontemporal, mark it as + // nontemporal. If the base lvalue is marked as nontemporal, mark access + // to children as nontemporal too. + assert(0 && "not implemented"); + } + return LV; + } + + if (const auto *FD = dyn_cast(ND)) + assert(0 && "not implemented"); + + llvm_unreachable("Unhandled member declaration!"); +} + +LValue CIRGenFunction::buildCallExprLValue(const CallExpr *E) { + RValue RV = buildCallExpr(E); + + if (!RV.isScalar()) + return makeAddrLValue(RV.getAggregateAddress(), E->getType(), + AlignmentSource::Decl); + + assert(E->getCallReturnType(getContext())->isReferenceType() && + "Can't have a scalar return unless the return type is a " + "reference type!"); + + return MakeNaturalAlignPointeeAddrLValue(RV.getScalarVal(), E->getType()); +} + +/// Evaluate an expression into a given memory location. +void CIRGenFunction::buildAnyExprToMem(const Expr *E, Address Location, + Qualifiers Quals, bool IsInit) { + // FIXME: This function should take an LValue as an argument. + switch (getEvaluationKind(E->getType())) { + case TEK_Complex: + assert(0 && "NYI"); + return; + + case TEK_Aggregate: { + buildAggExpr(E, AggValueSlot::forAddr(Location, Quals, + AggValueSlot::IsDestructed_t(IsInit), + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsAliased_t(!IsInit), + AggValueSlot::MayOverlap)); + return; + } + + case TEK_Scalar: { + RValue RV = RValue::get(buildScalarExpr(E)); + LValue LV = makeAddrLValue(Location, E->getType()); + buildStoreThroughLValue(RV, LV); + return; + } + } + llvm_unreachable("bad evaluation kind"); +} + +static Address createReferenceTemporary(CIRGenFunction &CGF, + const MaterializeTemporaryExpr *M, + const Expr *Inner, + Address *Alloca = nullptr) { + // TODO(cir): CGF.getTargetHooks(); + switch (M->getStorageDuration()) { + case SD_FullExpression: + case SD_Automatic: { + // TODO(cir): probably not needed / too LLVM specific? + // If we have a constant temporary array or record try to promote it into a + // constant global under the same rules a normal constant would've been + // promoted. This is easier on the optimizer and generally emits fewer + // instructions. + QualType Ty = Inner->getType(); + if (CGF.CGM.getCodeGenOpts().MergeAllConstants && + (Ty->isArrayType() || Ty->isRecordType()) && + CGF.CGM.isTypeConstant(Ty, /*ExcludeCtor=*/true, /*ExcludeDtor=*/false)) + assert(0 && "NYI"); + + // The temporary memory should be created in the same scope as the extending + // declaration of the temporary materialization expression. 
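+    // For instance, in
+    //   void f() { const int &r = 42; }
+    // the temporary int is allocated next to the slot created for `r`, so it
+    // stays alive for as long as the reference that extends its lifetime.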
+ mlir::cir::AllocaOp extDeclAlloca; + if (const clang::ValueDecl *extDecl = M->getExtendingDecl()) { + auto extDeclAddrIter = CGF.LocalDeclMap.find(extDecl); + if (extDeclAddrIter != CGF.LocalDeclMap.end()) { + extDeclAlloca = dyn_cast_if_present( + extDeclAddrIter->second.getDefiningOp()); + } + } + mlir::OpBuilder::InsertPoint ip; + if (extDeclAlloca) + ip = {extDeclAlloca->getBlock(), extDeclAlloca->getIterator()}; + return CGF.CreateMemTemp(Ty, CGF.getLoc(M->getSourceRange()), + CGF.getCounterRefTmpAsString(), Alloca, ip); + } + case SD_Thread: + case SD_Static: + assert(0 && "NYI"); + + case SD_Dynamic: + llvm_unreachable("temporary can't have dynamic storage duration"); + } + llvm_unreachable("unknown storage duration"); +} + +static void pushTemporaryCleanup(CIRGenFunction &CGF, + const MaterializeTemporaryExpr *M, + const Expr *E, Address ReferenceTemporary) { + // Objective-C++ ARC: + // If we are binding a reference to a temporary that has ownership, we + // need to perform retain/release operations on the temporary. + // + // FIXME: This should be looking at E, not M. + if (auto Lifetime = M->getType().getObjCLifetime()) { + assert(0 && "NYI"); + } + + CXXDestructorDecl *ReferenceTemporaryDtor = nullptr; + if (const RecordType *RT = + E->getType()->getBaseElementTypeUnsafe()->getAs()) { + // Get the destructor for the reference temporary. + auto *ClassDecl = cast(RT->getDecl()); + if (!ClassDecl->hasTrivialDestructor()) + ReferenceTemporaryDtor = ClassDecl->getDestructor(); + } + + if (!ReferenceTemporaryDtor) + return; + + // Call the destructor for the temporary. + switch (M->getStorageDuration()) { + case SD_Static: + case SD_Thread: { + if (E->getType()->isArrayType()) { + llvm_unreachable("SD_Static|SD_Thread + array types not implemented"); + } else { + llvm_unreachable("SD_Static|SD_Thread for general types not implemented"); + } + llvm_unreachable("SD_Static|SD_Thread not implemented"); + } + + case SD_FullExpression: + CGF.pushDestroy(NormalAndEHCleanup, ReferenceTemporary, E->getType(), + CIRGenFunction::destroyCXXObject, + CGF.getLangOpts().Exceptions); + break; + + case SD_Automatic: + llvm_unreachable("SD_Automatic not implemented"); + break; + + case SD_Dynamic: + llvm_unreachable("temporary cannot have dynamic storage duration"); + } +} + +LValue CIRGenFunction::buildMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *M) { + const Expr *E = M->getSubExpr(); + + assert((!M->getExtendingDecl() || !isa(M->getExtendingDecl()) || + !cast(M->getExtendingDecl())->isARCPseudoStrong()) && + "Reference should never be pseudo-strong!"); + + // FIXME: ideally this would use buildAnyExprToMem, however, we cannot do so + // as that will cause the lifetime adjustment to be lost for ARC + auto ownership = M->getType().getObjCLifetime(); + if (ownership != Qualifiers::OCL_None && + ownership != Qualifiers::OCL_ExplicitNone) { + assert(0 && "NYI"); + } + + SmallVector CommaLHSs; + SmallVector Adjustments; + E = E->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); + + for (const auto &Ignored : CommaLHSs) + buildIgnoredExpr(Ignored); + + if (const auto *opaque = dyn_cast(E)) + assert(0 && "NYI"); + + // Create and initialize the reference temporary. 
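+  // For example, for a call such as take(T()) where the parameter is a
+  // const T & and T has a non-trivial destructor, the temporary materialized
+  // here has full-expression storage duration, and pushTemporaryCleanup below
+  // schedules ~T() for the end of the enclosing full expression.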
+ Address Alloca = Address::invalid(); + Address Object = createReferenceTemporary(*this, M, E, &Alloca); + + if (auto Var = + dyn_cast(Object.getPointer().getDefiningOp())) { + // TODO(cir): add something akin to stripPointerCasts() to ptr above + assert(0 && "NYI"); + } else { + switch (M->getStorageDuration()) { + case SD_Automatic: + assert(!MissingFeatures::shouldEmitLifetimeMarkers()); + break; + + case SD_FullExpression: { + if (!ShouldEmitLifetimeMarkers) + break; + assert(0 && "NYI"); + break; + } + + default: + break; + } + + buildAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/ true); + } + pushTemporaryCleanup(*this, M, E, Object); + + // Perform derived-to-base casts and/or field accesses, to get from the + // temporary object we created (and, potentially, for which we extended + // the lifetime) to the subobject we're binding the reference to. + for (SubobjectAdjustment &Adjustment : llvm::reverse(Adjustments)) { + (void)Adjustment; + assert(0 && "NYI"); + } + + return makeAddrLValue(Object, M->getType(), AlignmentSource::Decl); +} + +LValue CIRGenFunction::buildOpaqueValueLValue(const OpaqueValueExpr *e) { + assert(OpaqueValueMappingData::shouldBindAsLValue(e)); + return getOrCreateOpaqueLValueMapping(e); +} + +LValue +CIRGenFunction::getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e) { + assert(OpaqueValueMapping::shouldBindAsLValue(e)); + + llvm::DenseMap::iterator it = + OpaqueLValues.find(e); + + if (it != OpaqueLValues.end()) + return it->second; + + assert(e->isUnique() && "LValue for a nonunique OVE hasn't been emitted"); + return buildLValue(e->getSourceExpr()); +} + +RValue +CIRGenFunction::getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e) { + assert(!OpaqueValueMapping::shouldBindAsLValue(e)); + + llvm::DenseMap::iterator it = + OpaqueRValues.find(e); + + if (it != OpaqueRValues.end()) + return it->second; + + assert(e->isUnique() && "RValue for a nonunique OVE hasn't been emitted"); + return buildAnyExpr(e->getSourceExpr()); +} + +namespace { +// Handle the case where the condition is a constant evaluatable simple integer, +// which means we don't have to separately handle the true/false blocks. +std::optional HandleConditionalOperatorLValueSimpleCase( + CIRGenFunction &CGF, const AbstractConditionalOperator *E) { + const Expr *condExpr = E->getCond(); + bool CondExprBool; + if (CGF.ConstantFoldsToSimpleInteger(condExpr, CondExprBool)) { + const Expr *Live = E->getTrueExpr(), *Dead = E->getFalseExpr(); + if (!CondExprBool) + std::swap(Live, Dead); + + if (!CGF.ContainsLabel(Dead)) { + // If the true case is live, we need to track its region. + if (CondExprBool) { + assert(!MissingFeatures::incrementProfileCounter()); + } + // If a throw expression we emit it and return an undefined lvalue + // because it can't be used. + if (auto *ThrowExpr = dyn_cast(Live->IgnoreParens())) { + llvm_unreachable("NYI"); + } + return CGF.buildLValue(Live); + } + } + return std::nullopt; +} +} // namespace + +/// Emit the operand of a glvalue conditional operator. This is either a glvalue +/// or a (possibly-parenthesized) throw-expression. If this is a throw, no +/// LValue is returned and the current block has been terminated. +static std::optional buildLValueOrThrowExpression(CIRGenFunction &CGF, + const Expr *Operand) { + if (auto *ThrowExpr = dyn_cast(Operand->IgnoreParens())) { + llvm_unreachable("NYI"); + } + + return CGF.buildLValue(Operand); +} + +// Create and generate the 3 blocks for a conditional operator. 
+// Leaves the 'current block' in the continuation basic block. +template +CIRGenFunction::ConditionalInfo +CIRGenFunction::buildConditionalBlocks(const AbstractConditionalOperator *E, + const FuncTy &BranchGenFunc) { + ConditionalInfo Info; + auto &CGF = *this; + ConditionalEvaluation eval(CGF); + auto loc = CGF.getLoc(E->getSourceRange()); + auto &builder = CGF.getBuilder(); + auto *trueExpr = E->getTrueExpr(); + auto *falseExpr = E->getFalseExpr(); + + mlir::Value condV = CGF.buildOpOnBoolExpr(loc, E->getCond()); + SmallVector insertPoints{}; + mlir::Type yieldTy{}; + + auto patchVoidOrThrowSites = [&]() { + if (insertPoints.empty()) + return; + // If both arms are void, so be it. + if (!yieldTy) + yieldTy = CGF.VoidTy; + + // Insert required yields. + for (auto &toInsert : insertPoints) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.restoreInsertionPoint(toInsert); + + // Block does not return: build empty yield. + if (mlir::isa(yieldTy)) { + builder.create(loc); + } else { // Block returns: set null yield value. + mlir::Value op0 = builder.getNullValue(yieldTy, loc); + builder.create(loc, op0); + } + } + }; + + Info.Result = builder + .create( + loc, condV, /*trueBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{ + *this, loc, b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + + assert(!MissingFeatures::incrementProfileCounter()); + eval.begin(CGF); + Info.LHS = BranchGenFunc(CGF, trueExpr); + auto lhs = Info.LHS->getPointer(); + eval.end(CGF); + + if (lhs) { + yieldTy = lhs.getType(); + b.create(loc, lhs); + return; + } + // If LHS or RHS is a throw or void expression we need + // to patch arms as to properly match yield types. + insertPoints.push_back(b.saveInsertionPoint()); + }, + /*falseBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{ + *this, loc, b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + + assert(!MissingFeatures::incrementProfileCounter()); + eval.begin(CGF); + Info.RHS = BranchGenFunc(CGF, falseExpr); + auto rhs = Info.RHS->getPointer(); + eval.end(CGF); + + if (rhs) { + yieldTy = rhs.getType(); + b.create(loc, rhs); + } else { + // If LHS or RHS is a throw or void expression we + // need to patch arms as to properly match yield + // types. + insertPoints.push_back(b.saveInsertionPoint()); + } + + patchVoidOrThrowSites(); + }) + .getResult(); + return Info; +} + +LValue CIRGenFunction::buildConditionalOperatorLValue( + const AbstractConditionalOperator *expr) { + if (!expr->isGLValue()) { + // ?: here should be an aggregate. 
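+    // For example, `cond ? s1 : s2` with s1/s2 of class type is a prvalue and
+    // is emitted through the aggregate path here, whereas a glvalue use such
+    // as `(cond ? i : j) = 0` continues with the ternary emission further
+    // below.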
+ assert(hasAggregateEvaluationKind(expr->getType()) && + "Unexpected conditional operator!"); + return buildAggExprToLValue(expr); + } + + OpaqueValueMapping binding(*this, expr); + if (std::optional Res = + HandleConditionalOperatorLValueSimpleCase(*this, expr)) + return *Res; + + ConditionalInfo Info = + buildConditionalBlocks(expr, [](CIRGenFunction &CGF, const Expr *E) { + return buildLValueOrThrowExpression(CGF, E); + }); + + if ((Info.LHS && !Info.LHS->isSimple()) || + (Info.RHS && !Info.RHS->isSimple())) + llvm_unreachable("unsupported conditional operator"); + + if (Info.LHS && Info.RHS) { + Address lhsAddr = Info.LHS->getAddress(); + Address rhsAddr = Info.RHS->getAddress(); + Address result(Info.Result, lhsAddr.getElementType(), + std::min(lhsAddr.getAlignment(), rhsAddr.getAlignment())); + AlignmentSource alignSource = + std::max(Info.LHS->getBaseInfo().getAlignmentSource(), + Info.RHS->getBaseInfo().getAlignmentSource()); + assert(!MissingFeatures::tbaa()); + return makeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource)); + } else { + llvm_unreachable("NYI"); + } +} + +/// Emit code to compute a designator that specifies the location +/// of the expression. +/// FIXME: document this function better. +LValue CIRGenFunction::buildLValue(const Expr *E) { + // FIXME: ApplyDebugLocation DL(*this, E); + switch (E->getStmtClass()) { + default: { + emitError(getLoc(E->getExprLoc()), "l-value not implemented for '") + << E->getStmtClassName() << "'"; + assert(0 && "not implemented"); + } + case Expr::ConditionalOperatorClass: + return buildConditionalOperatorLValue(cast(E)); + case Expr::ArraySubscriptExprClass: + return buildArraySubscriptExpr(cast(E)); + case Expr::ExtVectorElementExprClass: + return buildExtVectorElementExpr(cast(E)); + case Expr::BinaryOperatorClass: + return buildBinaryOperatorLValue(cast(E)); + case Expr::CompoundAssignOperatorClass: { + QualType Ty = E->getType(); + if (const AtomicType *AT = Ty->getAs()) + assert(0 && "not yet implemented"); + if (!Ty->isAnyComplexType()) + return buildCompoundAssignmentLValue(cast(E)); + return buildComplexCompoundAssignmentLValue( + cast(E)); + } + case Expr::CallExprClass: + case Expr::CXXMemberCallExprClass: + case Expr::CXXOperatorCallExprClass: + case Expr::UserDefinedLiteralClass: + return buildCallExprLValue(cast(E)); + case Expr::ExprWithCleanupsClass: { + const auto *cleanups = cast(E); + LValue LV; + + auto scopeLoc = getLoc(E->getSourceRange()); + [[maybe_unused]] auto scope = builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{*this, loc, + builder.getInsertionBlock()}; + + LV = buildLValue(cleanups->getSubExpr()); + if (LV.isSimple()) { + // Defend against branches out of gnu statement expressions + // surrounded by cleanups. + Address addr = LV.getAddress(); + auto v = addr.getPointer(); + LV = LValue::makeAddr(addr.withPointer(v, NotKnownNonNull), + LV.getType(), getContext(), LV.getBaseInfo(), + LV.getTBAAInfo()); + } + }); + + // FIXME: Is it possible to create an ExprWithCleanups that produces a + // bitfield lvalue or some other non-simple lvalue? 
+ return LV; + } + case Expr::ParenExprClass: + return buildLValue(cast(E)->getSubExpr()); + case Expr::DeclRefExprClass: + return buildDeclRefLValue(cast(E)); + case Expr::UnaryOperatorClass: + return buildUnaryOpLValue(cast(E)); + case Expr::StringLiteralClass: + return buildStringLiteralLValue(cast(E)); + case Expr::MemberExprClass: + return buildMemberExpr(cast(E)); + case Expr::CompoundLiteralExprClass: + return buildCompoundLiteralLValue(cast(E)); + case Expr::PredefinedExprClass: + return buildPredefinedLValue(cast(E)); + case Expr::CXXFunctionalCastExprClass: + case Expr::CXXReinterpretCastExprClass: + case Expr::CXXConstCastExprClass: + case Expr::CXXAddrspaceCastExprClass: + case Expr::ObjCBridgedCastExprClass: + emitError(getLoc(E->getExprLoc()), "l-value not implemented for '") + << E->getStmtClassName() << "'"; + assert(0 && "Use buildCastLValue below, remove me when adding testcase"); + case Expr::CStyleCastExprClass: + case Expr::CXXStaticCastExprClass: + case Expr::CXXDynamicCastExprClass: + case Expr::ImplicitCastExprClass: + return buildCastLValue(cast(E)); + case Expr::OpaqueValueExprClass: + return buildOpaqueValueLValue(cast(E)); + + case Expr::MaterializeTemporaryExprClass: + return buildMaterializeTemporaryExpr(cast(E)); + + case Expr::ObjCPropertyRefExprClass: + llvm_unreachable("cannot emit a property reference directly"); + case Expr::StmtExprClass: + return buildStmtExprLValue(cast(E)); + } + + return LValue::makeAddr(Address::invalid(), E->getType()); +} + +/// Given the address of a temporary variable, produce an r-value of its type. +RValue CIRGenFunction::convertTempToRValue(Address addr, clang::QualType type, + clang::SourceLocation loc) { + LValue lvalue = makeAddrLValue(addr, type, AlignmentSource::Decl); + switch (getEvaluationKind(type)) { + case TEK_Complex: + llvm_unreachable("NYI"); + case TEK_Aggregate: + llvm_unreachable("NYI"); + case TEK_Scalar: + return RValue::get(buildLoadOfScalar(lvalue, loc)); + } + llvm_unreachable("NYI"); +} + +/// An LValue is a candidate for having its loads and stores be made atomic if +/// we are operating under /volatile:ms *and* the LValue itself is volatile and +/// performing such an operation can be performed without a libcall. +bool CIRGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) { + if (!CGM.getLangOpts().MSVolatile) + return false; + + llvm_unreachable("NYI"); +} + +/// Emit an `if` on a boolean condition, filling `then` and `else` into +/// appropriated regions. +mlir::LogicalResult CIRGenFunction::buildIfOnBoolExpr(const Expr *cond, + const Stmt *thenS, + const Stmt *elseS) { + // Attempt to be more accurate as possible with IfOp location, generate + // one fused location that has either 2 or 4 total locations, depending + // on else's availability. 
+ auto getStmtLoc = [this](const Stmt &s) { + return mlir::FusedLoc::get(builder.getContext(), + {getLoc(s.getSourceRange().getBegin()), + getLoc(s.getSourceRange().getEnd())}); + }; + auto thenLoc = getStmtLoc(*thenS); + std::optional elseLoc; + if (elseS) + elseLoc = getStmtLoc(*elseS); + + mlir::LogicalResult resThen = mlir::success(), resElse = mlir::success(); + buildIfOnBoolExpr( + cond, /*thenBuilder=*/ + [&](mlir::OpBuilder &, mlir::Location) { + LexicalScope lexScope{*this, thenLoc, builder.getInsertionBlock()}; + resThen = buildStmt(thenS, /*useCurrentScope=*/true); + }, + thenLoc, + /*elseBuilder=*/ + [&](mlir::OpBuilder &, mlir::Location) { + assert(elseLoc && "Invalid location for elseS."); + LexicalScope lexScope{*this, *elseLoc, builder.getInsertionBlock()}; + resElse = buildStmt(elseS, /*useCurrentScope=*/true); + }, + elseLoc); + + return mlir::LogicalResult::success(resThen.succeeded() && + resElse.succeeded()); +} + +/// Emit an `if` on a boolean condition, filling `then` and `else` into +/// appropriated regions. +mlir::cir::IfOp CIRGenFunction::buildIfOnBoolExpr( + const clang::Expr *cond, + llvm::function_ref thenBuilder, + mlir::Location thenLoc, + llvm::function_ref elseBuilder, + std::optional elseLoc) { + + SmallVector ifLocs{thenLoc}; + if (elseLoc) + ifLocs.push_back(*elseLoc); + auto loc = mlir::FusedLoc::get(builder.getContext(), ifLocs); + + // Emit the code with the fully general case. + mlir::Value condV = buildOpOnBoolExpr(loc, cond); + return builder.create(loc, condV, elseLoc.has_value(), + /*thenBuilder=*/thenBuilder, + /*elseBuilder=*/elseBuilder); +} + +/// TODO(cir): PGO data +/// TODO(cir): see EmitBranchOnBoolExpr for extra ideas). +mlir::Value CIRGenFunction::buildOpOnBoolExpr(mlir::Location loc, + const Expr *cond) { + // TODO(CIR): scoped ApplyDebugLocation DL(*this, Cond); + // TODO(CIR): __builtin_unpredictable and profile counts? + cond = cond->IgnoreParens(); + + // if (const BinaryOperator *CondBOp = dyn_cast(cond)) { + // llvm_unreachable("binaryoperator ifstmt NYI"); + // } + + if (const UnaryOperator *CondUOp = dyn_cast(cond)) { + // In LLVM the condition is reversed here for efficient codegen. + // This should be done in CIR prior to LLVM lowering, if we do now + // we can make CIR based diagnostics misleading. + // cir.ternary(!x, t, f) -> cir.ternary(x, f, t) + assert(!MissingFeatures::shouldReverseUnaryCondOnBoolExpr()); + } + + if (const ConditionalOperator *CondOp = dyn_cast(cond)) { + auto *trueExpr = CondOp->getTrueExpr(); + auto *falseExpr = CondOp->getFalseExpr(); + mlir::Value condV = buildOpOnBoolExpr(loc, CondOp->getCond()); + + auto ternaryOpRes = + builder + .create( + loc, condV, /*thenBuilder=*/ + [this, trueExpr](mlir::OpBuilder &b, mlir::Location loc) { + auto lhs = buildScalarExpr(trueExpr); + b.create(loc, lhs); + }, + /*elseBuilder=*/ + [this, falseExpr](mlir::OpBuilder &b, mlir::Location loc) { + auto rhs = buildScalarExpr(falseExpr); + b.create(loc, rhs); + }) + .getResult(); + + return buildScalarConversion(ternaryOpRes, CondOp->getType(), + getContext().BoolTy, CondOp->getExprLoc()); + } + + if (const CXXThrowExpr *Throw = dyn_cast(cond)) { + llvm_unreachable("NYI"); + } + + // If the branch has a condition wrapped by __builtin_unpredictable, + // create metadata that specifies that the branch is unpredictable. + // Don't bother if not optimizing because that metadata would not be used. 
+ auto *Call = dyn_cast(cond->IgnoreImpCasts()); + if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { + assert(!MissingFeatures::insertBuiltinUnpredictable()); + } + + // Emit the code with the fully general case. + return evaluateExprAsBool(cond); +} + +mlir::Value CIRGenFunction::buildAlloca(StringRef name, mlir::Type ty, + mlir::Location loc, CharUnits alignment, + bool insertIntoFnEntryBlock, + mlir::Value arraySize) { + mlir::Block *entryBlock = insertIntoFnEntryBlock + ? getCurFunctionEntryBlock() + : currLexScope->getEntryBlock(); + + // If this is an alloca in the entry basic block of a cir.try and there's + // a surrounding cir.scope, make sure the alloca ends up in the surrounding + // scope instead. This is necessary in order to guarantee all SSA values are + // reachable during cleanups. + if (auto tryOp = llvm::dyn_cast_if_present( + entryBlock->getParentOp())) { + if (auto scopeOp = llvm::dyn_cast(tryOp->getParentOp())) + entryBlock = &scopeOp.getRegion().front(); + } + + return buildAlloca(name, ty, loc, alignment, + builder.getBestAllocaInsertPoint(entryBlock), arraySize); +} + +mlir::Value CIRGenFunction::buildAlloca(StringRef name, mlir::Type ty, + mlir::Location loc, CharUnits alignment, + mlir::OpBuilder::InsertPoint ip, + mlir::Value arraySize) { + // CIR uses its own alloca AS rather than follow the target data layout like + // original CodeGen. The data layout awareness should be done in the lowering + // pass instead. + auto localVarPtrTy = builder.getPointerTo(ty, getCIRAllocaAddressSpace()); + auto alignIntAttr = CGM.getSize(alignment); + + mlir::Value addr; + { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.restoreInsertionPoint(ip); + addr = builder.createAlloca(loc, /*addr type*/ localVarPtrTy, + /*var type*/ ty, name, alignIntAttr, arraySize); + if (currVarDecl) { + auto alloca = cast(addr.getDefiningOp()); + alloca.setAstAttr(ASTVarDeclAttr::get(builder.getContext(), currVarDecl)); + } + } + return addr; +} + +mlir::Value CIRGenFunction::buildAlloca(StringRef name, QualType ty, + mlir::Location loc, CharUnits alignment, + bool insertIntoFnEntryBlock, + mlir::Value arraySize) { + return buildAlloca(name, getCIRType(ty), loc, alignment, + insertIntoFnEntryBlock, arraySize); +} + +mlir::Value CIRGenFunction::buildLoadOfScalar(LValue lvalue, + SourceLocation loc) { + return buildLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), + lvalue.getType(), getLoc(loc), lvalue.getBaseInfo(), + lvalue.getTBAAInfo(), lvalue.isNontemporal()); +} + +mlir::Value CIRGenFunction::buildLoadOfScalar(LValue lvalue, + mlir::Location loc) { + return buildLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), + lvalue.getType(), loc, lvalue.getBaseInfo(), + lvalue.getTBAAInfo(), lvalue.isNontemporal()); +} + +mlir::Value CIRGenFunction::buildFromMemory(mlir::Value Value, QualType Ty) { + if (!Ty->isBooleanType() && hasBooleanRepresentation(Ty)) { + llvm_unreachable("NIY"); + } + + return Value; +} + +mlir::Value CIRGenFunction::buildLoadOfScalar(Address addr, bool isVolatile, + QualType ty, SourceLocation loc, + LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo, + bool isNontemporal) { + return buildLoadOfScalar(addr, isVolatile, ty, getLoc(loc), baseInfo, + tbaaInfo, isNontemporal); +} + +mlir::Value CIRGenFunction::buildLoadOfScalar(Address addr, bool isVolatile, + QualType ty, mlir::Location loc, + LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo, + bool isNontemporal) { + // Atomic operations have to be done on integral types + LValue atomicLValue = + 
LValue::makeAddr(addr, ty, getContext(), baseInfo, tbaaInfo); + if (ty->isAtomicType() || LValueIsSuitableForInlineAtomic(atomicLValue)) { + llvm_unreachable("NYI"); + } + + auto ElemTy = addr.getElementType(); + + if (const auto *ClangVecTy = ty->getAs()) { + // Handle vectors of size 3 like size 4 for better performance. + const auto VTy = cast(ElemTy); + + if (!CGM.getCodeGenOpts().PreserveVec3Type && + ClangVecTy->getNumElements() == 3) { + auto loc = addr.getPointer().getLoc(); + auto vec4Ty = + mlir::cir::VectorType::get(VTy.getContext(), VTy.getEltType(), 4); + Address Cast = addr.withElementType(vec4Ty); + // Now load value. + mlir::Value V = builder.createLoad(loc, Cast); + + // Shuffle vector to get vec3. + V = builder.createVecShuffle(loc, V, ArrayRef{0, 1, 2}); + return buildFromMemory(V, ty); + } + } + + auto Ptr = addr.getPointer(); + if (mlir::isa(ElemTy)) { + ElemTy = mlir::cir::IntType::get(builder.getContext(), 8, true); + auto ElemPtrTy = mlir::cir::PointerType::get(builder.getContext(), ElemTy); + Ptr = builder.create(loc, ElemPtrTy, + mlir::cir::CastKind::bitcast, Ptr); + } + + mlir::Value Load = builder.CIRBaseBuilderTy::createLoad(loc, Ptr, isVolatile); + + if (isNontemporal) { + llvm_unreachable("NYI"); + } + + assert(!MissingFeatures::tbaa() && "NYI"); + assert(!MissingFeatures::emitScalarRangeCheck() && "NYI"); + + return buildFromMemory(Load, ty); +} + +// Note: this function also emit constructor calls to support a MSVC extensions +// allowing explicit constructor function call. +RValue CIRGenFunction::buildCXXMemberCallExpr(const CXXMemberCallExpr *CE, + ReturnValueSlot ReturnValue) { + + const Expr *callee = CE->getCallee()->IgnoreParens(); + + if (isa(callee)) + return buildCXXMemberPointerCallExpr(CE, ReturnValue); + + const auto *ME = cast(callee); + const auto *MD = cast(ME->getMemberDecl()); + + if (MD->isStatic()) { + llvm_unreachable("NYI"); + } + + bool HasQualifier = ME->hasQualifier(); + NestedNameSpecifier *Qualifier = HasQualifier ? ME->getQualifier() : nullptr; + bool IsArrow = ME->isArrow(); + const Expr *Base = ME->getBase(); + + return buildCXXMemberOrOperatorMemberCallExpr( + CE, MD, ReturnValue, HasQualifier, Qualifier, IsArrow, Base); +} + +RValue CIRGenFunction::buildReferenceBindingToExpr(const Expr *E) { + // Emit the expression as an lvalue. + LValue LV = buildLValue(E); + assert(LV.isSimple()); + auto Value = LV.getPointer(); + + if (sanitizePerformTypeCheck() && !E->getType()->isFunctionType()) { + assert(0 && "NYI"); + } + + return RValue::get(Value); +} + +Address CIRGenFunction::buildLoadOfReference(LValue refLVal, mlir::Location loc, + LValueBaseInfo *pointeeBaseInfo, + TBAAAccessInfo *pointeeTBAAInfo) { + assert(!refLVal.isVolatile() && "NYI"); + mlir::cir::LoadOp load = builder.create( + loc, refLVal.getAddress().getElementType(), + refLVal.getAddress().getPointer()); + + // TODO(cir): DecorateInstructionWithTBAA relevant for us? 
+  assert(!MissingFeatures::tbaa());
+
+  QualType pointeeType = refLVal.getType()->getPointeeType();
+  CharUnits align =
+      CGM.getNaturalTypeAlignment(pointeeType, pointeeBaseInfo, pointeeTBAAInfo,
+                                  /* forPointeeType= */ true);
+  return Address(load, getTypes().convertTypeForMem(pointeeType), align);
+}
+
+LValue CIRGenFunction::buildLoadOfReferenceLValue(LValue RefLVal,
+                                                  mlir::Location Loc) {
+  LValueBaseInfo PointeeBaseInfo;
+  Address PointeeAddr = buildLoadOfReference(RefLVal, Loc, &PointeeBaseInfo);
+  return makeAddrLValue(PointeeAddr, RefLVal.getType()->getPointeeType(),
+                        PointeeBaseInfo);
+}
+
+void CIRGenFunction::buildUnreachable(SourceLocation Loc) {
+  if (SanOpts.has(SanitizerKind::Unreachable))
+    llvm_unreachable("NYI");
+  builder.create<mlir::cir::UnreachableOp>(getLoc(Loc));
+}
+
+//===----------------------------------------------------------------------===//
+// CIR builder helpers
+//===----------------------------------------------------------------------===//
+
+Address CIRGenFunction::CreateMemTemp(QualType Ty, mlir::Location Loc,
+                                      const Twine &Name, Address *Alloca,
+                                      mlir::OpBuilder::InsertPoint ip) {
+  // FIXME: Should we prefer the preferred type alignment here?
+  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Loc, Name,
+                       Alloca, ip);
+}
+
+Address CIRGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
+                                      mlir::Location Loc, const Twine &Name,
+                                      Address *Alloca,
+                                      mlir::OpBuilder::InsertPoint ip) {
+  Address Result =
+      CreateTempAlloca(getTypes().convertTypeForMem(Ty), Align, Loc, Name,
+                       /*ArraySize=*/nullptr, Alloca, ip);
+  if (Ty->isConstantMatrixType()) {
+    assert(0 && "NYI");
+  }
+  return Result;
+}
+
+/// This creates an alloca and inserts it into the entry block of the
+/// current region.
+Address CIRGenFunction::CreateTempAllocaWithoutCast(
+    mlir::Type Ty, CharUnits Align, mlir::Location Loc, const Twine &Name,
+    mlir::Value ArraySize, mlir::OpBuilder::InsertPoint ip) {
+  auto Alloca = ip.isSet() ? CreateTempAlloca(Ty, Loc, Name, ip, ArraySize)
+                           : CreateTempAlloca(Ty, Loc, Name, ArraySize);
+  Alloca.setAlignmentAttr(CGM.getSize(Align));
+  return Address(Alloca, Ty, Align);
+}
+
+/// This creates an alloca and inserts it into the entry block. The alloca is
+/// cast to the default address space if necessary.
+Address CIRGenFunction::CreateTempAlloca(mlir::Type Ty, CharUnits Align,
+                                         mlir::Location Loc, const Twine &Name,
+                                         mlir::Value ArraySize,
+                                         Address *AllocaAddr,
+                                         mlir::OpBuilder::InsertPoint ip) {
+  auto Alloca =
+      CreateTempAllocaWithoutCast(Ty, Align, Loc, Name, ArraySize, ip);
+  if (AllocaAddr)
+    *AllocaAddr = Alloca;
+  mlir::Value V = Alloca.getPointer();
+  // Alloca always returns a pointer in alloca address space, which may
+  // be different from the type defined by the language. For example,
+  // in C++ the auto variables are in the default address space. Therefore
+  // cast alloca to the default address space when necessary.
+  if (auto ASTAS =
+          builder.getAddrSpaceAttr(CGM.getLangTempAllocaAddressSpace());
+      getCIRAllocaAddressSpace() != ASTAS) {
+    llvm_unreachable("Requires address space cast which is NYI");
+  }
+  return Address(V, Ty, Align);
+}
+
+/// This creates an alloca and inserts it into the entry block if \p ArraySize
+/// is nullptr, otherwise inserts it at the current insertion point of the
+/// builder.
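+/// A rough usage sketch (variable names here are illustrative, not from the
+/// patch):
+///   auto allocaOp = CreateTempAlloca(convertedTy, loc, "tmp", mlir::Value());
+/// Callers such as CreateTempAllocaWithoutCast above wrap the resulting op in
+/// an Address with the desired alignment.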
+mlir::cir::AllocaOp
+CIRGenFunction::CreateTempAlloca(mlir::Type Ty, mlir::Location Loc,
+                                 const Twine &Name, mlir::Value ArraySize,
+                                 bool insertIntoFnEntryBlock) {
+  return cast<mlir::cir::AllocaOp>(
+      buildAlloca(Name.str(), Ty, Loc, CharUnits(), insertIntoFnEntryBlock,
+                  ArraySize)
+          .getDefiningOp());
+}
+
+/// This creates an alloca and inserts it into the provided insertion point.
+mlir::cir::AllocaOp CIRGenFunction::CreateTempAlloca(
+    mlir::Type Ty, mlir::Location Loc, const Twine &Name,
+    mlir::OpBuilder::InsertPoint ip, mlir::Value ArraySize) {
+  assert(ip.isSet() && "Insertion point is not set");
+  return cast<mlir::cir::AllocaOp>(
+      buildAlloca(Name.str(), Ty, Loc, CharUnits(), ip, ArraySize)
+          .getDefiningOp());
+}
+
+/// Just like CreateTempAlloca above, but place the alloca into the function
+/// entry basic block instead.
+mlir::cir::AllocaOp CIRGenFunction::CreateTempAllocaInFnEntryBlock(
+    mlir::Type Ty, mlir::Location Loc, const Twine &Name,
+    mlir::Value ArraySize) {
+  return CreateTempAlloca(Ty, Loc, Name, ArraySize,
+                          /*insertIntoFnEntryBlock=*/true);
+}
+
+/// Given an object of the given canonical type, can we safely copy a
+/// value out of it based on its initializer?
+static bool isConstantEmittableObjectType(QualType type) {
+  assert(type.isCanonical());
+  assert(!type->isReferenceType());
+
+  // Must be const-qualified but non-volatile.
+  Qualifiers qs = type.getLocalQualifiers();
+  if (!qs.hasConst() || qs.hasVolatile())
+    return false;
+
+  // Otherwise, all object types satisfy this except C++ classes with
+  // mutable subobjects or non-trivial copy/destroy behavior.
+  if (const auto *RT = dyn_cast<RecordType>(type))
+    if (const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+      if (RD->hasMutableFields() || !RD->isTrivial())
+        return false;
+
+  return true;
+}
+
+/// Can we constant-emit a load of a reference to a variable of the
+/// given type? This is different from predicates like
+/// Decl::mightBeUsableInConstantExpressions because we do want it to apply
+/// in situations that don't necessarily satisfy the language's rules
+/// for this (e.g. C++'s ODR-use rules). For example, we want to be able
+/// to do this with const float variables even if those variables
+/// aren't marked 'constexpr'.
+enum ConstantEmissionKind {
+  CEK_None,
+  CEK_AsReferenceOnly,
+  CEK_AsValueOrReference,
+  CEK_AsValueOnly
+};
+static ConstantEmissionKind checkVarTypeForConstantEmission(QualType type) {
+  type = type.getCanonicalType();
+  if (const auto *ref = dyn_cast<ReferenceType>(type)) {
+    if (isConstantEmittableObjectType(ref->getPointeeType()))
+      return CEK_AsValueOrReference;
+    return CEK_AsReferenceOnly;
+  }
+  if (isConstantEmittableObjectType(type))
+    return CEK_AsValueOnly;
+  return CEK_None;
+}
+
+/// Try to emit a reference to the given value without producing it as
+/// an l-value. This is just an optimization, but it avoids us needing
+/// to emit global copies of variables if they're named without triggering
+/// a formal use in a context where we can't emit a direct reference to them,
+/// for instance if a block or lambda or a member of a local class uses a
+/// const int variable or constexpr variable from an enclosing function.
+CIRGenFunction::ConstantEmission
+CIRGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) {
+  ValueDecl *value = refExpr->getDecl();
+
+  // The value needs to be an enum constant or a constant variable.
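+  // For illustration, in
+  //   enum { N = 4 };
+  //   int f() { const int K = 2; return N + K; }
+  // both N and K can typically be folded to constants here instead of
+  // emitting loads.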
+ ConstantEmissionKind CEK; + if (isa(value)) { + CEK = CEK_None; + } else if (auto *var = dyn_cast(value)) { + CEK = checkVarTypeForConstantEmission(var->getType()); + } else if (isa(value)) { + CEK = CEK_AsValueOnly; + } else { + CEK = CEK_None; + } + if (CEK == CEK_None) + return ConstantEmission(); + + Expr::EvalResult result; + bool resultIsReference; + QualType resultType; + + // It's best to evaluate all the way as an r-value if that's permitted. + if (CEK != CEK_AsReferenceOnly && + refExpr->EvaluateAsRValue(result, getContext())) { + resultIsReference = false; + resultType = refExpr->getType(); + + // Otherwise, try to evaluate as an l-value. + } else if (CEK != CEK_AsValueOnly && + refExpr->EvaluateAsLValue(result, getContext())) { + resultIsReference = true; + resultType = value->getType(); + + // Failure. + } else { + return ConstantEmission(); + } + + // In any case, if the initializer has side-effects, abandon ship. + if (result.HasSideEffects) + return ConstantEmission(); + + // In CUDA/HIP device compilation, a lambda may capture a reference variable + // referencing a global host variable by copy. In this case the lambda should + // make a copy of the value of the global host variable. The DRE of the + // captured reference variable cannot be emitted as load from the host + // global variable as compile time constant, since the host variable is not + // accessible on device. The DRE of the captured reference variable has to be + // loaded from captures. + if (CGM.getLangOpts().CUDAIsDevice && result.Val.isLValue() && + refExpr->refersToEnclosingVariableOrCapture()) { + auto *MD = dyn_cast_or_null(CurCodeDecl); + if (MD && MD->getParent()->isLambda() && + MD->getOverloadedOperator() == OO_Call) { + const APValue::LValueBase &base = result.Val.getLValueBase(); + if (const ValueDecl *D = base.dyn_cast()) { + if (const VarDecl *VD = dyn_cast(D)) { + if (!VD->hasAttr()) { + return ConstantEmission(); + } + } + } + } + } + + // Emit as a constant. + // FIXME(cir): have emitAbstract build a TypedAttr instead (this requires + // somewhat heavy refactoring...) + auto C = ConstantEmitter(*this).emitAbstract(refExpr->getLocation(), + result.Val, resultType); + mlir::TypedAttr cstToEmit = mlir::dyn_cast_if_present(C); + assert(cstToEmit && "expect a typed attribute"); + + // Make sure we emit a debug reference to the global variable. + // This should probably fire even for + if (isa(value)) { + if (!getContext().DeclMustBeEmitted(cast(value))) + buildDeclRefExprDbgValue(refExpr, result.Val); + } else { + assert(isa(value)); + buildDeclRefExprDbgValue(refExpr, result.Val); + } + + // If we emitted a reference constant, we need to dereference that. 
+ if (resultIsReference) + return ConstantEmission::forReference(cstToEmit); + + return ConstantEmission::forValue(cstToEmit); +} + +CIRGenFunction::ConstantEmission +CIRGenFunction::tryEmitAsConstant(const MemberExpr *ME) { + llvm_unreachable("NYI"); +} + +mlir::Value CIRGenFunction::buildScalarConstant( + const CIRGenFunction::ConstantEmission &Constant, Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return buildLoadOfLValue(Constant.getReferenceLValue(*this, E), + E->getExprLoc()) + .getScalarVal(); + return builder.getConstant(getLoc(E->getSourceRange()), Constant.getValue()); +} + +LValue CIRGenFunction::buildPredefinedLValue(const PredefinedExpr *E) { + const auto *SL = E->getFunctionName(); + assert(SL != nullptr && "No StringLiteral name in PredefinedExpr"); + auto Fn = dyn_cast(CurFn); + assert(Fn && "other callables NYI"); + StringRef FnName = Fn.getName(); + if (FnName.starts_with("\01")) + FnName = FnName.substr(1); + StringRef NameItems[] = {PredefinedExpr::getIdentKindName(E->getIdentKind()), + FnName}; + std::string GVName = llvm::join(NameItems, NameItems + 2, "."); + if (auto *BD = dyn_cast_or_null(CurCodeDecl)) { + llvm_unreachable("NYI"); + } + + return buildStringLiteralLValue(SL); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp new file mode 100644 index 000000000000..4c321f18779f --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExprAgg.cpp @@ -0,0 +1,1731 @@ +//===--- CIRGenExprAgg.cpp - Emit CIR Code from Aggregate Expressions -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Aggregate Expr nodes as CIR code. +// +//===----------------------------------------------------------------------===// +#include "CIRGenCall.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "CIRGenTypes.h" +#include "CIRGenValue.h" +#include "mlir/IR/Attributes.h" + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace cir; +using namespace clang; + +namespace { + +// FIXME(cir): This should be a common helper between CIRGen +// and traditional CodeGen +/// Is the value of the given expression possibly a reference to or +/// into a __block variable? +static bool isBlockVarRef(const Expr *E) { + // Make sure we look through parens. + E = E->IgnoreParens(); + + // Check for a direct reference to a __block variable. + if (const DeclRefExpr *DRE = dyn_cast(E)) { + const VarDecl *var = dyn_cast(DRE->getDecl()); + return (var && var->hasAttr()); + } + + // More complicated stuff. + + // Binary operators. + if (const BinaryOperator *op = dyn_cast(E)) { + // For an assignment or pointer-to-member operation, just care + // about the LHS. + if (op->isAssignmentOp() || op->isPtrMemOp()) + return isBlockVarRef(op->getLHS()); + + // For a comma, just care about the RHS. + if (op->getOpcode() == BO_Comma) + return isBlockVarRef(op->getRHS()); + + // FIXME: pointer arithmetic? 
+    return false;
+
+    // Check both sides of a conditional operator.
+  } else if (const AbstractConditionalOperator *op =
+                 dyn_cast<AbstractConditionalOperator>(E)) {
+    return isBlockVarRef(op->getTrueExpr()) ||
+           isBlockVarRef(op->getFalseExpr());
+
+    // OVEs are required to support BinaryConditionalOperators.
+  } else if (const OpaqueValueExpr *op = dyn_cast<OpaqueValueExpr>(E)) {
+    if (const Expr *src = op->getSourceExpr())
+      return isBlockVarRef(src);
+
+    // Casts are necessary to get things like (*(int*)&var) = foo().
+    // We don't really care about the kind of cast here, except
+    // we don't want to look through l2r casts, because it's okay
+    // to get the *value* in a __block variable.
+  } else if (const CastExpr *cast = dyn_cast<CastExpr>(E)) {
+    if (cast->getCastKind() == CK_LValueToRValue)
+      return false;
+    return isBlockVarRef(cast->getSubExpr());
+
+    // Handle unary operators. Again, just aggressively look through
+    // it, ignoring the operation.
+  } else if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E)) {
+    return isBlockVarRef(uop->getSubExpr());
+
+    // Look into the base of a field access.
+  } else if (const MemberExpr *mem = dyn_cast<MemberExpr>(E)) {
+    return isBlockVarRef(mem->getBase());
+
+    // Look into the base of a subscript.
+  } else if (const ArraySubscriptExpr *sub = dyn_cast<ArraySubscriptExpr>(E)) {
+    return isBlockVarRef(sub->getBase());
+  }
+
+  return false;
+}
+
+class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
+  CIRGenFunction &CGF;
+  AggValueSlot Dest;
+  bool IsResultUnused;
+
+  // Calls `Fn` with a valid return value slot, potentially creating a temporary
+  // to do so. If a temporary is created, an appropriate copy into `Dest` will
+  // be emitted, as will lifetime markers.
+  //
+  // The given function should take a ReturnValueSlot, and return an RValue that
+  // points to said slot.
+  void withReturnValueSlot(const Expr *E,
+                           llvm::function_ref<RValue(ReturnValueSlot)> Fn);
+
+  AggValueSlot EnsureSlot(mlir::Location loc, QualType T) {
+    if (!Dest.isIgnored())
+      return Dest;
+    return CGF.CreateAggTemp(T, loc, "agg.tmp.ensured");
+  }
+
+  void EnsureDest(mlir::Location loc, QualType T) {
+    if (!Dest.isIgnored())
+      return;
+    Dest = CGF.CreateAggTemp(T, loc, "agg.tmp.ensured");
+  }
+
+public:
+  AggExprEmitter(CIRGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused)
+      : CGF{cgf}, Dest(Dest), IsResultUnused(IsResultUnused) {}
+
+  //===--------------------------------------------------------------------===//
+  // Utilities
+  //===--------------------------------------------------------------------===//
+
+  /// Given an expression with aggregate type that represents a value lvalue,
+  /// this method emits the address of the lvalue, then loads the result into
+  /// DestPtr.
+  void buildAggLoadOfLValue(const Expr *E);
+
+  enum ExprValueKind { EVK_RValue, EVK_NonRValue };
+
+  /// Perform the final copy to DestPtr, if desired.
+  void buildFinalDestCopy(QualType type, RValue src);
+
+  /// Perform the final copy to DestPtr, if desired. SrcIsRValue is true if
+  /// source comes from an RValue.
+ void buildFinalDestCopy(QualType type, const LValue &src, + ExprValueKind SrcValueKind = EVK_NonRValue); + void buildCopy(QualType type, const AggValueSlot &dest, + const AggValueSlot &src); + + void buildArrayInit(Address DestPtr, mlir::cir::ArrayType AType, + QualType ArrayQTy, Expr *ExprToVisit, + ArrayRef Args, Expr *ArrayFiller); + + AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) { + if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T)) + llvm_unreachable("garbage collection is NYI"); + return AggValueSlot::DoesNotNeedGCBarriers; + } + + bool TypeRequiresGCollection(QualType T); + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + void Visit(Expr *E) { + if (CGF.getDebugInfo()) { + llvm_unreachable("NYI"); + } + StmtVisitor::Visit(E); + } + + void VisitStmt(Stmt *S) { + llvm::errs() << "Missing visitor for AggExprEmitter Stmt: " + << S->getStmtClassName() << "\n"; + llvm_unreachable("NYI"); + } + void VisitParenExpr(ParenExpr *PE) { Visit(PE->getSubExpr()); } + void VisitGenericSelectionExpr(GenericSelectionExpr *GE) { + llvm_unreachable("NYI"); + } + void VisitCoawaitExpr(CoawaitExpr *E) { + CGF.buildCoawaitExpr(*E, Dest, IsResultUnused); + } + void VisitCoyieldExpr(CoyieldExpr *E) { llvm_unreachable("NYI"); } + void VisitUnaryCoawait(UnaryOperator *E) { llvm_unreachable("NYI"); } + void VisitUnaryExtension(UnaryOperator *E) { llvm_unreachable("NYI"); } + void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) { + llvm_unreachable("NYI"); + } + void VisitConstantExpr(ConstantExpr *E) { llvm_unreachable("NYI"); } + + // l-values + void VisitDeclRefExpr(DeclRefExpr *E) { buildAggLoadOfLValue(E); } + void VisitMemberExpr(MemberExpr *E) { buildAggLoadOfLValue(E); } + void VisitUnaryDeref(UnaryOperator *E) { buildAggLoadOfLValue(E); } + void VisitStringLiteral(StringLiteral *E) { llvm_unreachable("NYI"); } + void VisitCompoundLiteralExpr(CompoundLiteralExpr *E); + void VisitArraySubscriptExpr(ArraySubscriptExpr *E) { + buildAggLoadOfLValue(E); + } + void VisitPredefinedExpr(const PredefinedExpr *E) { llvm_unreachable("NYI"); } + + // Operators. + void VisitCastExpr(CastExpr *E); + void VisitCallExpr(const CallExpr *E); + + void VisitStmtExpr(const StmtExpr *E) { + assert(!MissingFeatures::stmtExprEvaluation() && "NYI"); + CGF.buildCompoundStmt(*E->getSubStmt(), /*getLast=*/true, Dest); + } + + void VisitBinaryOperator(const BinaryOperator *E) { llvm_unreachable("NYI"); } + void VisitPointerToDataMemberBinaryOperator(const BinaryOperator *E) { + llvm_unreachable("NYI"); + } + void VisitBinAssign(const BinaryOperator *E) { + + // For an assignment to work, the value on the right has + // to be compatible with the value on the left. + assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(), + E->getRHS()->getType()) && + "Invalid assignment"); + + if (isBlockVarRef(E->getLHS()) && + E->getRHS()->HasSideEffects(CGF.getContext())) { + llvm_unreachable("NYI"); + } + + LValue lhs = CGF.buildLValue(E->getLHS()); + + // If we have an atomic type, evaluate into the destination and then + // do an atomic copy. + if (lhs.getType()->isAtomicType() || + CGF.LValueIsSuitableForInlineAtomic(lhs)) { + assert(!MissingFeatures::atomicTypes()); + return; + } + + // Codegen the RHS so that it stores directly into the LHS. 
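+    // For example, given
+    //   struct S { int a, b; };
+    //   S make(); void f(S &d) { d = make(); }
+    // the RHS is evaluated with `d`'s storage as its destination slot whenever
+    // it is safe to do so, avoiding an extra temporary for the assignment.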
+ AggValueSlot lhsSlot = AggValueSlot::forLValue( + lhs, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsAliased, AggValueSlot::MayOverlap); + + // A non-volatile aggregate destination might have volatile member. + if (!lhsSlot.isVolatile() && CGF.hasVolatileMember(E->getLHS()->getType())) + assert(!MissingFeatures::atomicTypes()); + + CGF.buildAggExpr(E->getRHS(), lhsSlot); + + // Copy into the destination if the assignment isn't ignored. + buildFinalDestCopy(E->getType(), lhs); + + if (!Dest.isIgnored() && !Dest.isExternallyDestructed() && + E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) + CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(), + E->getType()); + } + + void VisitBinComma(const BinaryOperator *E); + void VisitBinCmp(const BinaryOperator *E); + void VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + llvm_unreachable("NYI"); + } + + void VisitObjCMessageExpr(ObjCMessageExpr *E) { llvm_unreachable("NYI"); } + void VisitObjCIVarRefExpr(ObjCIvarRefExpr *E) { llvm_unreachable("NYI"); } + + void VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) { + llvm_unreachable("NYI"); + } + void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E); + void VisitChooseExpr(const ChooseExpr *E) { llvm_unreachable("NYI"); } + void VisitInitListExpr(InitListExpr *E); + void VisitCXXParenListInitExpr(CXXParenListInitExpr *E); + void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef Args, + FieldDecl *InitializedFieldInUnion, + Expr *ArrayFiller); + void VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, + llvm::Value *outerBegin = nullptr) { + llvm_unreachable("NYI"); + } + void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) { + llvm_unreachable("NYI"); + } + void VisitNoInitExpr(NoInitExpr *E) { llvm_unreachable("NYI"); } + void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CIRGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); + Visit(DAE->getExpr()); + } + void VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { + CIRGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); + Visit(DIE->getExpr()); + } + void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E); + void VisitCXXConstructExpr(const CXXConstructExpr *E); + void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E) { + llvm_unreachable("NYI"); + } + void VisitLambdaExpr(LambdaExpr *E); + void VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) { + ASTContext &Ctx = CGF.getContext(); + CIRGenFunction::SourceLocRAIIObject locRAIIObject{ + CGF, CGF.getLoc(E->getSourceRange())}; + // Emit an array containing the elements. The array is externally + // destructed if the std::initializer_list object is. + LValue Array = CGF.buildLValue(E->getSubExpr()); + assert(Array.isSimple() && "initializer_list array not a simple lvalue"); + Address ArrayPtr = Array.getAddress(); + + const ConstantArrayType *ArrayType = + Ctx.getAsConstantArrayType(E->getSubExpr()->getType()); + assert(ArrayType && "std::initializer_list constructed from non-array"); + + RecordDecl *Record = E->getType()->castAs()->getDecl(); + RecordDecl::field_iterator Field = Record->field_begin(); + assert(Field != Record->field_end() && + Ctx.hasSameType(Field->getType()->getPointeeType(), + ArrayType->getElementType()) && + "Expected std::initializer_list first field to be const E *"); + // Start pointer. 
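+    // For illustration, for
+    //   std::initializer_list<int> il = {1, 2, 3};
+    // a backing array of three ints is materialized, and the two fields of
+    // the list object are filled in: a pointer to the first element, plus
+    // either the length or an end pointer, depending on the library's layout.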
+ auto loc = CGF.getLoc(E->getSourceRange()); + AggValueSlot Dest = EnsureSlot(loc, E->getType()); + LValue DestLV = CGF.makeAddrLValue(Dest.getAddress(), E->getType()); + LValue Start = + CGF.buildLValueForFieldInitialization(DestLV, *Field, Field->getName()); + mlir::Value ArrayStart = ArrayPtr.emitRawPointer(); + CGF.buildStoreThroughLValue(RValue::get(ArrayStart), Start); + ++Field; + assert(Field != Record->field_end() && + "Expected std::initializer_list to have two fields"); + + auto Builder = CGF.getBuilder(); + + auto sizeOp = Builder.getConstInt(loc, ArrayType->getSize()); + + mlir::Value Size = sizeOp.getRes(); + Builder.getUIntNTy(ArrayType->getSizeBitWidth()); + LValue EndOrLength = + CGF.buildLValueForFieldInitialization(DestLV, *Field, Field->getName()); + if (Ctx.hasSameType(Field->getType(), Ctx.getSizeType())) { + // Length. + CGF.buildStoreThroughLValue(RValue::get(Size), EndOrLength); + } else { + // End pointer. + assert(Field->getType()->isPointerType() && + Ctx.hasSameType(Field->getType()->getPointeeType(), + ArrayType->getElementType()) && + "Expected std::initializer_list second field to be const E *"); + + auto ArrayEnd = + Builder.getArrayElement(loc, loc, ArrayPtr.getPointer(), + ArrayPtr.getElementType(), Size, false); + CGF.buildStoreThroughLValue(RValue::get(ArrayEnd), EndOrLength); + } + assert(++Field == Record->field_end() && + "Expected std::initializer_list to only have two fields"); + } + + void VisitExprWithCleanups(ExprWithCleanups *E); + void VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) { + llvm_unreachable("NYI"); + } + void VisitCXXTypeidExpr(CXXTypeidExpr *E) { llvm_unreachable("NYI"); } + void VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E); + void VisitOpaqueValueExpr(OpaqueValueExpr *E) { llvm_unreachable("NYI"); } + + void VisitPseudoObjectExpr(PseudoObjectExpr *E) { llvm_unreachable("NYI"); } + + void VisitVAArgExpr(VAArgExpr *E) { llvm_unreachable("NYI"); } + + void buildInitializationToLValue(Expr *E, LValue LV); + + void buildNullInitializationToLValue(mlir::Location loc, LValue Address); + void VisitCXXThrowExpr(const CXXThrowExpr *E) { llvm_unreachable("NYI"); } + void VisitAtomicExpr(AtomicExpr *E) { llvm_unreachable("NYI"); } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Utilities +//===----------------------------------------------------------------------===// + +/// Given an expression with aggregate type that represents a value lvalue, this +/// method emits the address of the lvalue, then loads the result into DestPtr. +void AggExprEmitter::buildAggLoadOfLValue(const Expr *E) { + LValue LV = CGF.buildLValue(E); + + // If the type of the l-value is atomic, then do an atomic load. + if (LV.getType()->isAtomicType() || CGF.LValueIsSuitableForInlineAtomic(LV) || + MissingFeatures::atomicTypes()) + llvm_unreachable("atomic load is NYI"); + + buildFinalDestCopy(E->getType(), LV); +} + +/// Perform the final copy to DestPtr, if desired. +void AggExprEmitter::buildFinalDestCopy(QualType type, RValue src) { + assert(src.isAggregate() && "value must be aggregate value!"); + LValue srcLV = CGF.makeAddrLValue(src.getAggregateAddress(), type); + buildFinalDestCopy(type, srcLV, EVK_RValue); +} + +/// Perform the final copy to DestPtr, if desired. 
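+/// The copy is skipped when the destination slot is ignored, e.g. when the
+/// aggregate expression is evaluated only for its side effects.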
+void AggExprEmitter::buildFinalDestCopy(QualType type, const LValue &src, + ExprValueKind SrcValueKind) { + // If Dest is ignored, then we're evaluating an aggregate expression + // in a context that doesn't care about the result. Note that loads + // from volatile l-values force the existence of a non-ignored + // destination. + if (Dest.isIgnored()) + return; + + // Copy non-trivial C structs here. + if (Dest.isVolatile()) + assert(!MissingFeatures::volatileTypes()); + + if (SrcValueKind == EVK_RValue) { + if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct) { + llvm_unreachable("move assignment/move ctor for rvalue is NYI"); + } + } else { + if (type.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) + llvm_unreachable("non-trivial primitive copy is NYI"); + } + + AggValueSlot srcAgg = AggValueSlot::forLValue( + src, AggValueSlot::IsDestructed, needsGC(type), AggValueSlot::IsAliased, + AggValueSlot::MayOverlap); + buildCopy(type, Dest, srcAgg); +} + +/// Perform a copy from the source into the destination. +/// +/// \param type - the type of the aggregate being copied; qualifiers are +/// ignored +void AggExprEmitter::buildCopy(QualType type, const AggValueSlot &dest, + const AggValueSlot &src) { + if (dest.requiresGCollection()) + llvm_unreachable("garbage collection is NYI"); + + // If the result of the assignment is used, copy the LHS there also. + // It's volatile if either side is. Use the minimum alignment of + // the two sides. + LValue DestLV = CGF.makeAddrLValue(dest.getAddress(), type); + LValue SrcLV = CGF.makeAddrLValue(src.getAddress(), type); + CGF.buildAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(), + dest.isVolatile() || src.isVolatile()); +} + +// FIXME(cir): This function could be shared with traditional LLVM codegen +/// Determine if E is a trivial array filler, that is, one that is +/// equivalent to zero-initialization. +static bool isTrivialFiller(Expr *E) { + if (!E) + return true; + + if (isa(E)) + return true; + + if (auto *ILE = dyn_cast(E)) { + if (ILE->getNumInits()) + return false; + return isTrivialFiller(ILE->getArrayFiller()); + } + + if (auto *Cons = dyn_cast_or_null(E)) + return Cons->getConstructor()->isDefaultConstructor() && + Cons->getConstructor()->isTrivial(); + + // FIXME: Are there other cases where we can avoid emitting an initializer? 
+  return false;
+}
+
+void AggExprEmitter::buildArrayInit(Address DestPtr, mlir::cir::ArrayType AType,
+                                    QualType ArrayQTy, Expr *ExprToVisit,
+                                    ArrayRef Args, Expr *ArrayFiller) {
+  uint64_t NumInitElements = Args.size();
+
+  uint64_t NumArrayElements = AType.getSize();
+  assert(NumInitElements <= NumArrayElements);
+
+  QualType elementType =
+      CGF.getContext().getAsArrayType(ArrayQTy)->getElementType();
+  QualType elementPtrType = CGF.getContext().getPointerType(elementType);
+
+  auto cirElementType = CGF.convertType(elementType);
+  auto cirAddrSpace = mlir::cast_if_present(
+      DestPtr.getType().getAddrSpace());
+  auto cirElementPtrType =
+      CGF.getBuilder().getPointerTo(cirElementType, cirAddrSpace);
+  auto loc = CGF.getLoc(ExprToVisit->getSourceRange());
+
+  // Decay the array pointer to a pointer to its element type.
+  auto begin = CGF.getBuilder().create(
+      loc, cirElementPtrType, mlir::cir::CastKind::array_to_ptrdecay,
+      DestPtr.getPointer());
+
+  CharUnits elementSize = CGF.getContext().getTypeSizeInChars(elementType);
+  CharUnits elementAlign =
+      DestPtr.getAlignment().alignmentOfArrayElement(elementSize);
+
+  // Exception safety requires us to destroy all the
+  // already-constructed members if an initializer throws.
+  // For that, we'll need an EH cleanup.
+  QualType::DestructionKind dtorKind = elementType.isDestructedType();
+  [[maybe_unused]] Address endOfInit = Address::invalid();
+  CIRGenFunction::CleanupDeactivationScope deactivation(CGF);
+
+  if (dtorKind) {
+    llvm_unreachable("dtorKind NYI");
+  }
+
+  // The 'current element to initialize'. The invariants on this
+  // variable are complicated. Essentially, after each iteration of
+  // the loop, it points to the last initialized element, except
+  // that it points to the beginning of the array before any
+  // elements have been initialized.
+  mlir::Value element = begin;
+
+  // Don't build the 'one' before the loop to avoid emitting redundant
+  // `cir.const 1` instructions.
+  mlir::Value one;
+
+  // Emit the explicit initializers.
+  for (uint64_t i = 0; i != NumInitElements; ++i) {
+    if (i == 1)
+      one = CGF.getBuilder().getConstInt(
+          loc, mlir::cast(CGF.PtrDiffTy), 1);
+
+    // Advance to the next element.
+    if (i > 0) {
+      element = CGF.getBuilder().create(
+          loc, cirElementPtrType, element, one);
+
+      // Tell the cleanup that it needs to destroy up to this
+      // element. TODO: some of these stores can be trivially
+      // observed to be unnecessary.
+      assert(!endOfInit.isValid() && "destructed types NYI");
+    }
+
+    LValue elementLV = CGF.makeAddrLValue(
+        Address(element, cirElementType, elementAlign), elementType);
+    buildInitializationToLValue(Args[i], elementLV);
+  }
+
+  // Check whether there's a non-trivial array-fill expression.
+  bool hasTrivialFiller = isTrivialFiller(ArrayFiller);
+
+  // Any remaining elements need to be zero-initialized, possibly
+  // using the filler expression. We can skip this if we're
+  // emitting to zeroed memory.
+  if (NumInitElements != NumArrayElements &&
+      !(Dest.isZeroed() && hasTrivialFiller &&
+        CGF.getTypes().isZeroInitializable(elementType))) {
+
+    // Use an actual loop. This is basically
+    //   do { *array++ = filler; } while (array != end);
+
+    auto &builder = CGF.getBuilder();
+
+    // Advance to the start of the rest of the array.
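+    // (Illustrative: for `int a[8] = {1, 2};` the two explicit initializers
+    // were emitted above and the loop below fills the remaining six elements
+    // with the filler, here zeroes.)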
+    if (NumInitElements) {
+      auto one = builder.getConstInt(
+          loc, mlir::cast(CGF.PtrDiffTy), 1);
+      element = builder.create(loc, cirElementPtrType,
+                               element, one);
+
+      assert(!endOfInit.isValid() && "destructed types NYI");
+    }
+
+    // Allocate the temporary variable
+    // to store the pointer to the first uninitialized element
+    auto tmpAddr = CGF.CreateTempAlloca(
+        cirElementPtrType, CGF.getPointerAlign(), loc, "arrayinit.temp");
+    LValue tmpLV = CGF.makeAddrLValue(tmpAddr, elementPtrType);
+    CGF.buildStoreThroughLValue(RValue::get(element), tmpLV);
+
+    // Compute the end of the array
+    auto numArrayElementsConst = builder.getConstInt(
+        loc, mlir::cast(CGF.PtrDiffTy), NumArrayElements);
+    mlir::Value end = builder.create(
+        loc, cirElementPtrType, begin, numArrayElementsConst);
+
+    builder.createDoWhile(
+        loc,
+        /*condBuilder=*/
+        [&](mlir::OpBuilder &b, mlir::Location loc) {
+          auto currentElement = builder.createLoad(loc, tmpAddr);
+          mlir::Type boolTy = CGF.getCIRType(CGF.getContext().BoolTy);
+          auto cmp = builder.create(
+              loc, boolTy, mlir::cir::CmpOpKind::ne, currentElement, end);
+          builder.createCondition(cmp);
+        },
+        /*bodyBuilder=*/
+        [&](mlir::OpBuilder &b, mlir::Location loc) {
+          auto currentElement = builder.createLoad(loc, tmpAddr);
+
+          if (MissingFeatures::cleanups())
+            llvm_unreachable("NYI");
+
+          // Emit the actual filler expression.
+          LValue elementLV = CGF.makeAddrLValue(
+              Address(currentElement, cirElementType, elementAlign),
+              elementType);
+          if (ArrayFiller)
+            buildInitializationToLValue(ArrayFiller, elementLV);
+          else
+            buildNullInitializationToLValue(loc, elementLV);
+
+          // Tell the EH cleanup that we finished with the last element.
+          assert(!endOfInit.isValid() && "destructed types NYI");
+
+          // Advance the pointer and store it in the temporary variable.
+          auto one = builder.getConstInt(
+              loc, mlir::cast(CGF.PtrDiffTy), 1);
+          auto nextElement = builder.create(
+              loc, cirElementPtrType, currentElement, one);
+          CGF.buildStoreThroughLValue(RValue::get(nextElement), tmpLV);
+
+          builder.createYield(loc);
+        });
+  }
+}
+
+/// True if the given aggregate type requires special GC API calls.
+bool AggExprEmitter::TypeRequiresGCollection(QualType T) {
+  // Only record types have members that might require garbage collection.
+  const RecordType *RecordTy = T->getAs();
+  if (!RecordTy)
+    return false;
+
+  // Don't mess with non-trivial C++ types.
+  RecordDecl *Record = RecordTy->getDecl();
+  if (isa(Record) &&
+      (cast(Record)->hasNonTrivialCopyConstructor() ||
+       !cast(Record)->hasTrivialDestructor()))
+    return false;
+
+  // Check whether the type has an object member.
+  return Record->hasObjectMember();
+}
+
+//===----------------------------------------------------------------------===//
+// Visitor Methods
+//===----------------------------------------------------------------------===//
+
+/// Determine whether the given cast kind is known to always convert values
+/// with all zero bits in their value representation to values with all zero
+/// bits in their value representation.
+/// TODO(cir): this can be shared with LLVM codegen.
+static bool castPreservesZero(const CastExpr *CE) {
+  switch (CE->getCastKind()) {
+  case CK_HLSLVectorTruncation:
+  case CK_HLSLArrayRValue:
+    llvm_unreachable("NYI");
+  // No-ops.
+  case CK_NoOp:
+  case CK_UserDefinedConversion:
+  case CK_ConstructorConversion:
+  case CK_BitCast:
+  case CK_ToUnion:
+  case CK_ToVoid:
+  // Conversions between (possibly-complex) integral, (possibly-complex)
+  // floating-point, and bool.
+ case CK_BooleanToSignedIntegral: + case CK_FloatingCast: + case CK_FloatingComplexCast: + case CK_FloatingComplexToBoolean: + case CK_FloatingComplexToIntegralComplex: + case CK_FloatingComplexToReal: + case CK_FloatingRealToComplex: + case CK_FloatingToBoolean: + case CK_FloatingToIntegral: + case CK_IntegralCast: + case CK_IntegralComplexCast: + case CK_IntegralComplexToBoolean: + case CK_IntegralComplexToFloatingComplex: + case CK_IntegralComplexToReal: + case CK_IntegralRealToComplex: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + // Reinterpreting integers as pointers and vice versa. + case CK_IntegralToPointer: + case CK_PointerToIntegral: + // Language extensions. + case CK_VectorSplat: + case CK_MatrixCast: + case CK_NonAtomicToAtomic: + case CK_AtomicToNonAtomic: + return true; + + case CK_BaseToDerivedMemberPointer: + case CK_DerivedToBaseMemberPointer: + case CK_MemberPointerToBoolean: + case CK_NullToMemberPointer: + case CK_ReinterpretMemberPointer: + // FIXME: ABI-dependent. + return false; + + case CK_AnyPointerToBlockPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_CPointerToObjCPointerCast: + case CK_ObjCObjectLValueCast: + case CK_IntToOCLSampler: + case CK_ZeroToOCLOpaqueType: + // FIXME: Check these. + return false; + + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_FixedPointToFloating: + case CK_FixedPointToIntegral: + case CK_FloatingToFixedPoint: + case CK_IntegralToFixedPoint: + // FIXME: Do all fixed-point types represent zero as all 0 bits? + return false; + + case CK_AddressSpaceConversion: + case CK_BaseToDerived: + case CK_DerivedToBase: + case CK_Dynamic: + case CK_NullToPointer: + case CK_PointerToBoolean: + // FIXME: Preserves zeroes only if zero pointers and null pointers have the + // same representation in all involved address spaces. + return false; + + case CK_ARCConsumeObject: + case CK_ARCExtendBlockObject: + case CK_ARCProduceObject: + case CK_ARCReclaimReturnedObject: + case CK_CopyAndAutoreleaseBlockObject: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: + case CK_BuiltinFnToFnPtr: + case CK_Dependent: + case CK_LValueBitCast: + case CK_LValueToRValue: + case CK_LValueToRValueBitCast: + case CK_UncheckedDerivedToBase: + return false; + } + llvm_unreachable("Unhandled clang::CastKind enum"); +} + +/// If emitting this value will obviously just cause a store of +/// zero to memory, return true. This can return false if uncertain, so it just +/// handles simple cases. +static bool isSimpleZero(const Expr *E, CIRGenFunction &CGF) { + E = E->IgnoreParens(); + while (auto *CE = dyn_cast(E)) { + if (!castPreservesZero(CE)) + break; + E = CE->getSubExpr()->IgnoreParens(); + } + + // 0 + if (const IntegerLiteral *IL = dyn_cast(E)) + return IL->getValue() == 0; + // +0.0 + if (const FloatingLiteral *FL = dyn_cast(E)) + return FL->getValue().isPosZero(); + // int() + if ((isa(E) || isa(E)) && + CGF.getTypes().isZeroInitializable(E->getType())) + return true; + // (int*)0 - Null pointer expressions. + if (const CastExpr *ICE = dyn_cast(E)) { + return ICE->getCastKind() == CK_NullToPointer && + CGF.getTypes().isPointerZeroInitializable(E->getType()) && + !E->HasSideEffects(CGF.getContext()); + } + // '\0' + if (const CharacterLiteral *CL = dyn_cast(E)) + return CL->getValue() == 0; + + // Otherwise, hard case: conservatively return false. 
+ return false; +} + +void AggExprEmitter::buildNullInitializationToLValue(mlir::Location loc, + LValue lv) { + QualType type = lv.getType(); + + // If the destination slot is already zeroed out before the aggregate is + // copied into it, we don't have to emit any zeros here. + if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(type)) + return; + + if (CGF.hasScalarEvaluationKind(type)) { + // For non-aggregates, we can store the appropriate null constant. + auto null = CGF.CGM.buildNullConstant(type, loc); + // Note that the following is not equivalent to + // EmitStoreThroughBitfieldLValue for ARC types. + if (lv.isBitField()) { + mlir::Value result; + CGF.buildStoreThroughBitfieldLValue(RValue::get(null), lv, result); + } else { + assert(lv.isSimple()); + CGF.buildStoreOfScalar(null, lv, /* isInitialization */ true); + } + } else { + // There's a potential optimization opportunity in combining + // memsets; that would be easy for arrays, but relatively + // difficult for structures with the current code. + CGF.buildNullInitialization(loc, lv.getAddress(), lv.getType()); + } +} + +void AggExprEmitter::buildInitializationToLValue(Expr *E, LValue LV) { + QualType type = LV.getType(); + // FIXME: Ignore result? + // FIXME: Are initializers affected by volatile? + if (Dest.isZeroed() && isSimpleZero(E, CGF)) { + // TODO(cir): LLVM codegen considers 'storing "i32 0" to a zero'd memory + // location is a noop'. Consider emitting the store to zero in CIR, as to + // model the actual user behavior, we can have a pass to optimize this out + // later. + return; + } + + if (isa(E) || isa(E)) { + auto loc = E->getSourceRange().isValid() ? CGF.getLoc(E->getSourceRange()) + : *CGF.currSrcLoc; + return buildNullInitializationToLValue(loc, LV); + } else if (isa(E)) { + // Do nothing. + return; + } else if (type->isReferenceType()) { + RValue RV = CGF.buildReferenceBindingToExpr(E); + return CGF.buildStoreThroughLValue(RV, LV); + } + + switch (CGF.getEvaluationKind(type)) { + case TEK_Complex: + llvm_unreachable("NYI"); + return; + case TEK_Aggregate: + CGF.buildAggExpr( + E, AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::MayOverlap, Dest.isZeroed())); + return; + case TEK_Scalar: + if (LV.isSimple()) { + CGF.buildScalarInit(E, CGF.getLoc(E->getSourceRange()), LV); + } else { + CGF.buildStoreThroughLValue(RValue::get(CGF.buildScalarExpr(E)), LV); + } + return; + } + llvm_unreachable("bad evaluation kind"); +} + +void AggExprEmitter::VisitMaterializeTemporaryExpr( + MaterializeTemporaryExpr *E) { + Visit(E->getSubExpr()); +} + +void AggExprEmitter::VisitCXXConstructExpr(const CXXConstructExpr *E) { + AggValueSlot Slot = EnsureSlot(CGF.getLoc(E->getSourceRange()), E->getType()); + CGF.buildCXXConstructExpr(E, Slot); +} + +void AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { + if (Dest.isPotentiallyAliased() && E->getType().isPODType(CGF.getContext())) { + // For a POD type, just emit a load of the lvalue + a copy, because our + // compound literal might alias the destination. + buildAggLoadOfLValue(E); + return; + } + + AggValueSlot Slot = EnsureSlot(CGF.getLoc(E->getSourceRange()), E->getType()); + + // Block-scope compound literals are destroyed at the end of the enclosing + // scope in C. 
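+  // (e.g. in C, `(struct S){ .x = 1 }` at block scope lives until the end of
+  // the enclosing block, so a non-trivial C struct may need its destruction
+  // scheduled here.)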
+ bool Destruct = + !CGF.getLangOpts().CPlusPlus && !Slot.isExternallyDestructed(); + if (Destruct) + Slot.setExternallyDestructed(); + + CGF.buildAggExpr(E->getInitializer(), Slot); + + if (Destruct) + if (QualType::DestructionKind DtorKind = E->getType().isDestructedType()) + llvm_unreachable("NYI"); +} + +void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { + if (MissingFeatures::cleanups()) + llvm_unreachable("NYI"); + + auto &builder = CGF.getBuilder(); + auto scopeLoc = CGF.getLoc(E->getSourceRange()); + [[maybe_unused]] auto scope = builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{CGF, loc, + builder.getInsertionBlock()}; + Visit(E->getSubExpr()); + }); +} + +void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { + CIRGenFunction::SourceLocRAIIObject loc{CGF, CGF.getLoc(E->getSourceRange())}; + AggValueSlot Slot = EnsureSlot(CGF.getLoc(E->getSourceRange()), E->getType()); + LLVM_ATTRIBUTE_UNUSED LValue SlotLV = + CGF.makeAddrLValue(Slot.getAddress(), E->getType()); + + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception or contains branch out of the expressions. + CIRGenFunction::CleanupDeactivationScope scope(CGF); + + auto CurField = E->getLambdaClass()->field_begin(); + auto captureInfo = E->capture_begin(); + for (auto &captureInit : E->capture_inits()) { + // Pick a name for the field. + llvm::StringRef fieldName = CurField->getName(); + const LambdaCapture &capture = *captureInfo; + if (capture.capturesVariable()) { + assert(!CurField->isBitField() && "lambdas don't have bitfield members!"); + ValueDecl *v = capture.getCapturedVar(); + fieldName = v->getName(); + CGF.getCIRGenModule().LambdaFieldToName[*CurField] = fieldName; + } else { + llvm_unreachable("NYI"); + } + + // Emit initialization + LValue LV = + CGF.buildLValueForFieldInitialization(SlotLV, *CurField, fieldName); + if (CurField->hasCapturedVLAType()) { + llvm_unreachable("NYI"); + } + + buildInitializationToLValue(captureInit, LV); + + // Push a destructor if necessary. + if (QualType::DestructionKind DtorKind = + CurField->getType().isDestructedType()) { + llvm_unreachable("NYI"); + } + + CurField++; + captureInfo++; + } +} + +void AggExprEmitter::VisitCastExpr(CastExpr *E) { + if (const auto *ECE = dyn_cast(E)) + CGF.CGM.buildExplicitCastExprType(ECE, &CGF); + switch (E->getCastKind()) { + case CK_LValueToRValueBitCast: { + if (Dest.isIgnored()) { + CGF.buildAnyExpr(E->getSubExpr(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + break; + } + + LValue SourceLV = CGF.buildLValue(E->getSubExpr()); + Address SourceAddress = SourceLV.getAddress(); + Address DestAddress = Dest.getAddress(); + + auto Loc = CGF.getLoc(E->getExprLoc()); + mlir::Value SrcPtr = CGF.getBuilder().createBitcast( + Loc, SourceAddress.getPointer(), CGF.VoidPtrTy); + mlir::Value DstPtr = CGF.getBuilder().createBitcast( + Loc, DestAddress.getPointer(), CGF.VoidPtrTy); + + mlir::Value SizeVal = CGF.getBuilder().getConstInt( + Loc, CGF.SizeTy, + CGF.getContext().getTypeSizeInChars(E->getType()).getQuantity()); + CGF.getBuilder().createMemCpy(Loc, DstPtr, SrcPtr, SizeVal); + + break; + } + + case CK_ToUnion: { + // Evaluate even if the destination is ignored. 
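+    // (Illustrative: this is the GCC cast-to-union extension, e.g.
+    // `(union U)42`, which initializes the union member matching the
+    // operand's type.)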
+ if (Dest.isIgnored()) { + CGF.buildAnyExpr(E->getSubExpr(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + break; + } + + // GCC union extension + QualType Ty = E->getSubExpr()->getType(); + Address CastPtr = Dest.getAddress().withElementType(CGF.ConvertType(Ty)); + buildInitializationToLValue(E->getSubExpr(), + CGF.makeAddrLValue(CastPtr, Ty)); + break; + } + + case CK_LValueToRValue: + // If we're loading from a volatile type, force the destination + // into existence. + if (E->getSubExpr()->getType().isVolatileQualified() || + MissingFeatures::volatileTypes()) { + bool Destruct = + !Dest.isExternallyDestructed() && + E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct; + if (Destruct) + Dest.setExternallyDestructed(); + Visit(E->getSubExpr()); + + if (Destruct) + CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(), + E->getType()); + + return; + } + [[fallthrough]]; + + case CK_NoOp: + case CK_UserDefinedConversion: + case CK_ConstructorConversion: + assert(CGF.getContext().hasSameUnqualifiedType(E->getSubExpr()->getType(), + E->getType()) && + "Implicit cast types must be compatible"); + Visit(E->getSubExpr()); + break; + + case CK_LValueBitCast: + llvm_unreachable("should not be emitting lvalue bitcast as rvalue"); + + case CK_Dependent: + case CK_BitCast: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: + case CK_NullToPointer: + case CK_NullToMemberPointer: + case CK_BaseToDerivedMemberPointer: + case CK_DerivedToBaseMemberPointer: + case CK_MemberPointerToBoolean: + case CK_ReinterpretMemberPointer: + case CK_IntegralToPointer: + case CK_PointerToIntegral: + case CK_PointerToBoolean: + case CK_ToVoid: + case CK_VectorSplat: + case CK_IntegralCast: + case CK_BooleanToSignedIntegral: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingToBoolean: + case CK_FloatingCast: + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_ObjCObjectLValueCast: + case CK_FloatingRealToComplex: + case CK_FloatingComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_FloatingComplexCast: + case CK_FloatingComplexToIntegralComplex: + case CK_IntegralRealToComplex: + case CK_IntegralComplexToReal: + case CK_IntegralComplexToBoolean: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: + case CK_ARCProduceObject: + case CK_ARCConsumeObject: + case CK_ARCReclaimReturnedObject: + case CK_ARCExtendBlockObject: + case CK_CopyAndAutoreleaseBlockObject: + case CK_BuiltinFnToFnPtr: + case CK_ZeroToOCLOpaqueType: + case CK_MatrixCast: + + case CK_IntToOCLSampler: + case CK_FloatingToFixedPoint: + case CK_FixedPointToFloating: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: + llvm::errs() << "cast '" << E->getCastKindName() + << "' invalid for aggregate types\n"; + llvm_unreachable("cast kind invalid for aggregate types"); + default: { + llvm::errs() << "cast kind not implemented: '" << E->getCastKindName() + << "'\n"; + assert(0 && "not implemented"); + break; + } + } +} + +void AggExprEmitter::VisitCallExpr(const CallExpr *E) { + if (E->getCallReturnType(CGF.getContext())->isReferenceType()) { + llvm_unreachable("NYI"); + } + + withReturnValueSlot( + E, [&](ReturnValueSlot Slot) { return CGF.buildCallExpr(E, Slot); }); +} + +void AggExprEmitter::withReturnValueSlot( + const Expr *E, llvm::function_ref EmitCall) { + QualType RetTy = 
E->getType(); + bool RequiresDestruction = + !Dest.isExternallyDestructed() && + RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct; + + // If it makes no observable difference, save a memcpy + temporary. + // + // We need to always provide our own temporary if destruction is required. + // Otherwise, EmitCall will emit its own, notice that it's "unused", and end + // its lifetime before we have the chance to emit a proper destructor call. + bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() || + (RequiresDestruction && !Dest.getAddress().isValid()); + + Address RetAddr = Address::invalid(); + assert(!MissingFeatures::shouldEmitLifetimeMarkers() && "NYI"); + + if (!UseTemp) { + RetAddr = Dest.getAddress(); + } else { + RetAddr = CGF.CreateMemTemp(RetTy, CGF.getLoc(E->getSourceRange()), "tmp", + &RetAddr); + assert(!MissingFeatures::shouldEmitLifetimeMarkers() && "NYI"); + } + + RValue Src = + EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused, + Dest.isExternallyDestructed())); + + if (!UseTemp) + return; + + assert(Dest.isIgnored() || Dest.getPointer() != Src.getAggregatePointer()); + buildFinalDestCopy(E->getType(), Src); + + if (!RequiresDestruction) { + // If there's no dtor to run, the copy was the last use of our temporary. + // Since we're not guaranteed to be in an ExprWithCleanups, clean up + // eagerly. + assert(!MissingFeatures::shouldEmitLifetimeMarkers() && "NYI"); + } +} + +void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) { + assert(CGF.getContext().hasSameType(E->getLHS()->getType(), + E->getRHS()->getType())); + const ComparisonCategoryInfo &CmpInfo = + CGF.getContext().CompCategories.getInfoForType(E->getType()); + assert(CmpInfo.Record->isTriviallyCopyable() && + "cannot copy non-trivially copyable aggregate"); + + QualType ArgTy = E->getLHS()->getType(); + + if (!ArgTy->isIntegralOrEnumerationType() && !ArgTy->isRealFloatingType() && + !ArgTy->isNullPtrType() && !ArgTy->isPointerType() && + !ArgTy->isMemberPointerType() && !ArgTy->isAnyComplexType()) + llvm_unreachable("aggregate three-way comparison"); + + auto Loc = CGF.getLoc(E->getSourceRange()); + + if (E->getType()->isAnyComplexType()) + llvm_unreachable("NYI"); + + auto LHS = CGF.buildAnyExpr(E->getLHS()).getScalarVal(); + auto RHS = CGF.buildAnyExpr(E->getRHS()).getScalarVal(); + + mlir::Value ResultScalar; + if (ArgTy->isNullPtrType()) { + ResultScalar = + CGF.builder.getConstInt(Loc, CmpInfo.getEqualOrEquiv()->getIntValue()); + } else { + auto LtRes = CmpInfo.getLess()->getIntValue(); + auto EqRes = CmpInfo.getEqualOrEquiv()->getIntValue(); + auto GtRes = CmpInfo.getGreater()->getIntValue(); + if (!CmpInfo.isPartial()) { + // Strong ordering. + ResultScalar = CGF.builder.createThreeWayCmpStrong(Loc, LHS, RHS, LtRes, + EqRes, GtRes); + } else { + // Partial ordering. + auto UnorderedRes = CmpInfo.getUnordered()->getIntValue(); + ResultScalar = CGF.builder.createThreeWayCmpPartial( + Loc, LHS, RHS, LtRes, EqRes, GtRes, UnorderedRes); + } + } + + // Create the return value in the destination slot. + EnsureDest(Loc, E->getType()); + LValue DestLV = CGF.makeAddrLValue(Dest.getAddress(), E->getType()); + + // Emit the address of the first (and only) field in the comparison category + // type, and initialize it from the constant integer value produced above. 
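+  // (Illustrative: for `a <=> b` the result object, e.g. std::strong_ordering,
+  // wraps a single integer-like member that receives the value chosen above.)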
+ const FieldDecl *ResultField = *CmpInfo.Record->field_begin(); + LValue FieldLV = CGF.buildLValueForFieldInitialization( + DestLV, ResultField, ResultField->getName()); + CGF.buildStoreThroughLValue(RValue::get(ResultScalar), FieldLV); + + // All done! The result is in the Dest slot. +} + +void AggExprEmitter::VisitCXXParenListInitExpr(CXXParenListInitExpr *E) { + VisitCXXParenListOrInitListExpr(E, E->getInitExprs(), + E->getInitializedFieldInUnion(), + E->getArrayFiller()); +} + +void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { + // TODO(cir): use something like CGF.ErrorUnsupported + if (E->hadArrayRangeDesignator()) + llvm_unreachable("GNU array range designator extension"); + + if (E->isTransparent()) + return Visit(E->getInit(0)); + + VisitCXXParenListOrInitListExpr( + E, E->inits(), E->getInitializedFieldInUnion(), E->getArrayFiller()); +} + +void AggExprEmitter::VisitCXXParenListOrInitListExpr( + Expr *ExprToVisit, ArrayRef InitExprs, + FieldDecl *InitializedFieldInUnion, Expr *ArrayFiller) { +#if 0 + // FIXME: Assess perf here? Figure out what cases are worth optimizing here + // (Length of globals? Chunks of zeroed-out space?). + // + // If we can, prefer a copy from a global; this is a lot less code for long + // globals, and it's easier for the current optimizers to analyze. + if (llvm::Constant *C = + CGF.CGM.EmitConstantExpr(ExprToVisit, ExprToVisit->getType(), &CGF)) { + llvm::GlobalVariable* GV = + new llvm::GlobalVariable(CGF.CGM.getModule(), C->getType(), true, + llvm::GlobalValue::InternalLinkage, C, ""); + EmitFinalDestCopy(ExprToVisit->getType(), + CGF.MakeAddrLValue(GV, ExprToVisit->getType())); + return; + } +#endif + + AggValueSlot Dest = EnsureSlot(CGF.getLoc(ExprToVisit->getSourceRange()), + ExprToVisit->getType()); + + LValue DestLV = CGF.makeAddrLValue(Dest.getAddress(), ExprToVisit->getType()); + + // Handle initialization of an array. + if (ExprToVisit->getType()->isConstantArrayType()) { + auto AType = cast(Dest.getAddress().getElementType()); + buildArrayInit(Dest.getAddress(), AType, ExprToVisit->getType(), + ExprToVisit, InitExprs, ArrayFiller); + return; + } else if (ExprToVisit->getType()->isVariableArrayType()) { + llvm_unreachable("variable arrays NYI"); + return; + } + + if (ExprToVisit->getType()->isArrayType()) { + llvm_unreachable("NYI"); + } + + assert(ExprToVisit->getType()->isRecordType() && + "Only support structs/unions here!"); + + // Do struct initialization; this code just sets each individual member + // to the approprate value. This makes bitfield support automatic; + // the disadvantage is that the generated code is more difficult for + // the optimizer, especially with bitfields. + unsigned NumInitElements = InitExprs.size(); + RecordDecl *record = ExprToVisit->getType()->castAs()->getDecl(); + + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector cleanups; + CIRGenFunction::CleanupDeactivationScope DeactivateCleanups(CGF); + + unsigned curInitIndex = 0; + + // Emit initialization of base classes. + if (auto *CXXRD = dyn_cast(record)) { + assert(NumInitElements >= CXXRD->getNumBases() && + "missing initializer for base class"); + for ([[maybe_unused]] auto &Base : CXXRD->bases()) { + llvm_unreachable("NYI"); + } + } + + // Prepare a 'this' for CXXDefaultInitExprs. + CIRGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); + + if (record->isUnion()) { + // Only initialize one field of a union. The field itself is + // specified by the initializer list. 
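+    // (e.g. `union U u = { .f = 1.0f };` initializes only the member `f`.)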
+    if (!InitializedFieldInUnion) {
+      // Empty union; we have nothing to do.
+
+#ifndef NDEBUG
+      // Make sure that it's really an empty union and not a failure of
+      // semantic analysis.
+      for (const auto *Field : record->fields())
+        assert(
+            (Field->isUnnamedBitField() || Field->isAnonymousStructOrUnion()) &&
+            "Only unnamed bitfields or anonymous class allowed");
+#endif
+      return;
+    }
+
+    // FIXME: volatility
+    FieldDecl *Field = InitializedFieldInUnion;
+
+    LValue FieldLoc =
+        CGF.buildLValueForFieldInitialization(DestLV, Field, Field->getName());
+    if (NumInitElements) {
+      // Store the initializer into the field
+      buildInitializationToLValue(InitExprs[0], FieldLoc);
+    } else {
+      // Default-initialize to null.
+      buildNullInitializationToLValue(CGF.getLoc(ExprToVisit->getSourceRange()),
+                                      FieldLoc);
+    }
+
+    return;
+  }
+
+  // Here we iterate over the fields; this makes it simpler to both
+  // default-initialize fields and skip over unnamed fields.
+  for (const auto *field : record->fields()) {
+    // We're done once we hit the flexible array member.
+    if (field->getType()->isIncompleteArrayType())
+      break;
+
+    // Always skip anonymous bitfields.
+    if (field->isUnnamedBitField())
+      continue;
+
+    // We're done if we reach the end of the explicit initializers, we
+    // have a zeroed object, and the rest of the fields are
+    // zero-initializable.
+    if (curInitIndex == NumInitElements && Dest.isZeroed() &&
+        CGF.getTypes().isZeroInitializable(ExprToVisit->getType()))
+      break;
+    LValue LV =
+        CGF.buildLValueForFieldInitialization(DestLV, field, field->getName());
+    // We never generate write-barriers for initialized fields.
+    assert(!MissingFeatures::setNonGC());
+
+    if (curInitIndex < NumInitElements) {
+      // Store the initializer into the field.
+      CIRGenFunction::SourceLocRAIIObject loc{
+          CGF, CGF.getLoc(record->getSourceRange())};
+      buildInitializationToLValue(InitExprs[curInitIndex++], LV);
+    } else {
+      // We're out of initializers; default-initialize to null.
+      buildNullInitializationToLValue(CGF.getLoc(ExprToVisit->getSourceRange()),
+                                      LV);
+    }
+
+    // Push a destructor if necessary.
+    // FIXME: if we have an array of structures, all explicitly
+    // initialized, we can end up pushing a linear number of cleanups.
+    if (QualType::DestructionKind dtorKind =
+            field->getType().isDestructedType()) {
+      assert(LV.isSimple());
+      if (dtorKind) {
+        CGF.pushDestroyAndDeferDeactivation(NormalAndEHCleanup, LV.getAddress(),
+                                            field->getType(),
+                                            CGF.getDestroyer(dtorKind), false);
+      }
+    }
+
+    // From LLVM codegen, maybe not useful for CIR:
+    // If the GEP didn't get used because of a dead zero init or something
+    // else, clean it up for -O0 builds and general tidiness.
+  }
+}
+
+void AggExprEmitter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
+  // Ensure that we have a slot, but if we already do, remember
+  // whether it was externally destructed.
+  bool wasExternallyDestructed = Dest.isExternallyDestructed();
+  EnsureDest(CGF.getLoc(E->getSourceRange()), E->getType());
+
+  // We're going to push a destructor if there isn't already one.
+  Dest.setExternallyDestructed();
+
+  Visit(E->getSubExpr());
+
+  // Push that destructor we promised.
+  if (!wasExternallyDestructed)
+    CGF.buildCXXTemporary(E->getTemporary(), E->getType(), Dest.getAddress());
+}
+
+void AggExprEmitter::VisitAbstractConditionalOperator(
+    const AbstractConditionalOperator *E) {
+  auto &builder = CGF.getBuilder();
+  auto loc = CGF.getLoc(E->getSourceRange());
+
+  // Bind the common expression if necessary.
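+  // (For a GNU binary conditional, e.g. `x ?: y`, the common expression `x`
+  // is emitted once and reused as both the condition and the true result.)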
+ CIRGenFunction::OpaqueValueMapping binding(CGF, E); + CIRGenFunction::ConditionalEvaluation eval(CGF); + assert(!MissingFeatures::getProfileCount()); + + // Save whether the destination's lifetime is externally managed. + bool isExternallyDestructed = Dest.isExternallyDestructed(); + bool destructNonTrivialCStruct = + !isExternallyDestructed && + E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct; + isExternallyDestructed |= destructNonTrivialCStruct; + + CGF.buildIfOnBoolExpr( + E->getCond(), /*thenBuilder=*/ + [&](mlir::OpBuilder &, mlir::Location) { + eval.begin(CGF); + { + CIRGenFunction::LexicalScope lexScope{CGF, loc, + builder.getInsertionBlock()}; + Dest.setExternallyDestructed(isExternallyDestructed); + assert(!MissingFeatures::incrementProfileCounter()); + Visit(E->getTrueExpr()); + } + eval.end(CGF); + }, + loc, + /*elseBuilder=*/ + [&](mlir::OpBuilder &, mlir::Location) { + eval.begin(CGF); + { + CIRGenFunction::LexicalScope lexScope{CGF, loc, + builder.getInsertionBlock()}; + // If the result of an agg expression is unused, then the emission + // of the LHS might need to create a destination slot. That's fine + // with us, and we can safely emit the RHS into the same slot, but + // we shouldn't claim that it's already being destructed. + Dest.setExternallyDestructed(isExternallyDestructed); + assert(!MissingFeatures::incrementProfileCounter()); + Visit(E->getFalseExpr()); + } + eval.end(CGF); + }, + loc); + + if (destructNonTrivialCStruct) + llvm_unreachable("NYI"); + assert(!MissingFeatures::incrementProfileCounter()); +} + +void AggExprEmitter::VisitBinComma(const BinaryOperator *E) { + CGF.buildIgnoredExpr(E->getLHS()); + Visit(E->getRHS()); +} + +//===----------------------------------------------------------------------===// +// Helpers and dispatcher +//===----------------------------------------------------------------------===// + +/// Get an approximate count of the number of non-zero bytes that will be stored +/// when outputting the initializer for the specified initializer expression. +/// FIXME(cir): this can be shared with LLVM codegen. +static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CIRGenFunction &CGF) { + if (auto *MTE = dyn_cast(E)) + E = MTE->getSubExpr(); + E = E->IgnoreParenNoopCasts(CGF.getContext()); + + // 0 and 0.0 won't require any non-zero stores! + if (isSimpleZero(E, CGF)) + return CharUnits::Zero(); + + // If this is an initlist expr, sum up the size of sizes of the (present) + // elements. If this is something weird, assume the whole thing is non-zero. + const InitListExpr *ILE = dyn_cast(E); + while (ILE && ILE->isTransparent()) + ILE = dyn_cast(ILE->getInit(0)); + if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType())) + return CGF.getContext().getTypeSizeInChars(E->getType()); + + // InitListExprs for structs have to be handled carefully. If there are + // reference members, we need to consider the size of the reference, not the + // referencee. InitListExprs for unions and arrays can't have references. + if (const RecordType *RT = E->getType()->getAs()) { + if (!RT->isUnionType()) { + RecordDecl *SD = RT->getDecl(); + CharUnits NumNonZeroBytes = CharUnits::Zero(); + + unsigned ILEElement = 0; + if (auto *CXXRD = dyn_cast(SD)) + while (ILEElement != CXXRD->getNumBases()) + NumNonZeroBytes += + GetNumNonZeroBytesInInit(ILE->getInit(ILEElement++), CGF); + for (const auto *Field : SD->fields()) { + // We're done once we hit the flexible array member or run out of + // InitListExpr elements. 
+ if (Field->getType()->isIncompleteArrayType() || + ILEElement == ILE->getNumInits()) + break; + if (Field->isUnnamedBitField()) + continue; + + const Expr *E = ILE->getInit(ILEElement++); + + // Reference values are always non-null and have the width of a pointer. + if (Field->getType()->isReferenceType()) + NumNonZeroBytes += CGF.getContext().toCharUnitsFromBits( + CGF.getTarget().getPointerWidth(LangAS::Default)); + else + NumNonZeroBytes += GetNumNonZeroBytesInInit(E, CGF); + } + + return NumNonZeroBytes; + } + } + + // FIXME: This overestimates the number of non-zero bytes for bit-fields. + CharUnits NumNonZeroBytes = CharUnits::Zero(); + for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i) + NumNonZeroBytes += GetNumNonZeroBytesInInit(ILE->getInit(i), CGF); + return NumNonZeroBytes; +} + +/// If the initializer is large and has a lot of zeros in it, emit a memset and +/// avoid storing the individual zeros. +static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, + CIRGenFunction &CGF) { + // If the slot is arleady known to be zeroed, nothing to do. Don't mess with + // volatile stores. + if (Slot.isZeroed() || Slot.isVolatile() || !Slot.getAddress().isValid()) + return; + + // C++ objects with a user-declared constructor don't need zero'ing. + if (CGF.getLangOpts().CPlusPlus) + if (const auto *RT = CGF.getContext() + .getBaseElementType(E->getType()) + ->getAs()) { + const auto *RD = cast(RT->getDecl()); + if (RD->hasUserDeclaredConstructor()) + return; + } + + // If the type is 16-bytes or smaller, prefer individual stores over memset. + CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType()); + if (Size <= CharUnits::fromQuantity(16)) + return; + + // Check to see if over 3/4 of the initializer are known to be zero. If so, + // we prefer to emit memset + individual stores for the rest. + CharUnits NumNonZeroBytes = GetNumNonZeroBytesInInit(E, CGF); + if (NumNonZeroBytes * 4 > Size) + return; + + // Okay, it seems like a good idea to use an initial memset, emit the call. + auto &builder = CGF.getBuilder(); + auto loc = CGF.getLoc(E->getSourceRange()); + Address slotAddr = Slot.getAddress(); + auto zero = builder.getZero(loc, slotAddr.getElementType()); + + builder.createStore(loc, zero, slotAddr); + // Loc = CGF.Builder.CreateElementBitCast(Loc, CGF.Int8Ty); + // CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false); + + // Tell the AggExprEmitter that the slot is known zero. + Slot.setZeroed(); +} + +AggValueSlot::Overlap_t CIRGenFunction::getOverlapForBaseInit( + const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) { + // If the most-derived object is a field declared with [[no_unique_address]], + // the tail padding of any virtual base could be reused for other subobjects + // of that field's class. + if (IsVirtual) + return AggValueSlot::MayOverlap; + + // If the base class is laid out entirely within the nvsize of the derived + // class, its tail padding cannot yet be initialized, so we can issue + // stores at the full width of the base class. + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + if (Layout.getBaseClassOffset(BaseRD) + + getContext().getASTRecordLayout(BaseRD).getSize() <= + Layout.getNonVirtualSize()) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. 
+ return AggValueSlot::MayOverlap; +} + +void CIRGenFunction::buildAggExpr(const Expr *E, AggValueSlot Slot) { + assert(E && CIRGenFunction::hasAggregateEvaluationKind(E->getType()) && + "Invalid aggregate expression to emit"); + assert((Slot.getAddress().isValid() || Slot.isIgnored()) && + "slot has bits but no address"); + + // Optimize the slot if possible. + CheckAggExprForMemSetUse(Slot, E, *this); + + AggExprEmitter(*this, Slot, Slot.isIgnored()).Visit(const_cast(E)); +} + +void CIRGenFunction::buildAggregateCopy(LValue Dest, LValue Src, QualType Ty, + AggValueSlot::Overlap_t MayOverlap, + bool isVolatile) { + // TODO(cir): this function needs improvements, commented code for now since + // this will be touched again soon. + assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); + + Address DestPtr = Dest.getAddress(); + Address SrcPtr = Src.getAddress(); + + if (getLangOpts().CPlusPlus) { + if (const RecordType *RT = Ty->getAs()) { + CXXRecordDecl *Record = cast(RT->getDecl()); + assert((Record->hasTrivialCopyConstructor() || + Record->hasTrivialCopyAssignment() || + Record->hasTrivialMoveConstructor() || + Record->hasTrivialMoveAssignment() || + Record->hasAttr() || Record->isUnion()) && + "Trying to aggregate-copy a type without a trivial copy/move " + "constructor or assignment operator"); + // Ignore empty classes in C++. + if (Record->isEmpty()) + return; + } + } + + if (getLangOpts().CUDAIsDevice) { + llvm_unreachable("CUDA is NYI"); + } + + // Aggregate assignment turns into llvm.memcpy. This is almost valid per + // C99 6.5.16.1p3, which states "If the value being stored in an object is + // read from another object that overlaps in anyway the storage of the first + // object, then the overlap shall be exact and the two objects shall have + // qualified or unqualified versions of a compatible type." + // + // memcpy is not defined if the source and destination pointers are exactly + // equal, but other compilers do this optimization, and almost every memcpy + // implementation handles this case safely. If there is a libc that does not + // safely handle this, we can add a target hook. + + // Get data size info for this aggregate. Don't copy the tail padding if this + // might be a potentially-overlapping subobject, since the tail padding might + // be occupied by a different object. Otherwise, copying it is fine. + TypeInfoChars TypeInfo; + if (MayOverlap) + TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty); + else + TypeInfo = getContext().getTypeInfoInChars(Ty); + + mlir::Attribute SizeVal = nullptr; + if (TypeInfo.Width.isZero()) { + // But note that getTypeInfo returns 0 for a VLA. + if (auto *VAT = dyn_cast_or_null( + getContext().getAsArrayType(Ty))) { + llvm_unreachable("VLA is NYI"); + } + } + if (!SizeVal) { + // NOTE(cir): CIR types already carry info about their sizes. This is here + // just for codegen parity. + SizeVal = builder.getI64IntegerAttr(TypeInfo.Width.getQuantity()); + } + + // FIXME: If we have a volatile struct, the optimizer can remove what might + // appear to be `extra' memory ops: + // + // volatile struct { int i; } a, b; + // + // int main() { + // a = b; + // a = b; + // } + // + // we need to use a different call here. We use isVolatile to indicate when + // either the source or the destination is volatile. + + // NOTE(cir): original codegen would normally convert DestPtr and SrcPtr to + // i8* since memcpy operates on bytes. 
We don't need that in CIR because + // cir.copy will operate on any CIR pointer that points to a sized type. + + // Don't do any of the memmove_collectable tests if GC isn't set. + if (CGM.getLangOpts().getGC() == LangOptions::NonGC) { + // fall through + } else if (const RecordType *RecordTy = Ty->getAs()) { + RecordDecl *Record = RecordTy->getDecl(); + if (Record->hasObjectMember()) { + llvm_unreachable("ObjC is NYI"); + } + } else if (Ty->isArrayType()) { + QualType BaseType = getContext().getBaseElementType(Ty); + if (const RecordType *RecordTy = BaseType->getAs()) { + if (RecordTy->getDecl()->hasObjectMember()) { + llvm_unreachable("ObjC is NYI"); + } + } + } + + builder.createCopy(DestPtr.getPointer(), SrcPtr.getPointer(), isVolatile); + + // Determine the metadata to describe the position of any padding in this + // memcpy, as well as the TBAA tags for the members of the struct, in case + // the optimizer wishes to expand it in to scalar memory operations. + if (CGM.getCodeGenOpts().NewStructPathTBAA || MissingFeatures::tbaa()) + llvm_unreachable("TBAA is NYI"); +} + +AggValueSlot::Overlap_t +CIRGenFunction::getOverlapForFieldInit(const FieldDecl *FD) { + if (!FD->hasAttr() || !FD->getType()->isRecordType()) + return AggValueSlot::DoesNotOverlap; + + // If the field lies entirely within the enclosing class's nvsize, its tail + // padding cannot overlap any already-initialized object. (The only subobjects + // with greater addresses that might already be initialized are vbases.) + const RecordDecl *ClassRD = FD->getParent(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(ClassRD); + if (Layout.getFieldOffset(FD->getFieldIndex()) + + getContext().getTypeSize(FD->getType()) <= + (uint64_t)getContext().toBits(Layout.getNonVirtualSize())) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. + return AggValueSlot::MayOverlap; +} + +LValue CIRGenFunction::buildAggExprToLValue(const Expr *E) { + assert(hasAggregateEvaluationKind(E->getType()) && "Invalid argument!"); + Address Temp = CreateMemTemp(E->getType(), getLoc(E->getSourceRange())); + LValue LV = makeAddrLValue(Temp, E->getType()); + buildAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap)); + return LV; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp new file mode 100644 index 000000000000..7dded241490b --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -0,0 +1,1245 @@ +//===--- CIRGenExprCXX.cpp - Emit CIR Code for C++ expressions ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with code generation of C++ expressions +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/MissingFeatures.h" +#include +#include +#include +#include + +#include + +using namespace cir; +using namespace clang; + +namespace { +struct MemberCallInfo { + RequiredArgs ReqArgs; + // Number of prefix arguments for the call. Ignores the `this` pointer. 
+ unsigned PrefixSize; +}; +} // namespace + +static RValue buildNewDeleteCall(CIRGenFunction &CGF, + const FunctionDecl *CalleeDecl, + const FunctionProtoType *CalleeType, + const CallArgList &Args); + +static MemberCallInfo +commonBuildCXXMemberOrOperatorCall(CIRGenFunction &CGF, const CXXMethodDecl *MD, + mlir::Value This, mlir::Value ImplicitParam, + QualType ImplicitParamTy, const CallExpr *CE, + CallArgList &Args, CallArgList *RtlArgs) { + assert(CE == nullptr || isa(CE) || + isa(CE)); + assert(MD->isInstance() && + "Trying to emit a member or operator call expr on a static method!"); + + // Push the this ptr. + const CXXRecordDecl *RD = + CGF.CGM.getCXXABI().getThisArgumentTypeForMethod(MD); + Args.add(RValue::get(This), CGF.getTypes().DeriveThisType(RD, MD)); + + // If there is an implicit parameter (e.g. VTT), emit it. + if (ImplicitParam) { + llvm_unreachable("NYI"); + } + + const auto *FPT = MD->getType()->castAs(); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); + unsigned PrefixSize = Args.size() - 1; + + // Add the rest of the call args + if (RtlArgs) { + // Special case: if the caller emitted the arguments right-to-left already + // (prior to emitting the *this argument), we're done. This happens for + // assignment operators. + Args.addFrom(*RtlArgs); + } else if (CE) { + // Special case: skip first argument of CXXOperatorCall (it is "this"). + unsigned ArgsToSkip = isa(CE) ? 1 : 0; + CGF.buildCallArgs(Args, FPT, drop_begin(CE->arguments(), ArgsToSkip), + CE->getDirectCallee()); + } else { + assert( + FPT->getNumParams() == 0 && + "No CallExpr specified for function with non-zero number of arguments"); + } + + return {required, PrefixSize}; +} + +RValue CIRGenFunction::buildCXXMemberOrOperatorCall( + const CXXMethodDecl *MD, const CIRGenCallee &Callee, + ReturnValueSlot ReturnValue, mlir::Value This, mlir::Value ImplicitParam, + QualType ImplicitParamTy, const CallExpr *CE, CallArgList *RtlArgs) { + + const auto *FPT = MD->getType()->castAs(); + CallArgList Args; + MemberCallInfo CallInfo = commonBuildCXXMemberOrOperatorCall( + *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs); + auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall( + Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize); + assert((CE || currSrcLoc) && "expected source location"); + mlir::Location loc = CE ? getLoc(CE->getExprLoc()) : *currSrcLoc; + return buildCall(FnInfo, Callee, ReturnValue, Args, nullptr, + CE && CE == MustTailCall, loc, CE); +} + +// TODO(cir): this can be shared with LLVM codegen +static CXXRecordDecl *getCXXRecord(const Expr *E) { + QualType T = E->getType(); + if (const PointerType *PTy = T->getAs()) + T = PTy->getPointeeType(); + const RecordType *Ty = T->castAs(); + return cast(Ty->getDecl()); +} + +RValue +CIRGenFunction::buildCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, + ReturnValueSlot ReturnValue) { + const BinaryOperator *BO = + cast(E->getCallee()->IgnoreParens()); + const Expr *BaseExpr = BO->getLHS(); + const Expr *MemFnExpr = BO->getRHS(); + + const auto *MPT = MemFnExpr->getType()->castAs(); + const auto *FPT = MPT->getPointeeType()->castAs(); + + // Emit the 'this' pointer. + Address This = Address::invalid(); + if (BO->getOpcode() == BO_PtrMemI) + This = buildPointerWithAlignment(BaseExpr, nullptr, nullptr, KnownNonNull); + else + This = buildLValue(BaseExpr).getAddress(); + + buildTypeCheck(TCK_MemberCall, E->getExprLoc(), This.emitRawPointer(), + QualType(MPT->getClass(), 0)); + + // Get the member function pointer. 
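+  // (Illustrative: this path handles calls through a pointer to member
+  // function, e.g. `(obj.*pmf)(x)` or `(ptr->*pmf)(x)`.)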
+ mlir::Value MemFnPtr = buildScalarExpr(MemFnExpr); + + // Resolve the member function pointer to the actual callee and adjust the + // "this" pointer for call. + auto Loc = getLoc(E->getExprLoc()); + auto [CalleePtr, AdjustedThis] = + builder.createGetMethod(Loc, MemFnPtr, This.getPointer()); + + // Prepare the call arguments. + CallArgList ArgsList; + ArgsList.add(RValue::get(AdjustedThis), getContext().VoidPtrTy); + buildCallArgs(ArgsList, FPT, E->arguments()); + + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); + + // Build the call. + CIRGenCallee Callee(FPT, CalleePtr.getDefiningOp()); + return buildCall(CGM.getTypes().arrangeCXXMethodCall(ArgsList, FPT, required, + /*PrefixSize=*/0), + Callee, ReturnValue, ArgsList, nullptr, E == MustTailCall, + Loc); +} + +RValue CIRGenFunction::buildCXXMemberOrOperatorMemberCallExpr( + const CallExpr *CE, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue, + bool HasQualifier, NestedNameSpecifier *Qualifier, bool IsArrow, + const Expr *Base) { + assert(isa(CE) || isa(CE)); + + // Compute the object pointer. + bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier; + const CXXMethodDecl *DevirtualizedMethod = nullptr; + if (CanUseVirtualCall && + MD->getDevirtualizedMethod(Base, getLangOpts().AppleKext)) { + const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); + DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); + assert(DevirtualizedMethod); + const CXXRecordDecl *DevirtualizedClass = DevirtualizedMethod->getParent(); + const Expr *Inner = Base->IgnoreParenBaseCasts(); + if (DevirtualizedMethod->getReturnType().getCanonicalType() != + MD->getReturnType().getCanonicalType()) { + // If the return types are not the same, this might be a case where more + // code needs to run to compensate for it. For example, the derived + // method might return a type that inherits form from the return + // type of MD and has a prefix. + // For now we just avoid devirtualizing these covariant cases. + DevirtualizedMethod = nullptr; + } else if (getCXXRecord(Inner) == DevirtualizedClass) { + // If the class of the Inner expression is where the dynamic method + // is defined, build the this pointer from it. + Base = Inner; + } else if (getCXXRecord(Base) != DevirtualizedClass) { + // If the method is defined in a class that is not the best dynamic + // one or the one of the full expression, we would have to build + // a derived-to-base cast to compute the correct this pointer, but + // we don't have support for that yet, so do a virtual call. + assert(!MissingFeatures::buildDerivedToBaseCastForDevirt()); + DevirtualizedMethod = nullptr; + } + } + + bool TrivialForCodegen = + MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion()); + bool TrivialAssignment = + TrivialForCodegen && + (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) && + !MD->getParent()->mayInsertExtraPadding(); + (void)TrivialAssignment; + + // C++17 demands that we evaluate the RHS of a (possibly-compound) assignment + // operator before the LHS. 
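+  // (e.g. in `a = f()` with a user-declared `operator=`, `f()` must be
+  // evaluated before the object expression `a`.)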
+ CallArgList RtlArgStorage; + CallArgList *RtlArgs = nullptr; + LValue TrivialAssignmentRHS; + if (auto *OCE = dyn_cast(CE)) { + if (OCE->isAssignmentOp()) { + // See further note on TrivialAssignment, we don't handle this during + // codegen, differently than LLVM, which early optimizes like this: + // if (TrivialAssignment) { + // TrivialAssignmentRHS = buildLValue(CE->getArg(1)); + // } else { + RtlArgs = &RtlArgStorage; + buildCallArgs(*RtlArgs, MD->getType()->castAs(), + drop_begin(CE->arguments(), 1), CE->getDirectCallee(), + /*ParamsToSkip*/ 0, EvaluationOrder::ForceRightToLeft); + } + } + + LValue This; + if (IsArrow) { + LValueBaseInfo BaseInfo; + assert(!MissingFeatures::tbaa()); + Address ThisValue = buildPointerWithAlignment(Base, &BaseInfo); + This = makeAddrLValue(ThisValue, Base->getType(), BaseInfo); + } else { + This = buildLValue(Base); + } + + if (const CXXConstructorDecl *Ctor = dyn_cast(MD)) { + llvm_unreachable("NYI"); + } + + if (TrivialForCodegen) { + if (isa(MD)) + return RValue::get(nullptr); + + if (TrivialAssignment) { + // From LLVM codegen: + // We don't like to generate the trivial copy/move assignment operator + // when it isn't necessary; just produce the proper effect here. + // It's important that we use the result of EmitLValue here rather than + // emitting call arguments, in order to preserve TBAA information from + // the RHS. + // + // We don't early optimize like LLVM does: + // LValue RHS = isa(CE) ? TrivialAssignmentRHS + // : + // buildLValue(*CE->arg_begin()); + // buildAggregateAssign(This, RHS, CE->getType()); + // return RValue::get(This.getPointer()); + } else { + assert(MD->getParent()->mayInsertExtraPadding() && + "unknown trivial member function"); + } + } + + // Compute the function type we're calling + const CXXMethodDecl *CalleeDecl = + DevirtualizedMethod ? DevirtualizedMethod : MD; + const CIRGenFunctionInfo *FInfo = nullptr; + if (const auto *Dtor = dyn_cast(CalleeDecl)) + llvm_unreachable("NYI"); + else + FInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(CalleeDecl); + + auto Ty = CGM.getTypes().GetFunctionType(*FInfo); + + // C++11 [class.mfct.non-static]p2: + // If a non-static member function of a class X is called for an object that + // is not of type X, or of a type derived from X, the behavior is undefined. + SourceLocation CallLoc; + ASTContext &C = getContext(); + (void)C; + if (CE) + CallLoc = CE->getExprLoc(); + + SanitizerSet SkippedChecks; + if (const auto *cmce = dyn_cast(CE)) { + auto *ioa = cmce->getImplicitObjectArgument(); + auto isImplicitObjectCXXThis = isWrappedCXXThis(ioa); + if (isImplicitObjectCXXThis) + SkippedChecks.set(SanitizerKind::Alignment, true); + if (isImplicitObjectCXXThis || isa(ioa)) + SkippedChecks.set(SanitizerKind::Null, true); + } + + if (MissingFeatures::buildTypeCheck()) + llvm_unreachable("NYI"); + + // C++ [class.virtual]p12: + // Explicit qualification with the scope operator (5.1) suppresses the + // virtual call mechanism. + // + // We also don't emit a virtual call if the base expression has a record type + // because then we know what the type is. + bool useVirtualCall = CanUseVirtualCall && !DevirtualizedMethod; + + if (const auto *dtor = dyn_cast(CalleeDecl)) { + llvm_unreachable("NYI"); + } + + // FIXME: Uses of 'MD' past this point need to be audited. We may need to use + // 'CalleeDecl' instead. 
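+  // For illustration (hypothetical): given 'struct D : B { void f() override; }',
+  // a qualified call 'd.B::f()' sets HasQualifier, so CanUseVirtualCall and
+  // useVirtualCall below stay false and a direct call to B::f is emitted.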
+ + CIRGenCallee Callee; + if (useVirtualCall) { + Callee = CIRGenCallee::forVirtual(CE, MD, This.getAddress(), Ty); + } else { + if (SanOpts.has(SanitizerKind::CFINVCall)) { + llvm_unreachable("NYI"); + } + + if (getLangOpts().AppleKext) + llvm_unreachable("NYI"); + else if (!DevirtualizedMethod) + // TODO(cir): shouldn't this call getAddrOfCXXStructor instead? + Callee = CIRGenCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), + GlobalDecl(MD)); + else { + Callee = CIRGenCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), + GlobalDecl(MD)); + } + } + + if (MD->isVirtual()) { + Address NewThisAddr = + CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( + *this, CalleeDecl, This.getAddress(), useVirtualCall); + This.setAddress(NewThisAddr); + } + + return buildCXXMemberOrOperatorCall( + CalleeDecl, Callee, ReturnValue, This.getPointer(), + /*ImplicitParam=*/nullptr, QualType(), CE, RtlArgs); +} + +RValue +CIRGenFunction::buildCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, + const CXXMethodDecl *MD, + ReturnValueSlot ReturnValue) { + assert(MD->isInstance() && + "Trying to emit a member call expr on a static method!"); + return buildCXXMemberOrOperatorMemberCallExpr( + E, MD, ReturnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr, + /*IsArrow=*/false, E->getArg(0)); +} + +void CIRGenFunction::buildCXXConstructExpr(const CXXConstructExpr *E, + AggValueSlot Dest) { + assert(!Dest.isIgnored() && "Must have a destination!"); + const auto *CD = E->getConstructor(); + + // If we require zero initialization before (or instead of) calling the + // constructor, as can be the case with a non-user-provided default + // constructor, emit the zero initialization now, unless destination is + // already zeroed. + if (E->requiresZeroInitialization() && !Dest.isZeroed()) { + switch (E->getConstructionKind()) { + case CXXConstructionKind::Delegating: + case CXXConstructionKind::Complete: + buildNullInitialization(getLoc(E->getSourceRange()), Dest.getAddress(), + E->getType()); + break; + case CXXConstructionKind::VirtualBase: + case CXXConstructionKind::NonVirtualBase: + llvm_unreachable("NYI"); + break; + } + } + + // If this is a call to a trivial default constructor: + // In LLVM: do nothing. + // In CIR: emit as a regular call, other later passes should lower the + // ctor call into trivial initialization. + // if (CD->isTrivial() && CD->isDefaultConstructor()) + // return; + + // Elide the constructor if we're constructing from a temporary + if (getLangOpts().ElideConstructors && E->isElidable()) { + // FIXME: This only handles the simplest case, where the source object is + // passed directly as the first argument to the constructor. This + // should also handle stepping through implicit casts and conversion + // sequences which involve two steps, with a conversion operator + // follwed by a converting constructor. 
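+    // For illustration (hypothetical): given 'T make();', the copy in
+    //   T t = make();
+    // is elidable, so the temporary returned by make() is emitted straight
+    // into 'Dest' rather than through a copy constructor call.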
+ const auto *SrcObj = E->getArg(0); + assert(SrcObj->isTemporaryObject(getContext(), CD->getParent())); + assert( + getContext().hasSameUnqualifiedType(E->getType(), SrcObj->getType())); + buildAggExpr(SrcObj, Dest); + return; + } + + if (const ArrayType *arrayType = getContext().getAsArrayType(E->getType())) { + buildCXXAggrConstructorCall(CD, arrayType, Dest.getAddress(), E, + Dest.isSanitizerChecked()); + } else { + clang::CXXCtorType Type = Ctor_Complete; + bool ForVirtualBase = false; + bool Delegating = false; + + switch (E->getConstructionKind()) { + case CXXConstructionKind::Complete: + Type = Ctor_Complete; + break; + case CXXConstructionKind::Delegating: + // We should be emitting a constructor; GlobalDecl will assert this + Type = CurGD.getCtorType(); + Delegating = true; + break; + case CXXConstructionKind::VirtualBase: + ForVirtualBase = true; + [[fallthrough]]; + case CXXConstructionKind::NonVirtualBase: + Type = Ctor_Base; + break; + } + + buildCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, Dest, E); + } +} + +namespace { +/// The parameters to pass to a usual operator delete. +struct UsualDeleteParams { + bool DestroyingDelete = false; + bool Size = false; + bool Alignment = false; +}; +} // namespace + +// FIXME(cir): this should be shared with LLVM codegen +static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *FD) { + UsualDeleteParams Params; + + const FunctionProtoType *FPT = FD->getType()->castAs(); + auto AI = FPT->param_type_begin(), AE = FPT->param_type_end(); + + // The first argument is always a void*. + ++AI; + + // The next parameter may be a std::destroying_delete_t. + if (FD->isDestroyingOperatorDelete()) { + Params.DestroyingDelete = true; + assert(AI != AE); + ++AI; + } + + // Figure out what other parameters we should be implicitly passing. + if (AI != AE && (*AI)->isIntegerType()) { + Params.Size = true; + ++AI; + } + + if (AI != AE && (*AI)->isAlignValT()) { + Params.Alignment = true; + ++AI; + } + + assert(AI == AE && "unexpected usual deallocation function parameter"); + return Params; +} + +static mlir::Value buildCXXNewAllocSize(CIRGenFunction &CGF, + const CXXNewExpr *e, + unsigned minElements, + mlir::Value &numElements, + mlir::Value &sizeWithoutCookie) { + QualType type = e->getAllocatedType(); + + if (!e->isArray()) { + CharUnits typeSize = CGF.getContext().getTypeSizeInChars(type); + sizeWithoutCookie = CGF.getBuilder().getConstant( + CGF.getLoc(e->getSourceRange()), + mlir::cir::IntAttr::get(CGF.SizeTy, typeSize.getQuantity())); + return sizeWithoutCookie; + } + + llvm_unreachable("NYI"); +} + +namespace { +/// A cleanup to call the given 'operator delete' function upon abnormal +/// exit from a new expression. Templated on a traits type that deals with +/// ensuring that the arguments dominate the cleanup if necessary. +template +class CallDeleteDuringNew final : public EHScopeStack::Cleanup { + /// Type used to hold llvm::Value*s. + typedef typename Traits::ValueTy ValueTy; + /// Type used to hold RValues. 
+ typedef typename Traits::RValueTy RValueTy; + struct PlacementArg { + RValueTy ArgValue; + QualType ArgType; + }; + + unsigned NumPlacementArgs : 31; + unsigned PassAlignmentToPlacementDelete : 1; + const FunctionDecl *OperatorDelete; + ValueTy Ptr; + ValueTy AllocSize; + CharUnits AllocAlign; + + PlacementArg *getPlacementArgs() { + return reinterpret_cast(this + 1); + } + +public: + static size_t getExtraSize(size_t NumPlacementArgs) { + return NumPlacementArgs * sizeof(PlacementArg); + } + + CallDeleteDuringNew(size_t NumPlacementArgs, + const FunctionDecl *OperatorDelete, ValueTy Ptr, + ValueTy AllocSize, bool PassAlignmentToPlacementDelete, + CharUnits AllocAlign) + : NumPlacementArgs(NumPlacementArgs), + PassAlignmentToPlacementDelete(PassAlignmentToPlacementDelete), + OperatorDelete(OperatorDelete), Ptr(Ptr), AllocSize(AllocSize), + AllocAlign(AllocAlign) {} + + void setPlacementArg(unsigned I, RValueTy Arg, QualType Type) { + assert(I < NumPlacementArgs && "index out of range"); + getPlacementArgs()[I] = {Arg, Type}; + } + + void Emit(CIRGenFunction &CGF, Flags flags) override { + const auto *FPT = OperatorDelete->getType()->castAs(); + CallArgList DeleteArgs; + + // The first argument is always a void* (or C* for a destroying operator + // delete for class type C). + DeleteArgs.add(Traits::get(CGF, Ptr), FPT->getParamType(0)); + + // Figure out what other parameters we should be implicitly passing. + UsualDeleteParams Params; + if (NumPlacementArgs) { + // A placement deallocation function is implicitly passed an alignment + // if the placement allocation function was, but is never passed a size. + Params.Alignment = PassAlignmentToPlacementDelete; + } else { + // For a non-placement new-expression, 'operator delete' can take a + // size and/or an alignment if it has the right parameters. + Params = getUsualDeleteParams(OperatorDelete); + } + + assert(!Params.DestroyingDelete && + "should not call destroying delete in a new-expression"); + + // The second argument can be a std::size_t (for non-placement delete). + if (Params.Size) + DeleteArgs.add(Traits::get(CGF, AllocSize), + CGF.getContext().getSizeType()); + + // The next (second or third) argument can be a std::align_val_t, which + // is an enum whose underlying type is std::size_t. + // FIXME: Use the right type as the parameter type. Note that in a call + // to operator delete(size_t, ...), we may not have it available. + if (Params.Alignment) { + llvm_unreachable("NYI"); + } + + // Pass the rest of the arguments, which must match exactly. + for (unsigned I = 0; I != NumPlacementArgs; ++I) { + auto Arg = getPlacementArgs()[I]; + DeleteArgs.add(Traits::get(CGF, Arg.ArgValue), Arg.ArgType); + } + + // Call 'operator delete'. + buildNewDeleteCall(CGF, OperatorDelete, FPT, DeleteArgs); + } +}; +} // namespace + +/// Enter a cleanup to call 'operator delete' if the initializer in a +/// new-expression throws. +static void EnterNewDeleteCleanup(CIRGenFunction &CGF, const CXXNewExpr *E, + Address NewPtr, mlir::Value AllocSize, + CharUnits AllocAlign, + const CallArgList &NewArgs) { + unsigned NumNonPlacementArgs = E->passAlignment() ? 2 : 1; + + // If we're not inside a conditional branch, then the cleanup will + // dominate and we can do the easier (and more efficient) thing. 
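+  // For illustration: an unconditionally evaluated new-expression such as
+  //   p = new (buf) T(arg);
+  // takes the direct path below, whereas one nested in a conditional operand,
+  //   q = use_new ? new (buf) T(arg) : nullptr;
+  // must save the pointer and allocation size as dominating values so the
+  // cleanup can still refer to them.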
+ if (!CGF.isInConditionalBranch()) { + struct DirectCleanupTraits { + typedef mlir::Value ValueTy; + typedef RValue RValueTy; + static RValue get(CIRGenFunction &, ValueTy V) { return RValue::get(V); } + static RValue get(CIRGenFunction &, RValueTy V) { return V; } + }; + + typedef CallDeleteDuringNew DirectCleanup; + + DirectCleanup *Cleanup = CGF.EHStack.pushCleanupWithExtra( + EHCleanup, E->getNumPlacementArgs(), E->getOperatorDelete(), + NewPtr.getPointer(), AllocSize, E->passAlignment(), AllocAlign); + for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) { + auto &Arg = NewArgs[I + NumNonPlacementArgs]; + Cleanup->setPlacementArg( + I, Arg.getRValue(CGF, CGF.getLoc(E->getSourceRange())), Arg.Ty); + } + + return; + } + + // Otherwise, we need to save all this stuff. + DominatingValue::saved_type SavedNewPtr = + DominatingValue::save(CGF, RValue::get(NewPtr.getPointer())); + DominatingValue::saved_type SavedAllocSize = + DominatingValue::save(CGF, RValue::get(AllocSize)); + + struct ConditionalCleanupTraits { + typedef DominatingValue::saved_type ValueTy; + typedef DominatingValue::saved_type RValueTy; + static RValue get(CIRGenFunction &CGF, ValueTy V) { return V.restore(CGF); } + }; + typedef CallDeleteDuringNew ConditionalCleanup; + + ConditionalCleanup *Cleanup = + CGF.EHStack.pushCleanupWithExtra( + EHCleanup, E->getNumPlacementArgs(), E->getOperatorDelete(), + SavedNewPtr, SavedAllocSize, E->passAlignment(), AllocAlign); + for (unsigned I = 0, N = E->getNumPlacementArgs(); I != N; ++I) { + auto &Arg = NewArgs[I + NumNonPlacementArgs]; + Cleanup->setPlacementArg( + I, + DominatingValue::save( + CGF, Arg.getRValue(CGF, CGF.getLoc(E->getSourceRange()))), + Arg.Ty); + } + + CGF.initFullExprCleanup(); +} + +static void StoreAnyExprIntoOneUnit(CIRGenFunction &CGF, const Expr *Init, + QualType AllocType, Address NewPtr, + AggValueSlot::Overlap_t MayOverlap) { + // FIXME: Refactor with buildExprAsInit. + switch (CGF.getEvaluationKind(AllocType)) { + case TEK_Scalar: + CGF.buildScalarInit(Init, CGF.getLoc(Init->getSourceRange()), + CGF.makeAddrLValue(NewPtr, AllocType), false); + return; + case TEK_Complex: + llvm_unreachable("NYI"); + return; + case TEK_Aggregate: { + AggValueSlot Slot = AggValueSlot::forAddr( + NewPtr, AllocType.getQualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + MayOverlap, AggValueSlot::IsNotZeroed, + AggValueSlot::IsSanitizerChecked); + CGF.buildAggExpr(Init, Slot); + return; + } + } + llvm_unreachable("bad evaluation kind"); +} + +static void buildNewInitializer(CIRGenFunction &CGF, const CXXNewExpr *E, + QualType ElementType, mlir::Type ElementTy, + Address NewPtr, mlir::Value NumElements, + mlir::Value AllocSizeWithoutCookie) { + assert(!MissingFeatures::generateDebugInfo()); + if (E->isArray()) { + llvm_unreachable("NYI"); + } else if (const Expr *Init = E->getInitializer()) { + StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr, + AggValueSlot::DoesNotOverlap); + } +} + +static CharUnits CalculateCookiePadding(CIRGenFunction &CGF, + const CXXNewExpr *E) { + if (!E->isArray()) + return CharUnits::Zero(); + + // No cookie is required if the operator new[] being used is the + // reserved placement operator new[]. + if (E->getOperatorNew()->isReservedGlobalPlacementOperator()) + return CharUnits::Zero(); + + llvm_unreachable("NYI"); + // return CGF.CGM.getCXXABI().GetArrayCookieSize(E); +} + +namespace { +/// Calls the given 'operator delete' on a single object. 
+struct CallObjectDelete final : EHScopeStack::Cleanup { + mlir::Value Ptr; + const FunctionDecl *OperatorDelete; + QualType ElementType; + + CallObjectDelete(mlir::Value Ptr, const FunctionDecl *OperatorDelete, + QualType ElementType) + : Ptr(Ptr), OperatorDelete(OperatorDelete), ElementType(ElementType) {} + + void Emit(CIRGenFunction &CGF, Flags flags) override { + CGF.buildDeleteCall(OperatorDelete, Ptr, ElementType); + } +}; +} // namespace + +/// Emit the code for deleting a single object. +/// \return \c true if we started emitting UnconditionalDeleteBlock, \c false +/// if not. +static bool EmitObjectDelete(CIRGenFunction &CGF, const CXXDeleteExpr *DE, + Address Ptr, QualType ElementType) { + // C++11 [expr.delete]p3: + // If the static type of the object to be deleted is different from its + // dynamic type, the static type shall be a base class of the dynamic type + // of the object to be deleted and the static type shall have a virtual + // destructor or the behavior is undefined. + CGF.buildTypeCheck(CIRGenFunction::TCK_MemberCall, DE->getExprLoc(), + Ptr.getPointer(), ElementType); + + const FunctionDecl *OperatorDelete = DE->getOperatorDelete(); + assert(!OperatorDelete->isDestroyingOperatorDelete()); + + // Find the destructor for the type, if applicable. If the + // destructor is virtual, we'll just emit the vcall and return. + const CXXDestructorDecl *Dtor = nullptr; + if (const RecordType *RT = ElementType->getAs()) { + CXXRecordDecl *RD = cast(RT->getDecl()); + if (RD->hasDefinition() && !RD->hasTrivialDestructor()) { + Dtor = RD->getDestructor(); + + if (Dtor->isVirtual()) { + bool UseVirtualCall = true; + const Expr *Base = DE->getArgument(); + if (auto *DevirtualizedDtor = dyn_cast_or_null( + Dtor->getDevirtualizedMethod( + Base, CGF.CGM.getLangOpts().AppleKext))) { + UseVirtualCall = false; + const CXXRecordDecl *DevirtualizedClass = + DevirtualizedDtor->getParent(); + if (declaresSameEntity(getCXXRecord(Base), DevirtualizedClass)) { + // Devirtualized to the class of the base type (the type of the + // whole expression). + Dtor = DevirtualizedDtor; + } else { + // Devirtualized to some other type. Would need to cast the this + // pointer to that type but we don't have support for that yet, so + // do a virtual call. FIXME: handle the case where it is + // devirtualized to the derived type (the type of the inner + // expression) as in EmitCXXMemberOrOperatorMemberCallExpr. + UseVirtualCall = true; + } + } + if (UseVirtualCall) { + llvm_unreachable("NYI"); + return false; + } + } + } + } + + // Make sure that we call delete even if the dtor throws. + // This doesn't have to a conditional cleanup because we're going + // to pop it off in a second. + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, Ptr.getPointer(), OperatorDelete, ElementType); + + if (Dtor) { + llvm_unreachable("NYI"); + } else if (auto Lifetime = ElementType.getObjCLifetime()) { + switch (Lifetime) { + case Qualifiers::OCL_None: + case Qualifiers::OCL_ExplicitNone: + case Qualifiers::OCL_Autoreleasing: + break; + + case Qualifiers::OCL_Strong: + llvm_unreachable("NYI"); + break; + + case Qualifiers::OCL_Weak: + llvm_unreachable("NYI"); + break; + } + } + + // In traditional LLVM codegen null checks are emitted to save a delete call. + // In CIR we optimize for size by default, the null check should be added into + // this function callers. 
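+  // Sketch of the guard classic codegen would emit here:
+  //   if (ptr != nullptr) { <run dtor if any>; operator delete(ptr); }
+  // CIR instead leaves that null check to this function's callers.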
+ assert(!MissingFeatures::emitNullCheckForDeleteCalls()); + + CGF.PopCleanupBlock(); + return false; +} + +void CIRGenFunction::buildCXXDeleteExpr(const CXXDeleteExpr *E) { + const Expr *Arg = E->getArgument(); + Address Ptr = buildPointerWithAlignment(Arg); + + // Null check the pointer. + // + // We could avoid this null check if we can determine that the object + // destruction is trivial and doesn't require an array cookie; we can + // unconditionally perform the operator delete call in that case. For now, we + // assume that deleted pointers are null rarely enough that it's better to + // keep the branch. This might be worth revisiting for a -O0 code size win. + // + // CIR note: emit the code size friendly by default for now, such as mentioned + // in `EmitObjectDelete`. + assert(!MissingFeatures::emitNullCheckForDeleteCalls()); + QualType DeleteTy = E->getDestroyedType(); + + // A destroying operator delete overrides the entire operation of the + // delete expression. + if (E->getOperatorDelete()->isDestroyingOperatorDelete()) { + llvm_unreachable("NYI"); + return; + } + + // We might be deleting a pointer to array. If so, GEP down to the + // first non-array element. + // (this assumes that A(*)[3][7] is converted to [3 x [7 x %A]]*) + if (DeleteTy->isConstantArrayType()) { + llvm_unreachable("NYI"); + } + + assert(convertTypeForMem(DeleteTy) == Ptr.getElementType()); + + if (E->isArrayForm()) { + llvm_unreachable("NYI"); + } else { + (void)EmitObjectDelete(*this, E, Ptr, DeleteTy); + } +} + +mlir::Value CIRGenFunction::buildCXXNewExpr(const CXXNewExpr *E) { + // The element type being allocated. + QualType allocType = getContext().getBaseElementType(E->getAllocatedType()); + + // 1. Build a call to the allocation function. + FunctionDecl *allocator = E->getOperatorNew(); + + // If there is a brace-initializer, cannot allocate fewer elements than inits. + unsigned minElements = 0; + if (E->isArray() && E->hasInitializer()) { + const InitListExpr *ILE = dyn_cast(E->getInitializer()); + if (ILE && ILE->isStringLiteralInit()) + minElements = + cast(ILE->getType()->getAsArrayTypeUnsafe()) + ->getSize() + .getZExtValue(); + else if (ILE) + minElements = ILE->getNumInits(); + } + + mlir::Value numElements = nullptr; + mlir::Value allocSizeWithoutCookie = nullptr; + mlir::Value allocSize = buildCXXNewAllocSize( + *this, E, minElements, numElements, allocSizeWithoutCookie); + CharUnits allocAlign = getContext().getTypeAlignInChars(allocType); + + // Emit the allocation call. + Address allocation = Address::invalid(); + CallArgList allocatorArgs; + if (allocator->isReservedGlobalPlacementOperator()) { + // If the allocator is a global placement operator, just + // "inline" it directly. + assert(E->getNumPlacementArgs() == 1); + const Expr *arg = *E->placement_arguments().begin(); + + LValueBaseInfo BaseInfo; + allocation = buildPointerWithAlignment(arg, &BaseInfo); + + // The pointer expression will, in many cases, be an opaque void*. + // In these cases, discard the computed alignment and use the + // formal alignment of the allocated type. + if (BaseInfo.getAlignmentSource() != AlignmentSource::Decl) + allocation = allocation.withAlignment(allocAlign); + + // Set up allocatorArgs for the call to operator delete if it's not + // the reserved global operator. 
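+    // For illustration (hypothetical): 'new (buf) T(args)' using the reserved
+    // '::operator new(std::size_t, void *)' emits no allocation call at all;
+    // 'buf' itself becomes the allocation, and when the pointer is an opaque
+    // void* its alignment is replaced by the formal alignment of T above.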
+ if (E->getOperatorDelete() && + !E->getOperatorDelete()->isReservedGlobalPlacementOperator()) { + allocatorArgs.add(RValue::get(allocSize), getContext().getSizeType()); + allocatorArgs.add(RValue::get(allocation.getPointer()), arg->getType()); + } + } else { + const FunctionProtoType *allocatorType = + allocator->getType()->castAs(); + unsigned ParamsToSkip = 0; + + // The allocation size is the first argument. + QualType sizeType = getContext().getSizeType(); + allocatorArgs.add(RValue::get(allocSize), sizeType); + ++ParamsToSkip; + + if (allocSize != allocSizeWithoutCookie) { + llvm_unreachable("NYI"); + } + + // The allocation alignment may be passed as the second argument. + if (E->passAlignment()) { + llvm_unreachable("NYI"); + } + + // FIXME: Why do we not pass a CalleeDecl here? + buildCallArgs(allocatorArgs, allocatorType, E->placement_arguments(), + /*AC*/ + AbstractCallee(), + /*ParamsToSkip*/ + ParamsToSkip); + RValue RV = + buildNewDeleteCall(*this, allocator, allocatorType, allocatorArgs); + + // Set !heapallocsite metadata on the call to operator new. + assert(!MissingFeatures::generateDebugInfo()); + + // If this was a call to a global replaceable allocation function that does + // not take an alignment argument, the allocator is known to produce storage + // that's suitably aligned for any object that fits, up to a known + // threshold. Otherwise assume it's suitably aligned for the allocated type. + CharUnits allocationAlign = allocAlign; + if (!E->passAlignment() && + allocator->isReplaceableGlobalAllocationFunction()) { + auto &Target = CGM.getASTContext().getTargetInfo(); + unsigned AllocatorAlign = llvm::bit_floor(std::min( + Target.getNewAlign(), getContext().getTypeSize(allocType))); + allocationAlign = std::max( + allocationAlign, getContext().toCharUnitsFromBits(AllocatorAlign)); + } + + allocation = Address(RV.getScalarVal(), UInt8Ty, allocationAlign); + } + + // Emit a null check on the allocation result if the allocation + // function is allowed to return null (because it has a non-throwing + // exception spec or is the reserved placement new) and we have an + // interesting initializer will be running sanitizers on the initialization. + bool nullCheck = E->shouldNullCheckAllocation() && + (!allocType.isPODType(getContext()) || E->hasInitializer() || + sanitizePerformTypeCheck()); + + // The null-check means that the initializer is conditionally + // evaluated. + mlir::OpBuilder::InsertPoint ifBody, postIfBody, preIfBody; + mlir::Value nullCmpResult; + mlir::Location loc = getLoc(E->getSourceRange()); + + if (nullCheck) { + mlir::Value nullPtr = + builder.getNullPtr(allocation.getPointer().getType(), loc); + nullCmpResult = builder.createCompare(loc, mlir::cir::CmpOpKind::ne, + allocation.getPointer(), nullPtr); + preIfBody = builder.saveInsertionPoint(); + builder.create(loc, nullCmpResult, + /*withElseRegion=*/false, + [&](mlir::OpBuilder &, mlir::Location) { + ifBody = builder.saveInsertionPoint(); + }); + postIfBody = builder.saveInsertionPoint(); + } + + // Make sure the conditional evaluation uses the insertion + // point right before the if check. + mlir::OpBuilder::InsertPoint ip = builder.saveInsertionPoint(); + if (ifBody.isSet()) { + builder.setInsertionPointAfterValue(nullCmpResult); + ip = builder.saveInsertionPoint(); + } + ConditionalEvaluation conditional(ip); + + // All the actual work to be done should be placed inside the IfOp above, + // so change the insertion point over there. 
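+  // Roughly, in CIR pseudo-IR, the null-checked shape being assembled is:
+  //   %ptr = <call to operator new>
+  //   %ok  = cir.cmp(ne, %ptr, <null>)
+  //   cir.if %ok { <bitcast, cleanups, initializer> }
+  // so everything emitted from here on belongs inside that if region.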
+ if (ifBody.isSet()) { + conditional.begin(*this); + builder.restoreInsertionPoint(ifBody); + } + + // If there's an operator delete, enter a cleanup to call it if an + // exception is thrown. + EHScopeStack::stable_iterator operatorDeleteCleanup; + [[maybe_unused]] mlir::Operation *cleanupDominator = nullptr; + if (E->getOperatorDelete() && + !E->getOperatorDelete()->isReservedGlobalPlacementOperator()) { + EnterNewDeleteCleanup(*this, E, allocation, allocSize, allocAlign, + allocatorArgs); + operatorDeleteCleanup = EHStack.stable_begin(); + cleanupDominator = + builder.create(getLoc(E->getSourceRange())) + .getOperation(); + } + + assert((allocSize == allocSizeWithoutCookie) == + CalculateCookiePadding(*this, E).isZero()); + if (allocSize != allocSizeWithoutCookie) { + llvm_unreachable("NYI"); + } + + mlir::Type elementTy; + Address result = Address::invalid(); + auto createCast = [&]() { + elementTy = getTypes().convertTypeForMem(allocType); + result = builder.createElementBitCast(getLoc(E->getSourceRange()), + allocation, elementTy); + }; + + if (preIfBody.isSet()) { + // Generate any cast before the if condition check on the null because the + // result can be used after the if body and should dominate all potential + // uses. + mlir::OpBuilder::InsertionGuard guard(builder); + assert(nullCmpResult && "expected"); + builder.setInsertionPointAfterValue(nullCmpResult); + createCast(); + } else { + createCast(); + } + + // Passing pointer through launder.invariant.group to avoid propagation of + // vptrs information which may be included in previous type. + // To not break LTO with different optimizations levels, we do it regardless + // of optimization level. + if (CGM.getCodeGenOpts().StrictVTablePointers && + allocator->isReservedGlobalPlacementOperator()) + llvm_unreachable("NYI"); + + // Emit sanitizer checks for pointer value now, so that in the case of an + // array it was checked only once and not at each constructor call. We may + // have already checked that the pointer is non-null. + // FIXME: If we have an array cookie and a potentially-throwing allocator, + // we'll null check the wrong pointer here. + SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, nullCheck); + buildTypeCheck(CIRGenFunction::TCK_ConstructorCall, + E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), + result.getPointer(), allocType, result.getAlignment(), + SkippedChecks, numElements); + + buildNewInitializer(*this, E, allocType, elementTy, result, numElements, + allocSizeWithoutCookie); + auto resultPtr = result.getPointer(); + if (E->isArray()) { + llvm_unreachable("NYI"); + } + + // Deactivate the 'operator delete' cleanup if we finished + // initialization. + if (operatorDeleteCleanup.isValid()) { + // FIXME: enable cleanupDominator above before implementing this. + DeactivateCleanupBlock(operatorDeleteCleanup, cleanupDominator); + if (cleanupDominator) + cleanupDominator->erase(); + } + + if (nullCheck) { + conditional.end(*this); + // resultPtr is already updated in the first null check phase. + + // Reset insertion point to resume back to post ifOp. 
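+    // Note: the element cast that produced resultPtr was emitted before the
+    // null-check if (see createCast above), so it already dominates any code
+    // that follows the check and no merging of values is required here.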
+ if (postIfBody.isSet()) { + builder.create(loc); + builder.restoreInsertionPoint(postIfBody); + } + } + + return resultPtr; +} + +RValue CIRGenFunction::buildCXXDestructorCall(GlobalDecl Dtor, + const CIRGenCallee &Callee, + mlir::Value This, QualType ThisTy, + mlir::Value ImplicitParam, + QualType ImplicitParamTy, + const CallExpr *CE) { + const CXXMethodDecl *DtorDecl = cast(Dtor.getDecl()); + + assert(!ThisTy.isNull()); + assert(ThisTy->getAsCXXRecordDecl() == DtorDecl->getParent() && + "Pointer/Object mixup"); + + LangAS SrcAS = ThisTy.getAddressSpace(); + LangAS DstAS = DtorDecl->getMethodQualifiers().getAddressSpace(); + if (SrcAS != DstAS) { + llvm_unreachable("NYI"); + } + + CallArgList Args; + commonBuildCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam, + ImplicitParamTy, CE, Args, nullptr); + assert((CE || Dtor.getDecl()) && "expected source location provider"); + return buildCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee, + ReturnValueSlot(), Args, nullptr, CE && CE == MustTailCall, + CE ? getLoc(CE->getExprLoc()) + : getLoc(Dtor.getDecl()->getSourceRange())); +} + +/// Emit a call to an operator new or operator delete function, as implicitly +/// created by new-expressions and delete-expressions. +static RValue buildNewDeleteCall(CIRGenFunction &CGF, + const FunctionDecl *CalleeDecl, + const FunctionProtoType *CalleeType, + const CallArgList &Args) { + mlir::cir::CIRCallOpInterface CallOrTryCall; + auto CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl); + CIRGenCallee Callee = + CIRGenCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl)); + RValue RV = CGF.buildCall(CGF.CGM.getTypes().arrangeFreeFunctionCall( + Args, CalleeType, /*ChainCall=*/false), + Callee, ReturnValueSlot(), Args, &CallOrTryCall); + + /// C++1y [expr.new]p10: + /// [In a new-expression,] an implementation is allowed to omit a call + /// to a replaceable global allocation function. + /// + /// We model such elidable calls with the 'builtin' attribute. + assert(!MissingFeatures::attributeBuiltin()); + return RV; +} + +void CIRGenFunction::buildDeleteCall(const FunctionDecl *DeleteFD, + mlir::Value Ptr, QualType DeleteTy, + mlir::Value NumElements, + CharUnits CookieSize) { + assert((!NumElements && CookieSize.isZero()) || + DeleteFD->getOverloadedOperator() == OO_Array_Delete); + + const auto *DeleteFTy = DeleteFD->getType()->castAs(); + CallArgList DeleteArgs; + + auto Params = getUsualDeleteParams(DeleteFD); + auto ParamTypeIt = DeleteFTy->param_type_begin(); + + // Pass the pointer itself. + QualType ArgTy = *ParamTypeIt++; + mlir::Value DeletePtr = + builder.createBitcast(Ptr.getLoc(), Ptr, ConvertType(ArgTy)); + DeleteArgs.add(RValue::get(DeletePtr), ArgTy); + + // Pass the std::destroying_delete tag if present. + mlir::Value DestroyingDeleteTag{}; + if (Params.DestroyingDelete) { + llvm_unreachable("NYI"); + } + + // Pass the size if the delete function has a size_t parameter. + if (Params.Size) { + QualType SizeType = *ParamTypeIt++; + CharUnits DeleteTypeSize = getContext().getTypeSizeInChars(DeleteTy); + assert(SizeTy && "expected mlir::cir::IntType"); + auto Size = builder.getConstInt(*currSrcLoc, ConvertType(SizeType), + DeleteTypeSize.getQuantity()); + + // For array new, multiply by the number of elements. + if (NumElements) { + // Uncomment upon adding testcase. + // Size = builder.createMul(Size, NumElements); + llvm_unreachable("NYI"); + } + + // If there is a cookie, add the cookie size. + if (!CookieSize.isZero()) { + // Uncomment upon adding testcase. 
+ // builder.createBinop( + // Size, mlir::cir::BinOpKind::Add, + // builder.getConstInt(*currSrcLoc, SizeTy, + // CookieSize.getQuantity())); + llvm_unreachable("NYI"); + } + + DeleteArgs.add(RValue::get(Size), SizeType); + } + + // Pass the alignment if the delete function has an align_val_t parameter. + if (Params.Alignment) { + llvm_unreachable("NYI"); + } + + assert(ParamTypeIt == DeleteFTy->param_type_end() && + "unknown parameter to usual delete function"); + + // Emit the call to delete. + buildNewDeleteCall(*this, DeleteFD, DeleteFTy, DeleteArgs); + + // If call argument lowering didn't use the destroying_delete_t alloca, + // remove it again. + if (DestroyingDeleteTag && DestroyingDeleteTag.use_empty()) { + llvm_unreachable("NYI"); // DestroyingDeleteTag->eraseFromParent(); + } +} + +static mlir::Value buildDynamicCastToNull(CIRGenFunction &CGF, + mlir::Location Loc, QualType DestTy) { + mlir::Type DestCIRTy = CGF.ConvertType(DestTy); + assert(mlir::isa(DestCIRTy) && + "result of dynamic_cast should be a ptr"); + + mlir::Value NullPtrValue = CGF.getBuilder().getNullPtr(DestCIRTy, Loc); + + if (!DestTy->isPointerType()) { + auto *CurrentRegion = CGF.getBuilder().getBlock()->getParent(); + /// C++ [expr.dynamic.cast]p9: + /// A failed cast to reference type throws std::bad_cast + CGF.CGM.getCXXABI().buildBadCastCall(CGF, Loc); + + // The call to bad_cast will terminate the current block. Create a new block + // to hold any follow up code. + CGF.getBuilder().createBlock(CurrentRegion, CurrentRegion->end()); + } + + return NullPtrValue; +} + +mlir::Value CIRGenFunction::buildDynamicCast(Address ThisAddr, + const CXXDynamicCastExpr *DCE) { + auto loc = getLoc(DCE->getSourceRange()); + + CGM.buildExplicitCastExprType(DCE, this); + QualType destTy = DCE->getTypeAsWritten(); + QualType srcTy = DCE->getSubExpr()->getType(); + + // C++ [expr.dynamic.cast]p7: + // If T is "pointer to cv void," then the result is a pointer to the most + // derived object pointed to by v. + bool isDynCastToVoid = destTy->isVoidPointerType(); + bool isRefCast = destTy->isReferenceType(); + + QualType srcRecordTy; + QualType destRecordTy; + if (isDynCastToVoid) { + srcRecordTy = srcTy->getPointeeType(); + // No destRecordTy. 
+ } else if (const PointerType *DestPTy = destTy->getAs()) { + srcRecordTy = srcTy->castAs()->getPointeeType(); + destRecordTy = DestPTy->getPointeeType(); + } else { + srcRecordTy = srcTy; + destRecordTy = destTy->castAs()->getPointeeType(); + } + + assert(srcRecordTy->isRecordType() && "source type must be a record type!"); + buildTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(), + srcRecordTy); + + if (DCE->isAlwaysNull()) + return buildDynamicCastToNull(*this, loc, destTy); + + auto destCirTy = mlir::cast(ConvertType(destTy)); + return CGM.getCXXABI().buildDynamicCast(*this, loc, srcRecordTy, destRecordTy, + destCirTy, isRefCast, ThisAddr); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp new file mode 100644 index 000000000000..f84f6157a61f --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -0,0 +1,999 @@ +#include "CIRGenBuilder.h" +#include "CIRGenCstEmitter.h" +#include "CIRGenFunction.h" +#include "clang/Basic/LangOptions.h" +#include "clang/CIR/Interfaces/CIRFPTypeInterface.h" +#include "clang/CIR/MissingFeatures.h" + +#include "mlir/IR/Location.h" +#include "mlir/IR/Value.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace cir; +using namespace clang; + +namespace { + +class ComplexExprEmitter : public StmtVisitor { + CIRGenFunction &CGF; + CIRGenBuilderTy &Builder; + bool FPHasBeenPromoted; + +public: + explicit ComplexExprEmitter(CIRGenFunction &cgf) + : CGF(cgf), Builder(cgf.getBuilder()), FPHasBeenPromoted(false) {} + + //===--------------------------------------------------------------------===// + // Utilities + //===--------------------------------------------------------------------===// + + /// Given an expression with complex type that represents a value l-value, + /// this method emits the address of the l-value, then loads and returns the + /// result. + mlir::Value buildLoadOfLValue(const Expr *E) { + return buildLoadOfLValue(CGF.buildLValue(E), E->getExprLoc()); + } + + mlir::Value buildLoadOfLValue(LValue LV, SourceLocation Loc); + + /// EmitStoreOfComplex - Store the specified real/imag parts into the + /// specified value pointer. + void buildStoreOfComplex(mlir::Location Loc, mlir::Value Val, LValue LV, + bool isInit); + + /// Emit a cast from complex value Val to DestType. + mlir::Value buildComplexToComplexCast(mlir::Value Val, QualType SrcType, + QualType DestType, SourceLocation Loc); + /// Emit a cast from scalar value Val to DestType. 
+ mlir::Value buildScalarToComplexCast(mlir::Value Val, QualType SrcType, + QualType DestType, SourceLocation Loc); + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + mlir::Value Visit(Expr *E) { + assert(!MissingFeatures::generateDebugInfo()); + return StmtVisitor::Visit(E); + } + + mlir::Value VisitStmt(Stmt *S) { + S->dump(llvm::errs(), CGF.getContext()); + llvm_unreachable("Stmt can't have complex result type!"); + } + + mlir::Value VisitExpr(Expr *S) { llvm_unreachable("not supported"); } + mlir::Value VisitConstantExpr(ConstantExpr *E) { + if (auto Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) + return Builder.getConstant(CGF.getLoc(E->getSourceRange()), + mlir::cast(Result)); + return Visit(E->getSubExpr()); + } + mlir::Value VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr()); } + mlir::Value VisitGenericSelectionExpr(GenericSelectionExpr *GE) { + return Visit(GE->getResultExpr()); + } + mlir::Value VisitImaginaryLiteral(const ImaginaryLiteral *IL); + mlir::Value + VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) { + return Visit(PE->getReplacement()); + } + mlir::Value VisitCoawaitExpr(CoawaitExpr *S) { llvm_unreachable("NYI"); } + mlir::Value VisitCoyieldExpr(CoyieldExpr *S) { llvm_unreachable("NYI"); } + mlir::Value VisitUnaryCoawait(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + + mlir::Value emitConstant(const CIRGenFunction::ConstantEmission &Constant, + Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return buildLoadOfLValue(Constant.getReferenceLValue(CGF, E), + E->getExprLoc()); + + auto valueAttr = Constant.getValue(); + return Builder.getConstant(CGF.getLoc(E->getSourceRange()), valueAttr); + } + + // l-values. + mlir::Value VisitDeclRefExpr(DeclRefExpr *E) { + if (CIRGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) + return emitConstant(Constant, E); + return buildLoadOfLValue(E); + } + mlir::Value VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCMessageExpr(ObjCMessageExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitArraySubscriptExpr(Expr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitMemberExpr(MemberExpr *ME) { llvm_unreachable("NYI"); } + mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *E) { + llvm_unreachable("NYI"); + } + + mlir::Value VisitPseudoObjectExpr(PseudoObjectExpr *E) { + llvm_unreachable("NYI"); + } + + // FIXME: CompoundLiteralExpr + + mlir::Value buildCast(CastKind CK, Expr *Op, QualType DestTy); + mlir::Value VisitImplicitCastExpr(ImplicitCastExpr *E) { + // Unlike for scalars, we don't have to worry about function->ptr demotion + // here. + if (E->changesVolatileQualification()) + return buildLoadOfLValue(E); + return buildCast(E->getCastKind(), E->getSubExpr(), E->getType()); + } + mlir::Value VisitCastExpr(CastExpr *E); + mlir::Value VisitCallExpr(const CallExpr *E); + mlir::Value VisitStmtExpr(const StmtExpr *E) { llvm_unreachable("NYI"); } + + // Operators. 
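+  // Note: '++' / '--' on a _Complex value adjusts only the real part (the
+  // imaginary part is untouched); the four wrappers below just dispatch
+  // pre/post and inc/dec to the shared CIRGenFunction::buildComplexPrePostIncDec
+  // helper.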
+ mlir::Value VisitPrePostIncDec(const UnaryOperator *E, bool isInc, + bool isPre); + mlir::Value VisitUnaryPostDec(const UnaryOperator *E) { + return VisitPrePostIncDec(E, false, false); + } + mlir::Value VisitUnaryPostInc(const UnaryOperator *E) { + return VisitPrePostIncDec(E, true, false); + } + mlir::Value VisitUnaryPreDec(const UnaryOperator *E) { + return VisitPrePostIncDec(E, false, true); + } + mlir::Value VisitUnaryPreInc(const UnaryOperator *E) { + return VisitPrePostIncDec(E, true, true); + } + mlir::Value VisitUnaryDeref(const Expr *E) { llvm_unreachable("NYI"); } + + mlir::Value VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType = QualType()); + mlir::Value VisitPlus(const UnaryOperator *E, QualType PromotionType); + mlir::Value VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType = QualType()); + mlir::Value VisitMinus(const UnaryOperator *E, QualType PromotionType); + mlir::Value VisitUnaryNot(const UnaryOperator *E); + // LNot,Real,Imag never return complex. + mlir::Value VisitUnaryExtension(const UnaryOperator *E) { + return Visit(E->getSubExpr()); + } + mlir::Value VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + llvm_unreachable("NYI"); + } + mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { + llvm_unreachable("NYI"); + } + mlir::Value VisitExprWithCleanups(ExprWithCleanups *E) { + CIRGenFunction::RunCleanupsScope Scope(CGF); + mlir::Value V = Visit(E->getSubExpr()); + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + Scope.ForceCleanup({&V}); + return V; + } + mlir::Value VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) { + llvm_unreachable("NYI"); + } + + struct BinOpInfo { + mlir::Location Loc; + mlir::Value LHS{}; + mlir::Value RHS{}; + QualType Ty{}; // Computation Type. 
+ FPOptions FPFeatures{}; + }; + + BinOpInfo buildBinOps(const BinaryOperator *E, + QualType PromotionTy = QualType()); + mlir::Value buildPromoted(const Expr *E, QualType PromotionTy); + mlir::Value buildPromotedComplexOperand(const Expr *E, QualType PromotionTy); + + LValue buildCompoundAssignLValue( + const CompoundAssignOperator *E, + mlir::Value (ComplexExprEmitter::*Func)(const BinOpInfo &), RValue &Val); + mlir::Value buildCompoundAssign( + const CompoundAssignOperator *E, + mlir::Value (ComplexExprEmitter::*Func)(const BinOpInfo &)); + + mlir::Value buildBinAdd(const BinOpInfo &Op); + mlir::Value buildBinSub(const BinOpInfo &Op); + mlir::Value buildBinMul(const BinOpInfo &Op); + mlir::Value buildBinDiv(const BinOpInfo &Op); + + QualType GetHigherPrecisionFPType(QualType ElementType) { + const auto *CurrentBT = cast(ElementType); + switch (CurrentBT->getKind()) { + case BuiltinType::Kind::Float16: + return CGF.getContext().FloatTy; + case BuiltinType::Kind::Float: + case BuiltinType::Kind::BFloat16: + return CGF.getContext().DoubleTy; + case BuiltinType::Kind::Double: + return CGF.getContext().LongDoubleTy; + default: + return ElementType; + } + } + + QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, + bool IsDivOpCode) { + QualType HigherElementType = GetHigherPrecisionFPType(ElementType); + const llvm::fltSemantics &ElementTypeSemantics = + CGF.getContext().getFloatTypeSemantics(ElementType); + const llvm::fltSemantics &HigherElementTypeSemantics = + CGF.getContext().getFloatTypeSemantics(HigherElementType); + // Check that the promoted type can handle the intermediate values without + // overflowing. This can be interpreted as: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <= + // LargerType.LargestFiniteVal. + // In terms of exponent it gives this formula: + // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal + // doubles the exponent of SmallerType.LargestFiniteVal) + if (llvm::APFloat::semanticsMaxExponent(ElementTypeSemantics) * 2 + 1 <= + llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { + FPHasBeenPromoted = true; + return CGF.getContext().getComplexType(HigherElementType); + } + + DiagnosticsEngine &Diags = CGF.CGM.getDiags(); + Diags.Report(diag::warn_next_larger_fp_type_same_size_than_fp); + return QualType(); + } + + QualType getPromotionType(QualType Ty, bool IsDivOpCode = false) { + if (auto *CT = Ty->getAs()) { + QualType ElementType = CT->getElementType(); + if (IsDivOpCode && ElementType->isFloatingType() && + CGF.getLangOpts().getComplexRange() == + LangOptions::ComplexRangeKind::CX_Promoted) + return HigherPrecisionTypeForComplexArithmetic(ElementType, + IsDivOpCode); + if (ElementType.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + } + if (Ty.UseExcessPrecision(CGF.getContext())) + return CGF.getContext().FloatTy; + return QualType(); + } + +#define HANDLEBINOP(OP) \ + mlir::Value VisitBin##OP(const BinaryOperator *E) { \ + QualType promotionTy = getPromotionType( \ + E->getType(), \ + (E->getOpcode() == BinaryOperatorKind::BO_Div) ? 
true : false); \ + mlir::Value result = buildBin##OP(buildBinOps(E, promotionTy)); \ + if (!promotionTy.isNull()) \ + result = CGF.buildUnPromotedValue(result, E->getType()); \ + return result; \ + } + + HANDLEBINOP(Mul) + HANDLEBINOP(Div) + HANDLEBINOP(Add) + HANDLEBINOP(Sub) +#undef HANDLEBINOP + + mlir::Value VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + llvm_unreachable("NYI"); + } + + // Compound assignments. + mlir::Value VisitBinAddAssign(const CompoundAssignOperator *E) { + return buildCompoundAssign(E, &ComplexExprEmitter::buildBinAdd); + } + mlir::Value VisitBinSubAssign(const CompoundAssignOperator *E) { + return buildCompoundAssign(E, &ComplexExprEmitter::buildBinSub); + } + mlir::Value VisitBinMulAssign(const CompoundAssignOperator *E) { + return buildCompoundAssign(E, &ComplexExprEmitter::buildBinMul); + } + mlir::Value VisitBinDivAssign(const CompoundAssignOperator *E) { + return buildCompoundAssign(E, &ComplexExprEmitter::buildBinDiv); + } + + // GCC rejects rem/and/or/xor for integer complex. + // Logical and/or always return int, never complex. + + // No comparisons produce a complex result. + + LValue buildBinAssignLValue(const BinaryOperator *E, mlir::Value &Val); + mlir::Value VisitBinAssign(const BinaryOperator *E) { + mlir::Value Val; + LValue LV = buildBinAssignLValue(E, Val); + + // The result of an assignment in C is the assigned r-value. + if (!CGF.getLangOpts().CPlusPlus) + return Val; + + // If the lvalue is non-volatile, return the computed value of the + // assignment. + if (!LV.isVolatileQualified()) + return Val; + + return buildLoadOfLValue(LV, E->getExprLoc()); + }; + mlir::Value VisitBinComma(const BinaryOperator *E) { + llvm_unreachable("NYI"); + } + + mlir::Value + VisitAbstractConditionalOperator(const AbstractConditionalOperator *CO) { + llvm_unreachable("NYI"); + } + mlir::Value VisitChooseExpr(ChooseExpr *CE) { llvm_unreachable("NYI"); } + + mlir::Value VisitInitListExpr(InitListExpr *E); + + mlir::Value VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { + llvm_unreachable("NYI"); + } + + mlir::Value VisitVAArgExpr(VAArgExpr *E) { llvm_unreachable("NYI"); } + + mlir::Value VisitAtomicExpr(AtomicExpr *E) { llvm_unreachable("NYI"); } + + mlir::Value VisitPackIndexingExpr(PackIndexingExpr *E) { + llvm_unreachable("NYI"); + } +}; + +} // namespace + +static const ComplexType *getComplexType(QualType type) { + type = type.getCanonicalType(); + if (const ComplexType *comp = dyn_cast(type)) + return comp; + return cast(cast(type)->getValueType()); +} + +static mlir::Value createComplexFromReal(CIRGenBuilderTy &builder, + mlir::Location loc, mlir::Value real) { + mlir::Value imag = builder.getNullValue(real.getType(), loc); + return builder.createComplexCreate(loc, real, imag); +} + +mlir::Value ComplexExprEmitter::buildLoadOfLValue(LValue LV, + SourceLocation Loc) { + assert(LV.isSimple() && "non-simple complex l-value?"); + if (LV.getType()->isAtomicType()) + llvm_unreachable("NYI"); + + Address SrcPtr = LV.getAddress(); + return Builder.createLoad(CGF.getLoc(Loc), SrcPtr, LV.isVolatileQualified()); +} + +void ComplexExprEmitter::buildStoreOfComplex(mlir::Location Loc, + mlir::Value Val, LValue LV, + bool isInit) { + if (LV.getType()->isAtomicType() || + (!isInit && CGF.LValueIsSuitableForInlineAtomic(LV))) + llvm_unreachable("NYI"); + + Address DestAddr = LV.getAddress(); + Builder.createStore(Loc, Val, DestAddr, LV.isVolatileQualified()); +} + +mlir::Value ComplexExprEmitter::buildComplexToComplexCast(mlir::Value Val, + QualType 
SrcType, + QualType DestType, + SourceLocation Loc) { + if (SrcType == DestType) + return Val; + + // Get the src/dest element type. + QualType SrcElemTy = SrcType->castAs()->getElementType(); + QualType DestElemTy = DestType->castAs()->getElementType(); + + mlir::cir::CastKind CastOpKind; + if (SrcElemTy->isFloatingType() && DestElemTy->isFloatingType()) + CastOpKind = mlir::cir::CastKind::float_complex; + else if (SrcElemTy->isFloatingType() && DestElemTy->isIntegerType()) + CastOpKind = mlir::cir::CastKind::float_complex_to_int_complex; + else if (SrcElemTy->isIntegerType() && DestElemTy->isFloatingType()) + CastOpKind = mlir::cir::CastKind::int_complex_to_float_complex; + else if (SrcElemTy->isIntegerType() && DestElemTy->isIntegerType()) + CastOpKind = mlir::cir::CastKind::int_complex; + else + llvm_unreachable("unexpected src type or dest type"); + + return Builder.createCast(CGF.getLoc(Loc), CastOpKind, Val, + CGF.ConvertType(DestType)); +} + +mlir::Value ComplexExprEmitter::buildScalarToComplexCast(mlir::Value Val, + QualType SrcType, + QualType DestType, + SourceLocation Loc) { + mlir::cir::CastKind CastOpKind; + if (SrcType->isFloatingType()) + CastOpKind = mlir::cir::CastKind::float_to_complex; + else if (SrcType->isIntegerType()) + CastOpKind = mlir::cir::CastKind::int_to_complex; + else + llvm_unreachable("unexpected src type"); + + return Builder.createCast(CGF.getLoc(Loc), CastOpKind, Val, + CGF.ConvertType(DestType)); +} + +mlir::Value ComplexExprEmitter::buildCast(CastKind CK, Expr *Op, + QualType DestTy) { + switch (CK) { + case CK_Dependent: + llvm_unreachable("dependent cast kind in IR gen!"); + + // Atomic to non-atomic casts may be more than a no-op for some platforms and + // for some types. + case CK_LValueToRValue: + return Visit(Op); + + case CK_AtomicToNonAtomic: + case CK_NonAtomicToAtomic: + case CK_NoOp: + case CK_UserDefinedConversion: + llvm_unreachable("NYI"); + + case CK_LValueBitCast: + llvm_unreachable("NYI"); + + case CK_LValueToRValueBitCast: + llvm_unreachable("NYI"); + + case CK_BitCast: + case CK_BaseToDerived: + case CK_DerivedToBase: + case CK_UncheckedDerivedToBase: + case CK_Dynamic: + case CK_ToUnion: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: + case CK_NullToPointer: + case CK_NullToMemberPointer: + case CK_BaseToDerivedMemberPointer: + case CK_DerivedToBaseMemberPointer: + case CK_MemberPointerToBoolean: + case CK_ReinterpretMemberPointer: + case CK_ConstructorConversion: + case CK_IntegralToPointer: + case CK_PointerToIntegral: + case CK_PointerToBoolean: + case CK_ToVoid: + case CK_VectorSplat: + case CK_IntegralCast: + case CK_BooleanToSignedIntegral: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingToBoolean: + case CK_FloatingCast: + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_ObjCObjectLValueCast: + case CK_FloatingComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_IntegralComplexToReal: + case CK_IntegralComplexToBoolean: + case CK_ARCProduceObject: + case CK_ARCConsumeObject: + case CK_ARCReclaimReturnedObject: + case CK_ARCExtendBlockObject: + case CK_CopyAndAutoreleaseBlockObject: + case CK_BuiltinFnToFnPtr: + case CK_ZeroToOCLOpaqueType: + case CK_AddressSpaceConversion: + case CK_IntToOCLSampler: + case CK_FloatingToFixedPoint: + case CK_FixedPointToFloating: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case 
CK_IntegralToFixedPoint: + case CK_MatrixCast: + case CK_HLSLVectorTruncation: + case CK_HLSLArrayRValue: + llvm_unreachable("invalid cast kind for complex value"); + + case CK_FloatingRealToComplex: + case CK_IntegralRealToComplex: { + assert(!MissingFeatures::CGFPOptionsRAII()); + return buildScalarToComplexCast(CGF.buildScalarExpr(Op), Op->getType(), + DestTy, Op->getExprLoc()); + } + + case CK_FloatingComplexCast: + case CK_FloatingComplexToIntegralComplex: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: { + assert(!MissingFeatures::CGFPOptionsRAII()); + return buildComplexToComplexCast(Visit(Op), Op->getType(), DestTy, + Op->getExprLoc()); + } + } + + llvm_unreachable("unknown cast resulting in complex value"); +} + +mlir::Value ComplexExprEmitter::VisitCastExpr(CastExpr *E) { + if (const auto *ECE = dyn_cast(E)) + CGF.CGM.buildExplicitCastExprType(ECE, &CGF); + if (E->changesVolatileQualification()) + return buildLoadOfLValue(E); + return buildCast(E->getCastKind(), E->getSubExpr(), E->getType()); +} + +mlir::Value ComplexExprEmitter::VisitCallExpr(const CallExpr *E) { + if (E->getCallReturnType(CGF.getContext())->isReferenceType()) + return buildLoadOfLValue(E); + + return CGF.buildCallExpr(E).getComplexVal(); +} + +mlir::Value ComplexExprEmitter::VisitPrePostIncDec(const UnaryOperator *E, + bool isInc, bool isPre) { + LValue LV = CGF.buildLValue(E->getSubExpr()); + return CGF.buildComplexPrePostIncDec(E, LV, isInc, isPre); +} + +mlir::Value ComplexExprEmitter::VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + mlir::Value result = VisitPlus(E, promotionTy); + if (!promotionTy.isNull()) + return CGF.buildUnPromotedValue(result, E->getSubExpr()->getType()); + return result; +} + +mlir::Value ComplexExprEmitter::VisitPlus(const UnaryOperator *E, + QualType PromotionType) { + mlir::Value Op; + if (!PromotionType.isNull()) + Op = CGF.buildPromotedComplexExpr(E->getSubExpr(), PromotionType); + else + Op = Visit(E->getSubExpr()); + + return Builder.createUnaryOp(CGF.getLoc(E->getExprLoc()), + mlir::cir::UnaryOpKind::Plus, Op); +} + +mlir::Value ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType) { + QualType promotionTy = PromotionType.isNull() + ? 
getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + mlir::Value result = VisitMinus(E, promotionTy); + if (!promotionTy.isNull()) + return CGF.buildUnPromotedValue(result, E->getSubExpr()->getType()); + return result; +} + +mlir::Value ComplexExprEmitter::VisitMinus(const UnaryOperator *E, + QualType PromotionType) { + mlir::Value Op; + if (!PromotionType.isNull()) + Op = CGF.buildPromotedComplexExpr(E->getSubExpr(), PromotionType); + else + Op = Visit(E->getSubExpr()); + + return Builder.createUnaryOp(CGF.getLoc(E->getExprLoc()), + mlir::cir::UnaryOpKind::Minus, Op); +} + +mlir::Value ComplexExprEmitter::VisitUnaryNot(const UnaryOperator *E) { + mlir::Value Op = Visit(E->getSubExpr()); + return Builder.createUnaryOp(CGF.getLoc(E->getExprLoc()), + mlir::cir::UnaryOpKind::Not, Op); +} + +ComplexExprEmitter::BinOpInfo +ComplexExprEmitter::buildBinOps(const BinaryOperator *E, QualType PromotionTy) { + BinOpInfo Ops{CGF.getLoc(E->getExprLoc())}; + + Ops.LHS = buildPromotedComplexOperand(E->getLHS(), PromotionTy); + Ops.RHS = buildPromotedComplexOperand(E->getRHS(), PromotionTy); + if (!PromotionTy.isNull()) + Ops.Ty = PromotionTy; + else + Ops.Ty = E->getType(); + Ops.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); + return Ops; +} + +mlir::Value ComplexExprEmitter::buildPromoted(const Expr *E, + QualType PromotionTy) { + E = E->IgnoreParens(); + if (const auto *BO = dyn_cast(E)) { + switch (BO->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return buildBin##OP(buildBinOps(BO, PromotionTy)); + HANDLE_BINOP(Add) + HANDLE_BINOP(Sub) + HANDLE_BINOP(Mul) + HANDLE_BINOP(Div) +#undef HANDLE_BINOP + default: + break; + } + } else if (const auto *UO = dyn_cast(E)) { + switch (UO->getOpcode()) { + case UO_Minus: + return VisitMinus(UO, PromotionTy); + case UO_Plus: + return VisitPlus(UO, PromotionTy); + default: + break; + } + } + auto result = Visit(const_cast(E)); + if (!PromotionTy.isNull()) + return CGF.buildPromotedValue(result, PromotionTy); + return result; +} + +mlir::Value +ComplexExprEmitter::buildPromotedComplexOperand(const Expr *E, + QualType PromotionTy) { + if (E->getType()->isAnyComplexType()) { + if (!PromotionTy.isNull()) + return CGF.buildPromotedComplexExpr(E, PromotionTy); + return Visit(const_cast(E)); + } + + mlir::Value Real; + if (!PromotionTy.isNull()) { + QualType ComplexElementTy = + PromotionTy->castAs()->getElementType(); + Real = CGF.buildPromotedScalarExpr(E, ComplexElementTy); + } else + Real = CGF.buildScalarExpr(E); + + return createComplexFromReal(CGF.getBuilder(), CGF.getLoc(E->getExprLoc()), + Real); +} + +LValue ComplexExprEmitter::buildCompoundAssignLValue( + const CompoundAssignOperator *E, + mlir::Value (ComplexExprEmitter::*Func)(const BinOpInfo &), RValue &Val) { + QualType LHSTy = E->getLHS()->getType(); + if (const AtomicType *AT = LHSTy->getAs()) + LHSTy = AT->getValueType(); + + BinOpInfo OpInfo{CGF.getLoc(E->getExprLoc())}; + OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); + + assert(!MissingFeatures::CGFPOptionsRAII()); + + // Load the RHS and LHS operands. + // __block variables need to have the rhs evaluated first, plus this should + // improve codegen a little. 
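+  // For illustration (hypothetical): for '_Complex float c; c += 2.0f;' the
+  // real RHS is widened into a complex value, the addition is performed in
+  // the computation (possibly promoted) type, and the result is truncated
+  // back to the type of 'c' when it is stored.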
+ QualType PromotionTypeCR; + PromotionTypeCR = getPromotionType(E->getComputationResultType()); + if (PromotionTypeCR.isNull()) + PromotionTypeCR = E->getComputationResultType(); + OpInfo.Ty = PromotionTypeCR; + QualType ComplexElementTy = + OpInfo.Ty->castAs()->getElementType(); + QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType()); + + // The RHS should have been converted to the computation type. + if (E->getRHS()->getType()->isRealFloatingType()) { + if (!PromotionTypeRHS.isNull()) + OpInfo.RHS = createComplexFromReal( + CGF.getBuilder(), CGF.getLoc(E->getExprLoc()), + CGF.buildPromotedScalarExpr(E->getRHS(), PromotionTypeRHS)); + else { + assert(CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, + E->getRHS()->getType())); + OpInfo.RHS = + createComplexFromReal(CGF.getBuilder(), CGF.getLoc(E->getExprLoc()), + CGF.buildScalarExpr(E->getRHS())); + } + } else { + if (!PromotionTypeRHS.isNull()) { + OpInfo.RHS = createComplexFromReal( + CGF.getBuilder(), CGF.getLoc(E->getExprLoc()), + CGF.buildPromotedComplexExpr(E->getRHS(), PromotionTypeRHS)); + } else { + assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty, + E->getRHS()->getType())); + OpInfo.RHS = Visit(E->getRHS()); + } + } + + LValue LHS = CGF.buildLValue(E->getLHS()); + + // Load from the l-value and convert it. + SourceLocation Loc = E->getExprLoc(); + QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType()); + if (LHSTy->isAnyComplexType()) { + mlir::Value LHSVal = buildLoadOfLValue(LHS, Loc); + if (!PromotionTypeLHS.isNull()) + OpInfo.LHS = + buildComplexToComplexCast(LHSVal, LHSTy, PromotionTypeLHS, Loc); + else + OpInfo.LHS = buildComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc); + } else { + mlir::Value LHSVal = CGF.buildLoadOfScalar(LHS, Loc); + // For floating point real operands we can directly pass the scalar form + // to the binary operator emission and potentially get more efficient code. + if (LHSTy->isRealFloatingType()) { + QualType PromotedComplexElementTy; + if (!PromotionTypeLHS.isNull()) { + PromotedComplexElementTy = + cast(PromotionTypeLHS)->getElementType(); + if (!CGF.getContext().hasSameUnqualifiedType(PromotedComplexElementTy, + PromotionTypeLHS)) + LHSVal = CGF.buildScalarConversion(LHSVal, LHSTy, + PromotedComplexElementTy, Loc); + } else { + if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy)) + LHSVal = + CGF.buildScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc); + } + OpInfo.LHS = createComplexFromReal(CGF.getBuilder(), + CGF.getLoc(E->getExprLoc()), LHSVal); + } else { + OpInfo.LHS = buildScalarToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc); + } + } + + // Expand the binary operator. + mlir::Value Result = (this->*Func)(OpInfo); + + // Truncate the result and store it into the LHS lvalue. + if (LHSTy->isAnyComplexType()) { + mlir::Value ResVal = + buildComplexToComplexCast(Result, OpInfo.Ty, LHSTy, Loc); + buildStoreOfComplex(CGF.getLoc(E->getExprLoc()), ResVal, LHS, + /*isInit*/ false); + Val = RValue::getComplex(ResVal); + } else { + mlir::Value ResVal = + CGF.buildComplexToScalarConversion(Result, OpInfo.Ty, LHSTy, Loc); + CGF.buildStoreOfScalar(ResVal, LHS, /*isInit*/ false); + Val = RValue::get(ResVal); + } + + return LHS; +} + +mlir::Value ComplexExprEmitter::buildCompoundAssign( + const CompoundAssignOperator *E, + mlir::Value (ComplexExprEmitter::*Func)(const BinOpInfo &)) { + RValue Val; + LValue LV = buildCompoundAssignLValue(E, Func, Val); + + // The result of an assignment in C is the assigned r-value. 
+ if (!CGF.getLangOpts().CPlusPlus) + return Val.getComplexVal(); + + // If the lvalue is non-volatile, return the computed value of the assignment. + if (!LV.isVolatileQualified()) + return Val.getComplexVal(); + + return buildLoadOfLValue(LV, E->getExprLoc()); +} + +mlir::Value ComplexExprEmitter::buildBinAdd(const BinOpInfo &Op) { + assert(!MissingFeatures::CGFPOptionsRAII()); + return CGF.getBuilder().createComplexAdd(Op.Loc, Op.LHS, Op.RHS); +} + +mlir::Value ComplexExprEmitter::buildBinSub(const BinOpInfo &Op) { + assert(!MissingFeatures::CGFPOptionsRAII()); + return CGF.getBuilder().createComplexSub(Op.Loc, Op.LHS, Op.RHS); +} + +static mlir::cir::ComplexRangeKind +getComplexRangeAttr(LangOptions::ComplexRangeKind range) { + switch (range) { + case LangOptions::CX_Full: + return mlir::cir::ComplexRangeKind::Full; + case LangOptions::CX_Improved: + return mlir::cir::ComplexRangeKind::Improved; + case LangOptions::CX_Promoted: + return mlir::cir::ComplexRangeKind::Promoted; + case LangOptions::CX_Basic: + return mlir::cir::ComplexRangeKind::Basic; + case LangOptions::CX_None: + return mlir::cir::ComplexRangeKind::None; + } +} + +mlir::Value ComplexExprEmitter::buildBinMul(const BinOpInfo &Op) { + assert(!MissingFeatures::CGFPOptionsRAII()); + return CGF.getBuilder().createComplexMul( + Op.Loc, Op.LHS, Op.RHS, + getComplexRangeAttr(Op.FPFeatures.getComplexRange()), FPHasBeenPromoted); +} + +mlir::Value ComplexExprEmitter::buildBinDiv(const BinOpInfo &Op) { + assert(!MissingFeatures::CGFPOptionsRAII()); + return CGF.getBuilder().createComplexDiv( + Op.Loc, Op.LHS, Op.RHS, + getComplexRangeAttr(Op.FPFeatures.getComplexRange()), FPHasBeenPromoted); +} + +LValue ComplexExprEmitter::buildBinAssignLValue(const BinaryOperator *E, + mlir::Value &Val) { + assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(), + E->getRHS()->getType()) && + "Invalid assignment"); + + // Emit the RHS. __block variables need the RHS evaluated first. + Val = Visit(E->getRHS()); + + // Compute the address to store into. + LValue LHS = CGF.buildLValue(E->getLHS()); + + // Store the result value into the LHS lvalue. 
+ buildStoreOfComplex(CGF.getLoc(E->getExprLoc()), Val, LHS, /*isInit*/ false); + + return LHS; +} + +mlir::Value +ComplexExprEmitter::VisitImaginaryLiteral(const ImaginaryLiteral *IL) { + auto Loc = CGF.getLoc(IL->getExprLoc()); + auto Ty = mlir::cast(CGF.getCIRType(IL->getType())); + auto ElementTy = Ty.getElementTy(); + + mlir::TypedAttr RealValueAttr; + mlir::TypedAttr ImagValueAttr; + if (mlir::isa(ElementTy)) { + auto ImagValue = cast(IL->getSubExpr())->getValue(); + RealValueAttr = mlir::cir::IntAttr::get(ElementTy, 0); + ImagValueAttr = mlir::cir::IntAttr::get(ElementTy, ImagValue); + } else if (mlir::isa(ElementTy)) { + auto ImagValue = cast(IL->getSubExpr())->getValue(); + RealValueAttr = mlir::cir::FPAttr::get( + ElementTy, llvm::APFloat::getZero(ImagValue.getSemantics())); + ImagValueAttr = mlir::cir::FPAttr::get(ElementTy, ImagValue); + } else + llvm_unreachable("unexpected complex element type"); + + auto RealValue = Builder.getConstant(Loc, RealValueAttr); + auto ImagValue = Builder.getConstant(Loc, ImagValueAttr); + return Builder.createComplexCreate(Loc, RealValue, ImagValue); +} + +mlir::Value ComplexExprEmitter::VisitInitListExpr(InitListExpr *E) { + if (E->getNumInits() == 2) { + mlir::Value Real = CGF.buildScalarExpr(E->getInit(0)); + mlir::Value Imag = CGF.buildScalarExpr(E->getInit(1)); + return Builder.createComplexCreate(CGF.getLoc(E->getExprLoc()), Real, Imag); + } + + if (E->getNumInits() == 1) + return Visit(E->getInit(0)); + + // Empty init list initializes to null + assert(E->getNumInits() == 0 && "Unexpected number of inits"); + QualType Ty = E->getType()->castAs()->getElementType(); + return Builder.getZero(CGF.getLoc(E->getExprLoc()), CGF.ConvertType(Ty)); +} + +mlir::Value CIRGenFunction::buildPromotedComplexExpr(const Expr *E, + QualType PromotionType) { + return ComplexExprEmitter(*this).buildPromoted(E, PromotionType); +} + +mlir::Value CIRGenFunction::buildPromotedValue(mlir::Value result, + QualType PromotionType) { + assert(mlir::isa( + mlir::cast(result.getType()) + .getElementTy()) && + "integral complex will never be promoted"); + return builder.createCast(mlir::cir::CastKind::float_complex, result, + ConvertType(PromotionType)); +} + +mlir::Value CIRGenFunction::buildUnPromotedValue(mlir::Value result, + QualType UnPromotionType) { + assert(mlir::isa( + mlir::cast(result.getType()) + .getElementTy()) && + "integral complex will never be promoted"); + return builder.createCast(mlir::cir::CastKind::float_complex, result, + ConvertType(UnPromotionType)); +} + +mlir::Value CIRGenFunction::buildComplexExpr(const Expr *E) { + assert(E && getComplexType(E->getType()) && + "Invalid complex expression to emit"); + + return ComplexExprEmitter(*this).Visit(const_cast(E)); +} + +void CIRGenFunction::buildComplexExprIntoLValue(const Expr *E, LValue dest, + bool isInit) { + assert(E && getComplexType(E->getType()) && + "Invalid complex expression to emit"); + ComplexExprEmitter Emitter(*this); + mlir::Value Val = Emitter.Visit(const_cast(E)); + Emitter.buildStoreOfComplex(getLoc(E->getExprLoc()), Val, dest, isInit); +} + +void CIRGenFunction::buildStoreOfComplex(mlir::Location Loc, mlir::Value V, + LValue dest, bool isInit) { + ComplexExprEmitter(*this).buildStoreOfComplex(Loc, V, dest, isInit); +} + +Address CIRGenFunction::buildAddrOfRealComponent(mlir::Location loc, + Address addr, + QualType complexType) { + return builder.createRealPtr(loc, addr); +} + +Address CIRGenFunction::buildAddrOfImagComponent(mlir::Location loc, + Address addr, + QualType 
complexType) { + return builder.createImagPtr(loc, addr); +} + +LValue CIRGenFunction::buildComplexAssignmentLValue(const BinaryOperator *E) { + assert(E->getOpcode() == BO_Assign); + mlir::Value Val; // ignored + LValue LVal = ComplexExprEmitter(*this).buildBinAssignLValue(E, Val); + if (getLangOpts().OpenMP) + llvm_unreachable("NYI"); + return LVal; +} + +using CompoundFunc = + mlir::Value (ComplexExprEmitter::*)(const ComplexExprEmitter::BinOpInfo &); + +static CompoundFunc getComplexOp(BinaryOperatorKind Op) { + switch (Op) { + case BO_MulAssign: + return &ComplexExprEmitter::buildBinMul; + case BO_DivAssign: + return &ComplexExprEmitter::buildBinDiv; + case BO_SubAssign: + return &ComplexExprEmitter::buildBinSub; + case BO_AddAssign: + return &ComplexExprEmitter::buildBinAdd; + default: + llvm_unreachable("unexpected complex compound assignment"); + } +} + +LValue CIRGenFunction::buildComplexCompoundAssignmentLValue( + const CompoundAssignOperator *E) { + CompoundFunc Op = getComplexOp(E->getOpcode()); + RValue Val; + return ComplexExprEmitter(*this).buildCompoundAssignLValue(E, Op, Val); +} + +mlir::Value CIRGenFunction::buildComplexPrePostIncDec(const UnaryOperator *E, + LValue LV, bool isInc, + bool isPre) { + mlir::Value InVal = buildLoadOfComplex(LV, E->getExprLoc()); + + auto Loc = getLoc(E->getExprLoc()); + auto OpKind = + isInc ? mlir::cir::UnaryOpKind::Inc : mlir::cir::UnaryOpKind::Dec; + mlir::Value IncVal = builder.createUnaryOp(Loc, OpKind, InVal); + + // Store the updated result through the lvalue. + buildStoreOfComplex(Loc, IncVal, LV, /*init*/ false); + if (getLangOpts().OpenMP) + llvm_unreachable("NYI"); + + // If this is a postinc, return the value read from memory, otherwise use the + // updated value. + return isPre ? IncVal : InVal; +} + +mlir::Value CIRGenFunction::buildLoadOfComplex(LValue src, SourceLocation loc) { + return ComplexExprEmitter(*this).buildLoadOfLValue(src, loc); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp new file mode 100644 index 000000000000..c3683b3f0fd1 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp @@ -0,0 +1,1959 @@ +//===---- CIRGenExprCst.cpp - Emit LLVM Code from Constant Expressions ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Constant Expr nodes as LLVM code. 
+// +//===----------------------------------------------------------------------===// +#include "Address.h" +#include "CIRGenCXXABI.h" +#include "CIRGenCstEmitter.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "clang/AST/APValue.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/OperationKinds.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/Specifiers.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using namespace clang; +using namespace cir; + +//===----------------------------------------------------------------------===// +// ConstantAggregateBuilder +//===----------------------------------------------------------------------===// + +namespace { +class ConstExprEmitter; + +static mlir::Attribute +buildArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType, + mlir::Type CommonElementType, unsigned ArrayBound, + SmallVectorImpl &Elements, + mlir::TypedAttr Filler); + +struct ConstantAggregateBuilderUtils { + CIRGenModule &CGM; + CIRDataLayout dataLayout; + + ConstantAggregateBuilderUtils(CIRGenModule &CGM) + : CGM(CGM), dataLayout{CGM.getModule()} {} + + CharUnits getAlignment(const mlir::TypedAttr C) const { + return CharUnits::fromQuantity( + dataLayout.getAlignment(C.getType(), /*useABI=*/true)); + } + + CharUnits getSize(mlir::Type Ty) const { + return CharUnits::fromQuantity(dataLayout.getTypeAllocSize(Ty)); + } + + CharUnits getSize(const mlir::TypedAttr C) const { + return getSize(C.getType()); + } + + mlir::TypedAttr getPadding(CharUnits size) const { + auto eltTy = CGM.UCharTy; + auto arSize = size.getQuantity(); + auto &bld = CGM.getBuilder(); + SmallVector elts(arSize, bld.getZeroAttr(eltTy)); + return bld.getConstArray(mlir::ArrayAttr::get(bld.getContext(), elts), + bld.getArrayType(eltTy, arSize)); + } + + mlir::Attribute getZeroes(CharUnits ZeroSize) const { + llvm_unreachable("NYI"); + } +}; + +/// Incremental builder for an mlir::TypedAttr holding a struct or array +/// constant. +class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils { + /// The elements of the constant. These two arrays must have the same size; + /// Offsets[i] describes the offset of Elems[i] within the constant. The + /// elements are kept in increasing offset order, and we ensure that there + /// is no overlap: Offsets[i+1] >= Offsets[i] + getSize(Elemes[i]). + /// + /// This may contain explicit padding elements (in order to create a + /// natural layout), but need not. Gaps between elements are implicitly + /// considered to be filled with undef. + llvm::SmallVector Elems; + llvm::SmallVector Offsets; + + /// The size of the constant (the maximum end offset of any added element). + /// May be larger than the end of Elems.back() if we split the last element + /// and removed some trailing undefs. + CharUnits Size = CharUnits::Zero(); + + /// This is true only if laying out Elems in order as the elements of a + /// non-packed LLVM struct will give the correct layout. 
+ bool NaturalLayout = true; + + bool split(size_t Index, CharUnits Hint); + std::optional splitAt(CharUnits Pos); + + static mlir::Attribute + buildFrom(CIRGenModule &CGM, ArrayRef Elems, + ArrayRef Offsets, CharUnits StartOffset, CharUnits Size, + bool NaturalLayout, mlir::Type DesiredTy, bool AllowOversized); + +public: + ConstantAggregateBuilder(CIRGenModule &CGM) + : ConstantAggregateBuilderUtils(CGM) {} + + /// Update or overwrite the value starting at \p Offset with \c C. + /// + /// \param AllowOverwrite If \c true, this constant might overwrite (part of) + /// a constant that has already been added. This flag is only used to + /// detect bugs. + bool add(mlir::Attribute C, CharUnits Offset, bool AllowOverwrite); + + /// Update or overwrite the bits starting at \p OffsetInBits with \p Bits. + bool addBits(llvm::APInt Bits, uint64_t OffsetInBits, bool AllowOverwrite); + + /// Attempt to condense the value starting at \p Offset to a constant of type + /// \p DesiredTy. + void condense(CharUnits Offset, mlir::Type DesiredTy); + + /// Produce a constant representing the entire accumulated value, ideally of + /// the specified type. If \p AllowOversized, the constant might be larger + /// than implied by \p DesiredTy (eg, if there is a flexible array member). + /// Otherwise, the constant will be of exactly the same size as \p DesiredTy + /// even if we can't represent it as that type. + mlir::Attribute build(mlir::Type DesiredTy, bool AllowOversized) const { + return buildFrom(CGM, Elems, Offsets, CharUnits::Zero(), Size, + NaturalLayout, DesiredTy, AllowOversized); + } +}; + +template > +static void replace(Container &C, size_t BeginOff, size_t EndOff, Range Vals) { + assert(BeginOff <= EndOff && "invalid replacement range"); + llvm::replace(C, C.begin() + BeginOff, C.begin() + EndOff, Vals); +} + +bool ConstantAggregateBuilder::add(mlir::Attribute A, CharUnits Offset, + bool AllowOverwrite) { + // FIXME(cir): migrate most of this file to use mlir::TypedAttr directly. + mlir::TypedAttr C = mlir::dyn_cast(A); + assert(C && "expected typed attribute"); + // Common case: appending to a layout. + if (Offset >= Size) { + CharUnits Align = getAlignment(C); + CharUnits AlignedSize = Size.alignTo(Align); + if (AlignedSize > Offset || Offset.alignTo(Align) != Offset) + NaturalLayout = false; + else if (AlignedSize < Offset) { + Elems.push_back(getPadding(Offset - Size)); + Offsets.push_back(Size); + } + Elems.push_back(C); + Offsets.push_back(Offset); + Size = Offset + getSize(C); + return true; + } + + // Uncommon case: constant overlaps what we've already created. + std::optional FirstElemToReplace = splitAt(Offset); + if (!FirstElemToReplace) + return false; + + CharUnits CSize = getSize(C); + std::optional LastElemToReplace = splitAt(Offset + CSize); + if (!LastElemToReplace) + return false; + + assert((FirstElemToReplace == LastElemToReplace || AllowOverwrite) && + "unexpectedly overwriting field"); + + replace(Elems, *FirstElemToReplace, *LastElemToReplace, {C}); + replace(Offsets, *FirstElemToReplace, *LastElemToReplace, {Offset}); + Size = std::max(Size, Offset + CSize); + NaturalLayout = false; + return true; +} + +bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits, + bool AllowOverwrite) { + const ASTContext &Context = CGM.getASTContext(); + const uint64_t CharWidth = CGM.getASTContext().getCharWidth(); + auto charTy = CGM.getBuilder().getUIntNTy(CharWidth); + // Offset of where we want the first bit to go within the bits of the + // current char. 
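+  // Worked example (illustrative): with CharWidth == 8 and OffsetInBits == 19,
+  // OffsetWithinChar is 3, the walk below starts at byte offset 2, and at most
+  // 5 bits of the value can be placed into that first byte.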
+ unsigned OffsetWithinChar = OffsetInBits % CharWidth; + + // We split bit-fields up into individual bytes. Walk over the bytes and + // update them. + for (CharUnits OffsetInChars = + Context.toCharUnitsFromBits(OffsetInBits - OffsetWithinChar); + /**/; ++OffsetInChars) { + // Number of bits we want to fill in this char. + unsigned WantedBits = + std::min((uint64_t)Bits.getBitWidth(), CharWidth - OffsetWithinChar); + + // Get a char containing the bits we want in the right places. The other + // bits have unspecified values. + llvm::APInt BitsThisChar = Bits; + if (BitsThisChar.getBitWidth() < CharWidth) + BitsThisChar = BitsThisChar.zext(CharWidth); + if (CGM.getDataLayout().isBigEndian()) { + // Figure out how much to shift by. We may need to left-shift if we have + // less than one byte of Bits left. + int Shift = Bits.getBitWidth() - CharWidth + OffsetWithinChar; + if (Shift > 0) + BitsThisChar.lshrInPlace(Shift); + else if (Shift < 0) + BitsThisChar = BitsThisChar.shl(-Shift); + } else { + BitsThisChar = BitsThisChar.shl(OffsetWithinChar); + } + if (BitsThisChar.getBitWidth() > CharWidth) + BitsThisChar = BitsThisChar.trunc(CharWidth); + + if (WantedBits == CharWidth) { + // Got a full byte: just add it directly. + add(mlir::cir::IntAttr::get(charTy, BitsThisChar), OffsetInChars, + AllowOverwrite); + } else { + // Partial byte: update the existing integer if there is one. If we + // can't split out a 1-CharUnit range to update, then we can't add + // these bits and fail the entire constant emission. + std::optional FirstElemToUpdate = splitAt(OffsetInChars); + if (!FirstElemToUpdate) + return false; + std::optional LastElemToUpdate = + splitAt(OffsetInChars + CharUnits::One()); + if (!LastElemToUpdate) + return false; + assert(*LastElemToUpdate - *FirstElemToUpdate < 2 && + "should have at most one element covering one byte"); + + // Figure out which bits we want and discard the rest. + llvm::APInt UpdateMask(CharWidth, 0); + if (CGM.getDataLayout().isBigEndian()) + UpdateMask.setBits(CharWidth - OffsetWithinChar - WantedBits, + CharWidth - OffsetWithinChar); + else + UpdateMask.setBits(OffsetWithinChar, OffsetWithinChar + WantedBits); + BitsThisChar &= UpdateMask; + bool isNull = false; + if (*FirstElemToUpdate < Elems.size()) { + auto firstEltToUpdate = + dyn_cast(Elems[*FirstElemToUpdate]); + isNull = firstEltToUpdate && firstEltToUpdate.isNullValue(); + } + + if (*FirstElemToUpdate == *LastElemToUpdate || isNull) { + // All existing bits are either zero or undef. + add(CGM.getBuilder().getAttr(charTy, BitsThisChar), + OffsetInChars, /*AllowOverwrite*/ true); + } else { + mlir::cir::IntAttr CI = + dyn_cast(Elems[*FirstElemToUpdate]); + // In order to perform a partial update, we need the existing bitwise + // value, which we can only extract for a constant int. + // auto *CI = dyn_cast(ToUpdate); + if (!CI) + return false; + // Because this is a 1-CharUnit range, the constant occupying it must + // be exactly one CharUnit wide. + assert(CI.getBitWidth() == CharWidth && "splitAt failed"); + assert((!(CI.getValue() & UpdateMask) || AllowOverwrite) && + "unexpectedly overwriting bitfield"); + BitsThisChar |= (CI.getValue() & ~UpdateMask); + Elems[*FirstElemToUpdate] = + CGM.getBuilder().getAttr(charTy, BitsThisChar); + } + } + + // Stop if we've added all the bits. + if (WantedBits == Bits.getBitWidth()) + break; + + // Remove the consumed bits from Bits. 
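+    // For instance, after writing 5 bits of an 11-bit value on a little-endian
+    // target, the low 5 bits are shifted out and Bits is truncated to the
+    // remaining 6 bits before the next iteration.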
+ if (!CGM.getDataLayout().isBigEndian()) + Bits.lshrInPlace(WantedBits); + Bits = Bits.trunc(Bits.getBitWidth() - WantedBits); + + // The remanining bits go at the start of the following bytes. + OffsetWithinChar = 0; + } + + return true; +} + +/// Returns a position within Elems and Offsets such that all elements +/// before the returned index end before Pos and all elements at or after +/// the returned index begin at or after Pos. Splits elements as necessary +/// to ensure this. Returns None if we find something we can't split. +std::optional ConstantAggregateBuilder::splitAt(CharUnits Pos) { + if (Pos >= Size) + return Offsets.size(); + + while (true) { + auto FirstAfterPos = llvm::upper_bound(Offsets, Pos); + if (FirstAfterPos == Offsets.begin()) + return 0; + + // If we already have an element starting at Pos, we're done. + size_t LastAtOrBeforePosIndex = FirstAfterPos - Offsets.begin() - 1; + if (Offsets[LastAtOrBeforePosIndex] == Pos) + return LastAtOrBeforePosIndex; + + // We found an element starting before Pos. Check for overlap. + // FIXME(cir): migrate most of this file to use mlir::TypedAttr directly. + mlir::TypedAttr C = + mlir::dyn_cast(Elems[LastAtOrBeforePosIndex]); + assert(C && "expected typed attribute"); + if (Offsets[LastAtOrBeforePosIndex] + getSize(C) <= Pos) + return LastAtOrBeforePosIndex + 1; + + // Try to decompose it into smaller constants. + if (!split(LastAtOrBeforePosIndex, Pos)) + return std::nullopt; + } +} + +/// Split the constant at index Index, if possible. Return true if we did. +/// Hint indicates the location at which we'd like to split, but may be +/// ignored. +bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) { + llvm_unreachable("NYI"); +} + +mlir::Attribute ConstantAggregateBuilder::buildFrom( + CIRGenModule &CGM, ArrayRef Elems, + ArrayRef Offsets, CharUnits StartOffset, CharUnits Size, + bool NaturalLayout, mlir::Type DesiredTy, bool AllowOversized) { + ConstantAggregateBuilderUtils Utils(CGM); + + if (Elems.empty()) + return {}; + auto Offset = [&](size_t I) { return Offsets[I] - StartOffset; }; + + // If we want an array type, see if all the elements are the same type and + // appropriately spaced. + if (auto aty = mlir::dyn_cast(DesiredTy)) { + llvm_unreachable("NYI"); + } + + // The size of the constant we plan to generate. This is usually just the size + // of the initialized type, but in AllowOversized mode (i.e. flexible array + // init), it can be larger. + CharUnits DesiredSize = Utils.getSize(DesiredTy); + if (Size > DesiredSize) { + assert(AllowOversized && "Elems are oversized"); + DesiredSize = Size; + } + + // The natural alignment of an unpacked CIR struct with the given elements. + CharUnits Align = CharUnits::One(); + for (auto e : Elems) { + // FIXME(cir): migrate most of this file to use mlir::TypedAttr directly. + auto C = mlir::dyn_cast(e); + assert(C && "expected typed attribute"); + Align = std::max(Align, Utils.getAlignment(C)); + } + + // The natural size of an unpacked LLVM struct with the given elements. + CharUnits AlignedSize = Size.alignTo(Align); + + bool Packed = false; + ArrayRef UnpackedElems = Elems; + llvm::SmallVector UnpackedElemStorage; + if (DesiredSize < AlignedSize || DesiredSize.alignTo(Align) != DesiredSize) { + NaturalLayout = false; + Packed = true; + } else if (DesiredSize > AlignedSize) { + // The natural layout would be too small. Add padding to fix it. (This + // is ignored if we choose a packed layout.) 
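+    // The padding produced by getPadding() is simply an array of unsigned-char
+    // zeros spanning DesiredSize - Size bytes.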
+ UnpackedElemStorage.assign(Elems.begin(), Elems.end()); + UnpackedElemStorage.push_back(Utils.getPadding(DesiredSize - Size)); + UnpackedElems = UnpackedElemStorage; + } + + // If we don't have a natural layout, insert padding as necessary. + // As we go, double-check to see if we can actually just emit Elems + // as a non-packed struct and do so opportunistically if possible. + llvm::SmallVector PackedElems; + if (!NaturalLayout) { + CharUnits SizeSoFar = CharUnits::Zero(); + for (size_t I = 0; I != Elems.size(); ++I) { + mlir::TypedAttr C = mlir::dyn_cast(Elems[I]); + assert(C && "expected typed attribute"); + + CharUnits Align = Utils.getAlignment(C); + CharUnits NaturalOffset = SizeSoFar.alignTo(Align); + CharUnits DesiredOffset = Offset(I); + assert(DesiredOffset >= SizeSoFar && "elements out of order"); + + if (DesiredOffset != NaturalOffset) + Packed = true; + if (DesiredOffset != SizeSoFar) + PackedElems.push_back(Utils.getPadding(DesiredOffset - SizeSoFar)); + PackedElems.push_back(Elems[I]); + SizeSoFar = DesiredOffset + Utils.getSize(C); + } + // If we're using the packed layout, pad it out to the desired size if + // necessary. + if (Packed) { + assert(SizeSoFar <= DesiredSize && + "requested size is too small for contents"); + + if (SizeSoFar < DesiredSize) + PackedElems.push_back(Utils.getPadding(DesiredSize - SizeSoFar)); + } + } + + auto &builder = CGM.getBuilder(); + auto arrAttr = mlir::ArrayAttr::get(builder.getContext(), + Packed ? PackedElems : UnpackedElems); + auto strType = builder.getCompleteStructType(arrAttr, Packed); + + if (auto desired = dyn_cast(DesiredTy)) + if (desired.isLayoutIdentical(strType)) + strType = desired; + + return builder.getConstStructOrZeroAttr(arrAttr, Packed, strType); +} + +void ConstantAggregateBuilder::condense(CharUnits Offset, + mlir::Type DesiredTy) { + CharUnits Size = getSize(DesiredTy); + + std::optional FirstElemToReplace = splitAt(Offset); + if (!FirstElemToReplace) + return; + size_t First = *FirstElemToReplace; + + std::optional LastElemToReplace = splitAt(Offset + Size); + if (!LastElemToReplace) + return; + size_t Last = *LastElemToReplace; + + size_t Length = Last - First; + if (Length == 0) + return; + + // FIXME(cir): migrate most of this file to use mlir::TypedAttr directly. + mlir::TypedAttr C = mlir::dyn_cast(Elems[First]); + assert(C && "expected typed attribute"); + if (Length == 1 && Offsets[First] == Offset && getSize(C) == Size) { + // Re-wrap single element structs if necessary. Otherwise, leave any single + // element constant of the right size alone even if it has the wrong type. 
+ llvm_unreachable("NYI"); + } + + mlir::Attribute Replacement = buildFrom( + CGM, ArrayRef(Elems).slice(First, Length), + ArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy), + /*known to have natural layout=*/false, DesiredTy, false); + replace(Elems, First, Last, {Replacement}); + replace(Offsets, First, Last, {Offset}); +} + +//===----------------------------------------------------------------------===// +// ConstStructBuilder +//===----------------------------------------------------------------------===// + +class ConstStructBuilder { + CIRGenModule &CGM; + ConstantEmitter &Emitter; + ConstantAggregateBuilder &Builder; + CharUnits StartOffset; + +public: + static mlir::Attribute BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, QualType StructTy); + static mlir::Attribute BuildStruct(ConstantEmitter &Emitter, + const APValue &Value, QualType ValTy); + static bool UpdateStruct(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, CharUnits Offset, + InitListExpr *Updater); + +private: + ConstStructBuilder(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Builder, CharUnits StartOffset) + : CGM(Emitter.CGM), Emitter(Emitter), Builder(Builder), + StartOffset(StartOffset) {} + + bool AppendField(const FieldDecl *Field, uint64_t FieldOffset, + mlir::Attribute InitExpr, bool AllowOverwrite = false); + + bool AppendBytes(CharUnits FieldOffsetInChars, mlir::Attribute InitCst, + bool AllowOverwrite = false); + + bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset, + mlir::cir::IntAttr InitExpr, bool AllowOverwrite = false); + + bool Build(InitListExpr *ILE, bool AllowOverwrite); + bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, + const CXXRecordDecl *VTableClass, CharUnits BaseOffset); + mlir::Attribute Finalize(QualType Ty); +}; + +bool ConstStructBuilder::AppendField(const FieldDecl *Field, + uint64_t FieldOffset, + mlir::Attribute InitCst, + bool AllowOverwrite) { + const ASTContext &Context = CGM.getASTContext(); + + CharUnits FieldOffsetInChars = Context.toCharUnitsFromBits(FieldOffset); + + return AppendBytes(FieldOffsetInChars, InitCst, AllowOverwrite); +} + +bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars, + mlir::Attribute InitCst, + bool AllowOverwrite) { + return Builder.add(InitCst, StartOffset + FieldOffsetInChars, AllowOverwrite); +} + +bool ConstStructBuilder::AppendBitField(const FieldDecl *Field, + uint64_t FieldOffset, + mlir::cir::IntAttr CI, + bool AllowOverwrite) { + const auto &RL = CGM.getTypes().getCIRGenRecordLayout(Field->getParent()); + const auto &Info = RL.getBitFieldInfo(Field); + llvm::APInt FieldValue = CI.getValue(); + + // Promote the size of FieldValue if necessary + // FIXME: This should never occur, but currently it can because initializer + // constants are cast to bool, and because clang is not enforcing bitfield + // width limits. + if (Info.Size > FieldValue.getBitWidth()) + FieldValue = FieldValue.zext(Info.Size); + + // Truncate the size of FieldValue to the bit field size. 
+ if (Info.Size < FieldValue.getBitWidth()) + FieldValue = FieldValue.trunc(Info.Size); + + return Builder.addBits(FieldValue, + CGM.getASTContext().toBits(StartOffset) + FieldOffset, + AllowOverwrite); +} + +static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, + CharUnits Offset, QualType Type, + InitListExpr *Updater) { + if (Type->isRecordType()) + return ConstStructBuilder::UpdateStruct(Emitter, Const, Offset, Updater); + + auto CAT = Emitter.CGM.getASTContext().getAsConstantArrayType(Type); + if (!CAT) + return false; + QualType ElemType = CAT->getElementType(); + CharUnits ElemSize = Emitter.CGM.getASTContext().getTypeSizeInChars(ElemType); + mlir::Type ElemTy = Emitter.CGM.getTypes().convertTypeForMem(ElemType); + + mlir::Attribute FillC = nullptr; + if (Expr *Filler = Updater->getArrayFiller()) { + if (!isa(Filler)) { + llvm_unreachable("NYI"); + } + } + + unsigned NumElementsToUpdate = + FillC ? CAT->getSize().getZExtValue() : Updater->getNumInits(); + for (unsigned I = 0; I != NumElementsToUpdate; ++I, Offset += ElemSize) { + Expr *Init = nullptr; + if (I < Updater->getNumInits()) + Init = Updater->getInit(I); + + if (!Init && FillC) { + if (!Const.add(FillC, Offset, true)) + return false; + } else if (!Init || isa(Init)) { + continue; + } else if (InitListExpr *ChildILE = dyn_cast(Init)) { + if (!EmitDesignatedInitUpdater(Emitter, Const, Offset, ElemType, + ChildILE)) + return false; + // Attempt to reduce the array element to a single constant if necessary. + Const.condense(Offset, ElemTy); + } else { + mlir::Attribute Val = Emitter.tryEmitPrivateForMemory(Init, ElemType); + if (!Const.add(Val, Offset, true)) + return false; + } + } + + return true; +} + +bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { + RecordDecl *RD = ILE->getType()->castAs()->getDecl(); + const ASTRecordLayout &Layout = CGM.getASTContext().getASTRecordLayout(RD); + + unsigned FieldNo = -1; + unsigned ElementNo = 0; + + // Bail out if we have base classes. We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast(RD)) + if (CXXRD->getNumBases()) + return false; + + for (FieldDecl *Field : RD->fields()) { + ++FieldNo; + + // If this is a union, skip all the fields that aren't being initialized. + if (RD->isUnion() && + !declaresSameEntity(ILE->getInitializedFieldInUnion(), Field)) + continue; + + // Don't emit anonymous bitfields. + if (Field->isUnnamedBitField()) + continue; + + // Get the initializer. A struct can include fields without initializers, + // we just use explicit null values for them. + Expr *Init = nullptr; + if (ElementNo < ILE->getNumInits()) + Init = ILE->getInit(ElementNo++); + if (Init && isa(Init)) + continue; + + // Zero-sized fields are not emitted, but their initializers may still + // prevent emission of this struct as a constant. + if (Field->isZeroSize(CGM.getASTContext())) { + if (Init->HasSideEffects(CGM.getASTContext())) + return false; + continue; + } + + // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr + // represents additional overwriting of our current constant value, and not + // a new constant to emit independently. 
+ if (AllowOverwrite && + (Field->getType()->isArrayType() || Field->getType()->isRecordType())) { + if (auto *SubILE = dyn_cast(Init)) { + CharUnits Offset = CGM.getASTContext().toCharUnitsFromBits( + Layout.getFieldOffset(FieldNo)); + if (!EmitDesignatedInitUpdater(Emitter, Builder, StartOffset + Offset, + Field->getType(), SubILE)) + return false; + // If we split apart the field's value, try to collapse it down to a + // single value now. + llvm_unreachable("NYI"); + continue; + } + } + + mlir::Attribute EltInit; + if (Init) + EltInit = Emitter.tryEmitPrivateForMemory(Init, Field->getType()); + else + EltInit = Emitter.emitNullForMemory(CGM.getLoc(ILE->getSourceRange()), + Field->getType()); + + if (!EltInit) + return false; + + if (!Field->isBitField()) { + // Handle non-bitfield members. + if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit, + AllowOverwrite)) + return false; + // After emitting a non-empty field with [[no_unique_address]], we may + // need to overwrite its tail padding. + if (Field->hasAttr()) + AllowOverwrite = true; + } else { + // Otherwise we have a bitfield. + if (auto constInt = dyn_cast(EltInit)) { + if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), constInt, + AllowOverwrite)) + return false; + } else { + // We are trying to initialize a bitfield with a non-trivial constant, + // this must require run-time code. + return false; + } + } + } + + return true; +} + +namespace { +struct BaseInfo { + BaseInfo(const CXXRecordDecl *Decl, CharUnits Offset, unsigned Index) + : Decl(Decl), Offset(Offset), Index(Index) {} + + const CXXRecordDecl *Decl; + CharUnits Offset; + unsigned Index; + + bool operator<(const BaseInfo &O) const { return Offset < O.Offset; } +}; +} // namespace + +bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, + bool IsPrimaryBase, + const CXXRecordDecl *VTableClass, + CharUnits Offset) { + const ASTRecordLayout &Layout = CGM.getASTContext().getASTRecordLayout(RD); + + if (const CXXRecordDecl *CD = dyn_cast(RD)) { + // Add a vtable pointer, if we need one and it hasn't already been added. + if (Layout.hasOwnVFPtr()) + llvm_unreachable("NYI"); + + // Accumulate and sort bases, in order to visit them in address order, which + // may not be the same as declaration order. + SmallVector Bases; + Bases.reserve(CD->getNumBases()); + unsigned BaseNo = 0; + for (CXXRecordDecl::base_class_const_iterator Base = CD->bases_begin(), + BaseEnd = CD->bases_end(); + Base != BaseEnd; ++Base, ++BaseNo) { + assert(!Base->isVirtual() && "should not have virtual bases here"); + const CXXRecordDecl *BD = Base->getType()->getAsCXXRecordDecl(); + CharUnits BaseOffset = Layout.getBaseClassOffset(BD); + Bases.push_back(BaseInfo(BD, BaseOffset, BaseNo)); + } + llvm::stable_sort(Bases); + + for (unsigned I = 0, N = Bases.size(); I != N; ++I) { + BaseInfo &Base = Bases[I]; + + bool IsPrimaryBase = Layout.getPrimaryBase() == Base.Decl; + Build(Val.getStructBase(Base.Index), Base.Decl, IsPrimaryBase, + VTableClass, Offset + Base.Offset); + } + } + + unsigned FieldNo = 0; + uint64_t OffsetBits = CGM.getASTContext().toBits(Offset); + + bool AllowOverwrite = false; + for (RecordDecl::field_iterator Field = RD->field_begin(), + FieldEnd = RD->field_end(); + Field != FieldEnd; ++Field, ++FieldNo) { + // If this is a union, skip all the fields that aren't being initialized. + if (RD->isUnion() && !declaresSameEntity(Val.getUnionField(), *Field)) + continue; + + // Don't emit anonymous bitfields or zero-sized fields. 
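+    // (An unnamed bit-field such as `int : 7;` only affects layout and never
+    // carries an initializer of its own.)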
+ if (Field->isUnnamedBitField() || Field->isZeroSize(CGM.getASTContext())) + continue; + + // Emit the value of the initializer. + const APValue &FieldValue = + RD->isUnion() ? Val.getUnionValue() : Val.getStructField(FieldNo); + mlir::Attribute EltInit = + Emitter.tryEmitPrivateForMemory(FieldValue, Field->getType()); + if (!EltInit) + return false; + + if (!Field->isBitField()) { + // Handle non-bitfield members. + if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, + EltInit, AllowOverwrite)) + return false; + // After emitting a non-empty field with [[no_unique_address]], we may + // need to overwrite its tail padding. + if (Field->hasAttr()) + AllowOverwrite = true; + } else { + llvm_unreachable("NYI"); + } + } + + return true; +} + +mlir::Attribute ConstStructBuilder::Finalize(QualType Type) { + Type = Type.getNonReferenceType(); + RecordDecl *RD = Type->castAs()->getDecl(); + mlir::Type ValTy = CGM.getTypes().ConvertType(Type); + return Builder.build(ValTy, RD->hasFlexibleArrayMember()); +} + +mlir::Attribute ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, + QualType ValTy) { + ConstantAggregateBuilder Const(Emitter.CGM); + ConstStructBuilder Builder(Emitter, Const, CharUnits::Zero()); + + if (!Builder.Build(ILE, /*AllowOverwrite*/ false)) + return nullptr; + + return Builder.Finalize(ValTy); +} + +mlir::Attribute ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, + const APValue &Val, + QualType ValTy) { + ConstantAggregateBuilder Const(Emitter.CGM); + ConstStructBuilder Builder(Emitter, Const, CharUnits::Zero()); + + const RecordDecl *RD = ValTy->castAs()->getDecl(); + const CXXRecordDecl *CD = dyn_cast(RD); + if (!Builder.Build(Val, RD, false, CD, CharUnits::Zero())) + return nullptr; + + return Builder.Finalize(ValTy); +} + +bool ConstStructBuilder::UpdateStruct(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, + CharUnits Offset, InitListExpr *Updater) { + return ConstStructBuilder(Emitter, Const, Offset) + .Build(Updater, /*AllowOverwrite*/ true); +} + +//===----------------------------------------------------------------------===// +// ConstExprEmitter +//===----------------------------------------------------------------------===// + +// This class only needs to handle arrays, structs and unions. +// +// In LLVM codegen, when outside C++11 mode, those types are not constant +// folded, while all other types are handled by constant folding. +// +// In CIR codegen, instead of folding things here, we should defer that work +// to MLIR: do not attempt to do much here. 
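+//
+// Illustrative example (not part of this patch): when the AST evaluator has
+// not already folded an initializer to an APValue, a file-scope aggregate
+// initializer such as
+//
+//   struct P { int x, y; };
+//   static struct P p = {1, 2};
+//
+// reaches this visitor as an InitListExpr and is emitted through
+// EmitRecordInitialization / ConstStructBuilder below.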
+class ConstExprEmitter + : public StmtVisitor { + CIRGenModule &CGM; + LLVM_ATTRIBUTE_UNUSED ConstantEmitter &Emitter; + +public: + ConstExprEmitter(ConstantEmitter &emitter) + : CGM(emitter.CGM), Emitter(emitter) {} + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + mlir::Attribute VisitStmt(Stmt *S, QualType T) { return nullptr; } + + mlir::Attribute VisitConstantExpr(ConstantExpr *CE, QualType T) { + if (mlir::Attribute Result = Emitter.tryEmitConstantExpr(CE)) + return Result; + return Visit(CE->getSubExpr(), T); + } + + mlir::Attribute VisitParenExpr(ParenExpr *PE, QualType T) { + return Visit(PE->getSubExpr(), T); + } + + mlir::Attribute + VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE, + QualType T) { + return Visit(PE->getReplacement(), T); + } + + mlir::Attribute VisitGenericSelectionExpr(GenericSelectionExpr *GE, + QualType T) { + return Visit(GE->getResultExpr(), T); + } + + mlir::Attribute VisitChooseExpr(ChooseExpr *CE, QualType T) { + return Visit(CE->getChosenSubExpr(), T); + } + + mlir::Attribute VisitCompoundLiteralExpr(CompoundLiteralExpr *E, QualType T) { + return Visit(E->getInitializer(), T); + } + + mlir::Attribute VisitCastExpr(CastExpr *E, QualType destType) { + if (const auto *ECE = dyn_cast(E)) + CGM.buildExplicitCastExprType(ECE, Emitter.CGF); + Expr *subExpr = E->getSubExpr(); + + switch (E->getCastKind()) { + case CK_HLSLArrayRValue: + case CK_HLSLVectorTruncation: + case CK_ToUnion: + llvm_unreachable("not implemented"); + + case CK_AddressSpaceConversion: { + llvm_unreachable("not implemented"); + } + + case CK_LValueToRValue: + case CK_AtomicToNonAtomic: + case CK_NonAtomicToAtomic: + case CK_NoOp: + case CK_ConstructorConversion: + return Visit(subExpr, destType); + + case CK_IntToOCLSampler: + llvm_unreachable("global sampler variables are not generated"); + + case CK_Dependent: + llvm_unreachable("saw dependent cast!"); + + case CK_BuiltinFnToFnPtr: + llvm_unreachable("builtin functions are handled elsewhere"); + + case CK_ReinterpretMemberPointer: + case CK_DerivedToBaseMemberPointer: + case CK_BaseToDerivedMemberPointer: { + llvm_unreachable("not implemented"); + } + + // These will never be supported. + case CK_ObjCObjectLValueCast: + case CK_ARCProduceObject: + case CK_ARCConsumeObject: + case CK_ARCReclaimReturnedObject: + case CK_ARCExtendBlockObject: + case CK_CopyAndAutoreleaseBlockObject: + return nullptr; + + // These don't need to be handled here because Evaluate knows how to + // evaluate them in the cases where they can be folded. 
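+    // (For instance, an IntegralCast inside a constant initializer is folded
+    // by the AST constant evaluator, so returning nullptr here simply defers
+    // to that path.)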
+ case CK_BitCast: + case CK_ToVoid: + case CK_Dynamic: + case CK_LValueBitCast: + case CK_LValueToRValueBitCast: + case CK_NullToMemberPointer: + case CK_UserDefinedConversion: + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: + case CK_BaseToDerived: + case CK_DerivedToBase: + case CK_UncheckedDerivedToBase: + case CK_MemberPointerToBoolean: + case CK_VectorSplat: + case CK_FloatingRealToComplex: + case CK_FloatingComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_FloatingComplexCast: + case CK_FloatingComplexToIntegralComplex: + case CK_IntegralRealToComplex: + case CK_IntegralComplexToReal: + case CK_IntegralComplexToBoolean: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: + case CK_PointerToIntegral: + case CK_PointerToBoolean: + case CK_NullToPointer: + case CK_IntegralCast: + case CK_BooleanToSignedIntegral: + case CK_IntegralToPointer: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingToBoolean: + case CK_FloatingCast: + case CK_FloatingToFixedPoint: + case CK_FixedPointToFloating: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: + case CK_ZeroToOCLOpaqueType: + case CK_MatrixCast: + return nullptr; + } + llvm_unreachable("Invalid CastKind"); + } + + mlir::Attribute VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE, QualType T) { + // TODO(cir): figure out CIR story here... + // No need for a DefaultInitExprScope: we don't handle 'this' in a + // constant expression. + return Visit(DIE->getExpr(), T); + } + + mlir::Attribute VisitExprWithCleanups(ExprWithCleanups *E, QualType T) { + // Since this about constant emission no need to wrap this under a scope. + return Visit(E->getSubExpr(), T); + } + + mlir::Attribute VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E, + QualType T) { + return Visit(E->getSubExpr(), T); + } + + mlir::Attribute EmitArrayInitialization(InitListExpr *ILE, QualType T) { + auto *CAT = CGM.getASTContext().getAsConstantArrayType(ILE->getType()); + assert(CAT && "can't emit array init for non-constant-bound array"); + unsigned NumInitElements = ILE->getNumInits(); // init list size + unsigned NumElements = CAT->getSize().getZExtValue(); // array size + unsigned NumInitableElts = std::min(NumInitElements, NumElements); + + QualType EltTy = CAT->getElementType(); + SmallVector Elts; + Elts.reserve(NumElements); + + // Emit array filler, if there is one. + mlir::Attribute Filler; + if (ILE->hasArrayFiller()) { + auto *aux = ILE->getArrayFiller(); + Filler = Emitter.tryEmitAbstractForMemory(aux, CAT->getElementType()); + if (!Filler) + return {}; + } + + // Emit initializer elements as MLIR attributes and check for common type. 
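+    // If every element ends up with the same type, buildArrayConstant below
+    // can emit a single ConstArrayAttr; otherwise it falls back to an
+    // anonymous struct of the individual element attributes.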
+ mlir::Type CommonElementType; + for (unsigned i = 0; i != NumInitableElts; ++i) { + Expr *Init = ILE->getInit(i); + auto C = Emitter.tryEmitPrivateForMemory(Init, EltTy); + if (!C) + return {}; + if (i == 0) + CommonElementType = C.getType(); + else if (C.getType() != CommonElementType) + CommonElementType = nullptr; + Elts.push_back(std::move(C)); + } + + auto desiredType = CGM.getTypes().ConvertType(T); + auto typedFiller = llvm::dyn_cast_or_null(Filler); + if (Filler && !typedFiller) + llvm_unreachable("We shouldn't be receiving untyped attrs here"); + return buildArrayConstant(CGM, desiredType, CommonElementType, NumElements, + Elts, typedFiller); + } + + mlir::Attribute EmitRecordInitialization(InitListExpr *ILE, QualType T) { + return ConstStructBuilder::BuildStruct(Emitter, ILE, T); + } + + mlir::Attribute EmitVectorInitialization(InitListExpr *ILE, QualType T) { + mlir::cir::VectorType VecTy = + mlir::cast(CGM.getTypes().ConvertType(T)); + unsigned NumElements = VecTy.getSize(); + unsigned NumInits = ILE->getNumInits(); + assert(NumElements >= NumInits && "Too many initializers for a vector"); + QualType EltTy = T->castAs()->getElementType(); + SmallVector Elts; + // Process the explicit initializers + for (unsigned i = 0; i < NumInits; ++i) { + auto Value = Emitter.tryEmitPrivateForMemory(ILE->getInit(i), EltTy); + if (!Value) + return {}; + Elts.push_back(std::move(Value)); + } + // Zero-fill the rest of the vector + for (unsigned i = NumInits; i < NumElements; ++i) { + Elts.push_back(CGM.getBuilder().getZeroInitAttr(VecTy.getEltType())); + } + return mlir::cir::ConstVectorAttr::get( + VecTy, mlir::ArrayAttr::get(CGM.getBuilder().getContext(), Elts)); + } + + mlir::Attribute VisitImplicitValueInitExpr(ImplicitValueInitExpr *E, + QualType T) { + return CGM.getBuilder().getZeroInitAttr(CGM.getCIRType(T)); + } + + mlir::Attribute VisitInitListExpr(InitListExpr *ILE, QualType T) { + if (ILE->isTransparent()) + return Visit(ILE->getInit(0), T); + + if (ILE->getType()->isArrayType()) + return EmitArrayInitialization(ILE, T); + + if (ILE->getType()->isRecordType()) + return EmitRecordInitialization(ILE, T); + + if (ILE->getType()->isVectorType()) + return EmitVectorInitialization(ILE, T); + + return nullptr; + } + + mlir::Attribute VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E, + QualType destType) { + auto C = Visit(E->getBase(), destType); + if (!C) + return nullptr; + + assert(0 && "not implemented"); + return {}; + } + + mlir::Attribute VisitCXXConstructExpr(CXXConstructExpr *E, QualType Ty) { + if (!E->getConstructor()->isTrivial()) + return nullptr; + + // Only default and copy/move constructors can be trivial. + if (E->getNumArgs()) { + assert(E->getNumArgs() == 1 && "trivial ctor with > 1 argument"); + assert(E->getConstructor()->isCopyOrMoveConstructor() && + "trivial ctor has argument but isn't a copy/move ctor"); + + Expr *Arg = E->getArg(0); + assert(CGM.getASTContext().hasSameUnqualifiedType(Ty, Arg->getType()) && + "argument to copy ctor is of wrong type"); + + // Look through the temporary; it's just converting the value to an lvalue + // to pass it to the constructor. + if (auto *MTE = dyn_cast(Arg)) + return Visit(MTE->getSubExpr(), Ty); + // Don't try to support arbitrary lvalue-to-rvalue conversions for now. + return nullptr; + } + + return CGM.getBuilder().getZeroInitAttr(CGM.getCIRType(Ty)); + } + + mlir::Attribute VisitStringLiteral(StringLiteral *E, QualType T) { + // This is a string literal initializing an array in an initializer. 
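+    // e.g. `char buf[4] = "abc";` -- the literal (including its trailing NUL)
+    // becomes a constant array attribute.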
+ return CGM.getConstantArrayFromStringLiteral(E); + } + + mlir::Attribute VisitObjCEncodeExpr(ObjCEncodeExpr *E, QualType T) { + assert(0 && "not implemented"); + return {}; + } + + mlir::Attribute VisitUnaryExtension(const UnaryOperator *E, QualType T) { + return Visit(E->getSubExpr(), T); + } + + // Utility methods + mlir::Type ConvertType(QualType T) { return CGM.getTypes().ConvertType(T); } +}; + +static mlir::Attribute +buildArrayConstant(CIRGenModule &CGM, mlir::Type DesiredType, + mlir::Type CommonElementType, unsigned ArrayBound, + SmallVectorImpl &Elements, + mlir::TypedAttr Filler) { + auto &builder = CGM.getBuilder(); + + // Figure out how long the initial prefix of non-zero elements is. + unsigned NonzeroLength = ArrayBound; + if (Elements.size() < NonzeroLength && builder.isNullValue(Filler)) + NonzeroLength = Elements.size(); + if (NonzeroLength == Elements.size()) { + while (NonzeroLength > 0 && + builder.isNullValue(Elements[NonzeroLength - 1])) + --NonzeroLength; + } + + if (NonzeroLength == 0) + return builder.getZeroInitAttr(DesiredType); + + // Add a zeroinitializer array filler if we have lots of trailing zeroes. + unsigned TrailingZeroes = ArrayBound - NonzeroLength; + if (TrailingZeroes >= 8) { + assert(Elements.size() >= NonzeroLength && + "missing initializer for non-zero element"); + + SmallVector Eles; + Eles.reserve(Elements.size()); + for (auto const &Element : Elements) + Eles.push_back(Element); + + return builder.getConstArray( + mlir::ArrayAttr::get(builder.getContext(), Eles), + mlir::cir::ArrayType::get(builder.getContext(), CommonElementType, + ArrayBound)); + // TODO(cir): If all the elements had the same type up to the trailing + // zeroes, emit a struct of two arrays (the nonzero data and the + // zeroinitializer). Use DesiredType to get the element type. + } else if (Elements.size() != ArrayBound) { + // Otherwise pad to the right size with the filler if necessary. + Elements.resize(ArrayBound, Filler); + if (Filler.getType() != CommonElementType) + CommonElementType = {}; + } + + // If all elements have the same type, just emit an array constant. + if (CommonElementType) { + SmallVector Eles; + Eles.reserve(Elements.size()); + for (auto const &Element : Elements) + Eles.push_back(Element); + + return builder.getConstArray( + mlir::ArrayAttr::get(builder.getContext(), Eles), + mlir::cir::ArrayType::get(builder.getContext(), CommonElementType, + ArrayBound)); + } + + SmallVector Eles; + Eles.reserve(Elements.size()); + for (auto const &Element : Elements) + Eles.push_back(Element); + + auto arrAttr = mlir::ArrayAttr::get(builder.getContext(), Eles); + return builder.getAnonConstStruct(arrAttr, false); +} + +} // end anonymous namespace. + +//===----------------------------------------------------------------------===// +// ConstantLValueEmitter +//===----------------------------------------------------------------------===// + +namespace { +/// A struct which can be used to peephole certain kinds of finalization +/// that normally happen during l-value emission. +struct ConstantLValue { + llvm::PointerUnion Value; + bool HasOffsetApplied; + + /*implicit*/ ConstantLValue(mlir::Value value, bool hasOffsetApplied = false) + : Value(value), HasOffsetApplied(hasOffsetApplied) {} + + /*implicit*/ ConstantLValue(mlir::cir::GlobalViewAttr address) + : Value(address), HasOffsetApplied(false) {} + + ConstantLValue(std::nullptr_t) : ConstantLValue({}, false) {} +}; + +/// A helper class for emitting constant l-values. 
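+/// Typical inputs are addresses that are known at compile time, e.g. `&g` for
+/// a global, a string literal, or `&f` for a function appearing in a constant
+/// initializer; tryEmitBase below usually maps these to a GlobalViewAttr.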
+class ConstantLValueEmitter + : public ConstStmtVisitor { + CIRGenModule &CGM; + ConstantEmitter &Emitter; + const APValue &Value; + QualType DestType; + + // Befriend StmtVisitorBase so that we don't have to expose Visit*. + friend StmtVisitorBase; + +public: + ConstantLValueEmitter(ConstantEmitter &emitter, const APValue &value, + QualType destType) + : CGM(emitter.CGM), Emitter(emitter), Value(value), DestType(destType) {} + + mlir::Attribute tryEmit(); + +private: + mlir::Attribute tryEmitAbsolute(mlir::Type destTy); + ConstantLValue tryEmitBase(const APValue::LValueBase &base); + + ConstantLValue VisitStmt(const Stmt *S) { return nullptr; } + ConstantLValue VisitConstantExpr(const ConstantExpr *E); + ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); + ConstantLValue VisitStringLiteral(const StringLiteral *E); + ConstantLValue VisitObjCBoxedExpr(const ObjCBoxedExpr *E); + ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); + ConstantLValue VisitObjCStringLiteral(const ObjCStringLiteral *E); + ConstantLValue VisitPredefinedExpr(const PredefinedExpr *E); + ConstantLValue VisitAddrLabelExpr(const AddrLabelExpr *E); + ConstantLValue VisitCallExpr(const CallExpr *E); + ConstantLValue VisitBlockExpr(const BlockExpr *E); + ConstantLValue VisitCXXTypeidExpr(const CXXTypeidExpr *E); + ConstantLValue + VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *expr); + + bool hasNonZeroOffset() const { return !Value.getLValueOffset().isZero(); } + + /// Return GEP-like value offset + mlir::ArrayAttr getOffset(mlir::Type Ty) { + auto Offset = Value.getLValueOffset().getQuantity(); + CIRDataLayout Layout(CGM.getModule()); + SmallVector Idx; + CGM.getBuilder().computeGlobalViewIndicesFromFlatOffset(Offset, Ty, Layout, + Idx); + + llvm::SmallVector Indices; + for (auto I : Idx) { + auto Attr = CGM.getBuilder().getI32IntegerAttr(I); + Indices.push_back(Attr); + } + + if (Indices.empty()) + return {}; + return CGM.getBuilder().getArrayAttr(Indices); + } + + // TODO(cir): create a proper interface to absctract CIR constant values. + + /// Apply the value offset to the given constant. + ConstantLValue applyOffset(ConstantLValue &C) { + + // Handle attribute constant LValues. + if (auto Attr = mlir::dyn_cast(C.Value)) { + if (auto GV = mlir::dyn_cast(Attr)) { + auto baseTy = + mlir::cast(GV.getType()).getPointee(); + auto destTy = CGM.getTypes().convertTypeForMem(DestType); + assert(!GV.getIndices() && "Global view is already indexed"); + return mlir::cir::GlobalViewAttr::get(destTy, GV.getSymbol(), + getOffset(baseTy)); + } + llvm_unreachable("Unsupported attribute type to offset"); + } + + // TODO(cir): use ptr_stride, or something... + llvm_unreachable("NYI"); + } +}; + +} // namespace + +mlir::Attribute ConstantLValueEmitter::tryEmit() { + const APValue::LValueBase &base = Value.getLValueBase(); + + // The destination type should be a pointer or reference + // type, but it might also be a cast thereof. + // + // FIXME: the chain of casts required should be reflected in the APValue. + // We need this in order to correctly handle things like a ptrtoint of a + // non-zero null pointer and addrspace casts that aren't trivially + // represented in LLVM IR. + auto destTy = CGM.getTypes().convertTypeForMem(DestType); + assert(mlir::isa(destTy)); + + // If there's no base at all, this is a null or absolute pointer, + // possibly cast back to an integer type. + if (!base) { + return tryEmitAbsolute(destTy); + } + + // Otherwise, try to emit the base. 
+ ConstantLValue result = tryEmitBase(base); + + // If that failed, we're done. + auto &value = result.Value; + if (!value) + return {}; + + // Apply the offset if necessary and not already done. + if (!result.HasOffsetApplied) { + value = applyOffset(result).Value; + } + + // Convert to the appropriate type; this could be an lvalue for + // an integer. FIXME: performAddrSpaceCast + if (mlir::isa(destTy)) { + if (value.is()) + return value.get(); + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); +} + +/// Try to emit an absolute l-value, such as a null pointer or an integer +/// bitcast to pointer type. +mlir::Attribute ConstantLValueEmitter::tryEmitAbsolute(mlir::Type destTy) { + // If we're producing a pointer, this is easy. + auto destPtrTy = mlir::dyn_cast(destTy); + assert(destPtrTy && "expected !cir.ptr type"); + return CGM.getBuilder().getConstPtrAttr( + destPtrTy, Value.getLValueOffset().getQuantity()); +} + +ConstantLValue +ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { + // Handle values. + if (const ValueDecl *D = base.dyn_cast()) { + // The constant always points to the canonical declaration. We want to look + // at properties of the most recent declaration at the point of emission. + D = cast(D->getMostRecentDecl()); + + if (D->hasAttr()) + llvm_unreachable("emit pointer base for weakref is NYI"); + + if (auto *FD = dyn_cast(D)) { + auto fop = CGM.GetAddrOfFunction(FD); + auto builder = CGM.getBuilder(); + auto ctxt = builder.getContext(); + return mlir::cir::GlobalViewAttr::get( + builder.getPointerTo(fop.getFunctionType()), + mlir::FlatSymbolRefAttr::get(ctxt, fop.getSymNameAttr())); + } + + if (auto *VD = dyn_cast(D)) { + // We can never refer to a variable with local storage. + if (!VD->hasLocalStorage()) { + if (VD->isFileVarDecl() || VD->hasExternalStorage()) + return CGM.getAddrOfGlobalVarAttr(VD); + + if (VD->isLocalVarDecl()) { + auto linkage = + CGM.getCIRLinkageVarDefinition(VD, /*IsConstant=*/false); + return CGM.getBuilder().getGlobalViewAttr( + CGM.getOrCreateStaticVarDecl(*VD, linkage)); + } + } + } + } + + // Handle typeid(T). + if (TypeInfoLValue TI = base.dyn_cast()) { + assert(0 && "NYI"); + } + + // Otherwise, it must be an expression. 
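+  // For example (illustrative), the base expression may be a StringLiteral, a
+  // CompoundLiteralExpr or a MaterializeTemporaryExpr:
+  //
+  //   const char *s = "abc";        // StringLiteral
+  //   int *p = (int[]){1, 2, 3};    // CompoundLiteralExpr (C)
+  //   const int &r = 42;            // MaterializeTemporaryExpr (C++)
+  //
+  // each of which is dispatched to the matching Visit* overload below.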
+ return Visit(base.get()); +} + +static ConstantLValue +tryEmitGlobalCompoundLiteral(ConstantEmitter &emitter, + const CompoundLiteralExpr *E) { + CIRGenModule &CGM = emitter.CGM; + + LangAS addressSpace = E->getType().getAddressSpace(); + mlir::Attribute C = emitter.tryEmitForInitializer(E->getInitializer(), + addressSpace, E->getType()); + if (!C) { + assert(!E->isFileScope() && + "file-scope compound literal did not have constant initializer!"); + return nullptr; + } + + auto GV = CIRGenModule::createGlobalOp( + CGM, CGM.getLoc(E->getSourceRange()), + CGM.createGlobalCompoundLiteralName(), + CGM.getTypes().convertTypeForMem(E->getType()), + E->getType().isConstantStorage(CGM.getASTContext(), false, false)); + GV.setInitialValueAttr(C); + GV.setLinkage(mlir::cir::GlobalLinkageKind::InternalLinkage); + CharUnits Align = CGM.getASTContext().getTypeAlignInChars(E->getType()); + GV.setAlignment(Align.getAsAlign().value()); + + emitter.finalize(GV); + return CGM.getBuilder().getGlobalViewAttr(GV); +} + +ConstantLValue ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) { + assert(0 && "NYI"); + return Visit(E->getSubExpr()); +} + +ConstantLValue +ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { + ConstantEmitter CompoundLiteralEmitter(CGM, Emitter.CGF); + CompoundLiteralEmitter.setInConstantContext(Emitter.isInConstantContext()); + return tryEmitGlobalCompoundLiteral(CompoundLiteralEmitter, E); +} + +ConstantLValue +ConstantLValueEmitter::VisitStringLiteral(const StringLiteral *E) { + return CGM.getAddrOfConstantStringFromLiteral(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCStringLiteral(const ObjCStringLiteral *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue ConstantLValueEmitter::VisitCallExpr(const CallExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue ConstantLValueEmitter::VisitBlockExpr(const BlockExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { + assert(0 && "NYI"); + return nullptr; +} + +ConstantLValue ConstantLValueEmitter::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *expr) { + assert(expr->getStorageDuration() == SD_Static); + const Expr *inner = expr->getSubExpr()->skipRValueSubobjectAdjustments(); + mlir::Operation *globalTemp = CGM.getAddrOfGlobalTemporary(expr, inner); + CIRGenBuilderTy builder = CGM.getBuilder(); + return ConstantLValue( + builder.getGlobalViewAttr(mlir::cast(globalTemp))); +} + +//===----------------------------------------------------------------------===// +// ConstantEmitter +//===----------------------------------------------------------------------===// + +mlir::Attribute ConstantEmitter::validateAndPopAbstract(mlir::Attribute C, + AbstractState saved) { + Abstract = saved.OldValue; + + assert(saved.OldPlaceholdersSize == PlaceholderAddresses.size() && + "created a placeholder while doing an abstract emission?"); + + // No validation 
necessary for now. + // No cleanup to do for now. + return C; +} + +mlir::Attribute ConstantEmitter::tryEmitForInitializer(const VarDecl &D) { + initializeNonAbstract(D.getType().getAddressSpace()); + return markIfFailed(tryEmitPrivateForVarInit(D)); +} + +mlir::Attribute ConstantEmitter::tryEmitForInitializer(const Expr *E, + LangAS destAddrSpace, + QualType destType) { + initializeNonAbstract(destAddrSpace); + return markIfFailed(tryEmitPrivateForMemory(E, destType)); +} + +mlir::Attribute ConstantEmitter::emitForInitializer(const APValue &value, + LangAS destAddrSpace, + QualType destType) { + initializeNonAbstract(destAddrSpace); + auto c = tryEmitPrivateForMemory(value, destType); + assert(c && "couldn't emit constant value non-abstractly?"); + return c; +} + +void ConstantEmitter::finalize(mlir::cir::GlobalOp global) { + assert(InitializedNonAbstract && + "finalizing emitter that was used for abstract emission?"); + assert(!Finalized && "finalizing emitter multiple times"); + assert(!global.isDeclaration()); + + // Note that we might also be Failed. + Finalized = true; + + if (!PlaceholderAddresses.empty()) { + assert(0 && "not implemented"); + } +} + +ConstantEmitter::~ConstantEmitter() { + assert((!InitializedNonAbstract || Finalized || Failed) && + "not finalized after being initialized for non-abstract emission"); + assert(PlaceholderAddresses.empty() && "unhandled placeholders"); +} + +// TODO(cir): this can be shared with LLVM's codegen +static QualType getNonMemoryType(CIRGenModule &CGM, QualType type) { + if (auto AT = type->getAs()) { + return CGM.getASTContext().getQualifiedType(AT->getValueType(), + type.getQualifiers()); + } + return type; +} + +mlir::Attribute +ConstantEmitter::tryEmitAbstractForInitializer(const VarDecl &D) { + auto state = pushAbstract(); + auto C = tryEmitPrivateForVarInit(D); + return validateAndPopAbstract(C, state); +} + +mlir::Attribute ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { + // Make a quick check if variable can be default NULL initialized + // and avoid going through rest of code which may do, for c++11, + // initialization of memory to all NULLs. + if (!D.hasLocalStorage()) { + QualType Ty = CGM.getASTContext().getBaseElementType(D.getType()); + if (Ty->isRecordType()) + if (const CXXConstructExpr *E = + dyn_cast_or_null(D.getInit())) { + const CXXConstructorDecl *CD = E->getConstructor(); + // FIXME: we should probably model this more closely to C++ than + // just emitting a global with zero init (mimic what we do for trivial + // assignments and whatnots). Since this is for globals shouldn't + // be a problem for the near future. + if (CD->isTrivial() && CD->isDefaultConstructor()) + return mlir::cir::ZeroAttr::get( + CGM.getBuilder().getContext(), + CGM.getTypes().ConvertType(D.getType())); + } + } + InConstantContext = D.hasConstantInitialization(); + + const Expr *E = D.getInit(); + assert(E && "No initializer to emit"); + + QualType destType = D.getType(); + + if (!destType->isReferenceType()) { + QualType nonMemoryDestType = getNonMemoryType(CGM, destType); + if (auto C = ConstExprEmitter(*this).Visit(const_cast(E), + nonMemoryDestType)) + return emitForMemory(C, destType); + } + + // Try to emit the initializer. Note that this can allow some things that + // are not allowed by tryEmitPrivateForMemory alone. 
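+  // Illustrative example of this path: for
+  //
+  //   constexpr int g = 40 + 2;
+  //
+  // the initializer has already been evaluated, so D.evaluateValue() yields
+  // the APValue 42 and the constant is emitted from that value instead of
+  // from the expression tree.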
+ if (auto value = D.evaluateValue()) + return tryEmitPrivateForMemory(*value, destType); + + return nullptr; +} + +mlir::Attribute ConstantEmitter::tryEmitAbstract(const Expr *E, + QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(E, destType); + return validateAndPopAbstract(C, state); +} + +mlir::Attribute ConstantEmitter::tryEmitAbstract(const APValue &value, + QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(value, destType); + return validateAndPopAbstract(C, state); +} + +mlir::Attribute ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) { + if (!CE->hasAPValueResult()) + return nullptr; + + QualType RetType = CE->getType(); + if (CE->isGLValue()) + RetType = CGM.getASTContext().getLValueReferenceType(RetType); + + return emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType); +} + +mlir::Attribute ConstantEmitter::tryEmitAbstractForMemory(const Expr *E, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitAbstract(E, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +mlir::Attribute ConstantEmitter::tryEmitAbstractForMemory(const APValue &value, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitAbstract(value, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +mlir::TypedAttr ConstantEmitter::tryEmitPrivateForMemory(const Expr *E, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitPrivate(E, nonMemoryDestType); + if (C) { + auto attr = emitForMemory(C, destType); + auto typedAttr = llvm::dyn_cast(attr); + if (!typedAttr) + llvm_unreachable("this should always be typed"); + return typedAttr; + } else { + return nullptr; + } +} + +mlir::Attribute ConstantEmitter::tryEmitPrivateForMemory(const APValue &value, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitPrivate(value, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +mlir::Attribute ConstantEmitter::emitForMemory(CIRGenModule &CGM, + mlir::Attribute C, + QualType destType) { + // For an _Atomic-qualified constant, we may need to add tail padding. + if (auto AT = destType->getAs()) { + QualType destValueType = AT->getValueType(); + C = emitForMemory(CGM, C, destValueType); + + uint64_t innerSize = CGM.getASTContext().getTypeSize(destValueType); + uint64_t outerSize = CGM.getASTContext().getTypeSize(destType); + if (innerSize == outerSize) + return C; + + assert(innerSize < outerSize && "emitted over-large constant for atomic"); + auto &builder = CGM.getBuilder(); + auto zeroArray = builder.getZeroInitAttr( + mlir::cir::ArrayType::get(builder.getContext(), builder.getUInt8Ty(), + (outerSize - innerSize) / 8)); + SmallVector anonElts = {C, zeroArray}; + auto arrAttr = mlir::ArrayAttr::get(builder.getContext(), anonElts); + return builder.getAnonConstStruct(arrAttr, false); + } + + // Zero-extend bool. + auto typed = mlir::dyn_cast(C); + if (typed && mlir::isa(typed.getType())) { + // Already taken care given that bool values coming from + // integers only carry true/false. 
+ } + + return C; +} + +mlir::TypedAttr ConstantEmitter::tryEmitPrivate(const Expr *E, + QualType destType) { + assert(!destType->isVoidType() && "can't emit a void constant"); + + if (auto C = ConstExprEmitter(*this).Visit(const_cast(E), destType)) { + if (auto TypedC = mlir::dyn_cast_if_present(C)) + return TypedC; + llvm_unreachable("this should always be typed"); + } + + Expr::EvalResult Result; + + bool Success; + + if (destType->isReferenceType()) + Success = E->EvaluateAsLValue(Result, CGM.getASTContext()); + else + Success = + E->EvaluateAsRValue(Result, CGM.getASTContext(), InConstantContext); + + if (Success && !Result.hasSideEffects()) { + auto C = tryEmitPrivate(Result.Val, destType); + if (auto TypedC = mlir::dyn_cast_if_present(C)) + return TypedC; + llvm_unreachable("this should always be typed"); + } + + return nullptr; +} + +mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &Value, + QualType DestType) { + auto &builder = CGM.getBuilder(); + switch (Value.getKind()) { + case APValue::None: + case APValue::Indeterminate: + // TODO(cir): LLVM models out-of-lifetime and indeterminate values as + // 'undef'. Find out what's better for CIR. + assert(0 && "not implemented"); + case APValue::Int: { + mlir::Type ty = CGM.getCIRType(DestType); + if (mlir::isa(ty)) + return builder.getCIRBoolAttr(Value.getInt().getZExtValue()); + assert(mlir::isa(ty) && "expected integral type"); + return CGM.getBuilder().getAttr(ty, Value.getInt()); + } + case APValue::Float: { + const llvm::APFloat &Init = Value.getFloat(); + if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() && + !CGM.getASTContext().getLangOpts().NativeHalfType && + CGM.getASTContext().getTargetInfo().useFP16ConversionIntrinsics()) + assert(0 && "not implemented"); + else { + mlir::Type ty = CGM.getCIRType(DestType); + assert(mlir::isa(ty) && + "expected floating-point type"); + return CGM.getBuilder().getAttr(ty, Init); + } + } + case APValue::Array: { + const ArrayType *ArrayTy = CGM.getASTContext().getAsArrayType(DestType); + unsigned NumElements = Value.getArraySize(); + unsigned NumInitElts = Value.getArrayInitializedElts(); + + // Emit array filler, if there is one. + mlir::Attribute Filler; + if (Value.hasArrayFiller()) { + Filler = tryEmitAbstractForMemory(Value.getArrayFiller(), + ArrayTy->getElementType()); + if (!Filler) + return {}; + } + + // Emit initializer elements. 
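+    // Illustrative shape of the incoming value: for `int a[100] = {1, 2, 3};`
+    // the evaluated APValue has 3 initialized elements plus a zero array
+    // filler covering the remaining 97, which lets buildArrayConstant take
+    // its trailing-zeroes path instead of materializing 97 explicit zero
+    // constants.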
+ SmallVector Elts; + if (Filler && builder.isNullValue(Filler)) + Elts.reserve(NumInitElts + 1); + else + Elts.reserve(NumElements); + + mlir::Type CommonElementType; + for (unsigned I = 0; I < NumInitElts; ++I) { + auto C = tryEmitPrivateForMemory(Value.getArrayInitializedElt(I), + ArrayTy->getElementType()); + if (!C) + return {}; + + assert(mlir::isa(C) && + "This should always be a TypedAttr."); + auto CTyped = mlir::cast(C); + + if (I == 0) + CommonElementType = CTyped.getType(); + else if (CTyped.getType() != CommonElementType) + CommonElementType = {}; + auto typedC = llvm::dyn_cast(C); + if (!typedC) + llvm_unreachable("this should always be typed"); + Elts.push_back(typedC); + } + + auto Desired = CGM.getTypes().ConvertType(DestType); + + auto typedFiller = llvm::dyn_cast_or_null(Filler); + if (Filler && !typedFiller) + llvm_unreachable("this should always be typed"); + + return buildArrayConstant(CGM, Desired, CommonElementType, NumElements, + Elts, typedFiller); + } + case APValue::Vector: { + const QualType ElementType = + DestType->castAs()->getElementType(); + unsigned NumElements = Value.getVectorLength(); + SmallVector Elts; + Elts.reserve(NumElements); + for (unsigned i = 0; i < NumElements; ++i) { + auto C = tryEmitPrivateForMemory(Value.getVectorElt(i), ElementType); + if (!C) + return {}; + Elts.push_back(C); + } + auto Desired = + mlir::cast(CGM.getTypes().ConvertType(DestType)); + return mlir::cir::ConstVectorAttr::get( + Desired, mlir::ArrayAttr::get(CGM.getBuilder().getContext(), Elts)); + } + case APValue::MemberPointer: { + assert(!MissingFeatures::cxxABI()); + + const ValueDecl *memberDecl = Value.getMemberPointerDecl(); + assert(!Value.isMemberPointerToDerivedMember() && "NYI"); + + if (const auto *memberFuncDecl = dyn_cast(memberDecl)) + assert(0 && "not implemented"); + + auto cirTy = mlir::cast( + CGM.getTypes().ConvertType(DestType)); + + const auto *fieldDecl = cast(memberDecl); + return builder.getDataMemberAttr(cirTy, fieldDecl->getFieldIndex()); + } + case APValue::LValue: + return ConstantLValueEmitter(*this, Value, DestType).tryEmit(); + case APValue::Struct: + case APValue::Union: + return ConstStructBuilder::BuildStruct(*this, Value, DestType); + case APValue::FixedPoint: + case APValue::ComplexInt: + case APValue::ComplexFloat: + case APValue::AddrLabelDiff: + assert(0 && "not implemented"); + } + llvm_unreachable("Unknown APValue kind"); +} + +mlir::Value CIRGenModule::buildNullConstant(QualType T, mlir::Location loc) { + if (T->getAs()) { + return builder.getNullPtr(getTypes().convertTypeForMem(T), loc); + } + + if (getTypes().isZeroInitializable(T)) + return builder.getNullValue(getTypes().convertTypeForMem(T), loc); + + if (const ConstantArrayType *CAT = + getASTContext().getAsConstantArrayType(T)) { + llvm_unreachable("NYI"); + } + + if (const RecordType *RT = T->getAs()) + llvm_unreachable("NYI"); + + assert(T->isMemberDataPointerType() && + "Should only see pointers to data members here!"); + + llvm_unreachable("NYI"); + return {}; +} + +mlir::Value CIRGenModule::buildMemberPointerConstant(const UnaryOperator *E) { + assert(!MissingFeatures::cxxABI()); + + auto loc = getLoc(E->getSourceRange()); + + const auto *decl = cast(E->getSubExpr())->getDecl(); + + // A member function pointer. 
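+  // Illustrative source forms this handles (names are made up):
+  //
+  //   struct S { void m(); virtual void v(); int d; };
+  //   auto pm = &S::m;   // non-virtual method -> method attribute
+  //   auto pv = &S::v;   // virtual method     -> virtual method attribute
+  //                      //                       (via the C++ ABI)
+  //   auto pd = &S::d;   // data member        -> data member attribute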
+ if (const auto *methodDecl = dyn_cast(decl)) { + auto ty = mlir::cast(getCIRType(E->getType())); + if (methodDecl->isVirtual()) + return builder.create( + loc, ty, getCXXABI().buildVirtualMethodAttr(ty, methodDecl)); + + auto methodFuncOp = GetAddrOfFunction(methodDecl); + return builder.create( + loc, ty, builder.getMethodAttr(ty, methodFuncOp)); + } + + auto ty = mlir::cast(getCIRType(E->getType())); + + // Otherwise, a member data pointer. + const auto *fieldDecl = cast(decl); + return builder.create( + loc, ty, builder.getDataMemberAttr(ty, fieldDecl->getFieldIndex())); +} + +mlir::Attribute ConstantEmitter::emitAbstract(const Expr *E, + QualType destType) { + auto state = pushAbstract(); + auto C = mlir::cast(tryEmitPrivate(E, destType)); + C = validateAndPopAbstract(C, state); + if (!C) { + llvm_unreachable("NYI"); + } + return C; +} + +mlir::Attribute ConstantEmitter::emitAbstract(SourceLocation loc, + const APValue &value, + QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(value, destType); + C = validateAndPopAbstract(C, state); + if (!C) { + CGM.Error(loc, + "internal error: could not emit constant value \"abstractly\""); + llvm_unreachable("NYI"); + } + return C; +} + +mlir::Attribute ConstantEmitter::emitNullForMemory(mlir::Location loc, + CIRGenModule &CGM, + QualType T) { + auto cstOp = dyn_cast( + CGM.buildNullConstant(T, loc).getDefiningOp()); + assert(cstOp && "expected cir.const op"); + return emitForMemory(CGM, cstOp.getValue(), T); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp new file mode 100644 index 000000000000..1acc5a41b29a --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -0,0 +1,2778 @@ +//===--- CIRGenExprScalar.cpp - Emit CIR Code for Scalar Exprs ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Expr nodes with scalar CIR types as CIR code. +// +//===----------------------------------------------------------------------===// + +#include "Address.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "CIRGenOpenMPRuntime.h" +#include "TargetInfo.h" +#include "clang/CIR/MissingFeatures.h" + +#include "clang/AST/StmtVisitor.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Value.h" + +using namespace cir; +using namespace clang; + +namespace { + +struct BinOpInfo { + mlir::Value LHS; + mlir::Value RHS; + SourceRange Loc; + QualType FullType; // Type of operands and result + QualType CompType; // Type used for computations. Element type + // for vectors, otherwise same as FullType. + BinaryOperator::Opcode Opcode; // Opcode of BinOp to perform + FPOptions FPFeatures; + const Expr *E; // Entire expr, for error unsupported. May not be binop. + + /// Check if the binop computes a division or a remainder. 
+ bool isDivremOp() const { + return Opcode == BO_Div || Opcode == BO_Rem || Opcode == BO_DivAssign || + Opcode == BO_RemAssign; + } + + /// Check if the binop can result in integer overflow. + bool mayHaveIntegerOverflow() const { + // Without constant input, we can't rule out overflow. + auto LHSCI = dyn_cast(LHS.getDefiningOp()); + auto RHSCI = dyn_cast(RHS.getDefiningOp()); + if (!LHSCI || !RHSCI) + return true; + + llvm::APInt Result; + assert(!MissingFeatures::mayHaveIntegerOverflow()); + llvm_unreachable("NYI"); + return false; + } + + /// Check if at least one operand is a fixed point type. In such cases, + /// this operation did not follow usual arithmetic conversion and both + /// operands might not be of the same type. + bool isFixedPointOp() const { + // We cannot simply check the result type since comparison operations + // return an int. + if (const auto *BinOp = llvm::dyn_cast(E)) { + QualType LHSType = BinOp->getLHS()->getType(); + QualType RHSType = BinOp->getRHS()->getType(); + return LHSType->isFixedPointType() || RHSType->isFixedPointType(); + } + if (const auto *UnOp = llvm::dyn_cast(E)) + return UnOp->getSubExpr()->getType()->isFixedPointType(); + return false; + } +}; + +static bool PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + QualType SrcType, QualType DstType) { + return SrcType->isIntegerType() && DstType->isIntegerType(); +} + +class ScalarExprEmitter : public StmtVisitor { + CIRGenFunction &CGF; + CIRGenBuilderTy &Builder; + bool IgnoreResultAssign; + +public: + ScalarExprEmitter(CIRGenFunction &cgf, CIRGenBuilderTy &builder, + bool ira = false) + : CGF(cgf), Builder(builder), IgnoreResultAssign(ira) {} + + //===--------------------------------------------------------------------===// + // Utilities + //===--------------------------------------------------------------------===// + + bool TestAndClearIgnoreResultAssign() { + bool I = IgnoreResultAssign; + IgnoreResultAssign = false; + return I; + } + + mlir::Type ConvertType(QualType T) { return CGF.ConvertType(T); } + LValue buildLValue(const Expr *E) { return CGF.buildLValue(E); } + LValue buildCheckedLValue(const Expr *E, CIRGenFunction::TypeCheckKind TCK) { + return CGF.buildCheckedLValue(E, TCK); + } + + mlir::Value buildComplexToScalarConversion(mlir::Location Loc, mlir::Value V, + CastKind Kind, QualType DestTy); + + /// Emit a value that corresponds to null for the given type. + mlir::Value buildNullValue(QualType Ty, mlir::Location loc); + + mlir::Value buildPromotedValue(mlir::Value result, QualType PromotionType) { + return Builder.createFloatingCast(result, ConvertType(PromotionType)); + } + + mlir::Value buildUnPromotedValue(mlir::Value result, QualType ExprType) { + return Builder.createFloatingCast(result, ConvertType(ExprType)); + } + + mlir::Value buildPromoted(const Expr *E, QualType PromotionType); + + //===--------------------------------------------------------------------===// + // Visitor Methods + //===--------------------------------------------------------------------===// + + mlir::Value Visit(Expr *E) { + return StmtVisitor::Visit(E); + } + + mlir::Value VisitStmt(Stmt *S) { + S->dump(llvm::errs(), CGF.getContext()); + llvm_unreachable("Stmt can't have complex result type!"); + } + + mlir::Value VisitExpr(Expr *E) { + // Crashing here for "ScalarExprClassName"? Please implement + // VisitScalarExprClassName(...) to get this working. 
+ emitError(CGF.getLoc(E->getExprLoc()), "scalar exp no implemented: '") + << E->getStmtClassName() << "'"; + llvm_unreachable("NYI"); + return {}; + } + + mlir::Value VisitConstantExpr(ConstantExpr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr()); } + mlir::Value + VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) { + return Visit(E->getReplacement()); + } + mlir::Value VisitGenericSelectionExpr(GenericSelectionExpr *GE) { + llvm_unreachable("NYI"); + } + mlir::Value VisitCoawaitExpr(CoawaitExpr *S) { + return CGF.buildCoawaitExpr(*S).getScalarVal(); + } + mlir::Value VisitCoyieldExpr(CoyieldExpr *S) { + return CGF.buildCoyieldExpr(*S).getScalarVal(); + } + mlir::Value VisitUnaryCoawait(const UnaryOperator *E) { + llvm_unreachable("NYI"); + } + + // Leaves. + mlir::Value VisitIntegerLiteral(const IntegerLiteral *E) { + mlir::Type Ty = CGF.getCIRType(E->getType()); + return Builder.create( + CGF.getLoc(E->getExprLoc()), Ty, + Builder.getAttr(Ty, E->getValue())); + } + + mlir::Value VisitFixedPointLiteral(const FixedPointLiteral *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitFloatingLiteral(const FloatingLiteral *E) { + mlir::Type Ty = CGF.getCIRType(E->getType()); + assert(mlir::isa(Ty) && + "expect floating-point type"); + return Builder.create( + CGF.getLoc(E->getExprLoc()), Ty, + Builder.getAttr(Ty, E->getValue())); + } + mlir::Value VisitCharacterLiteral(const CharacterLiteral *E) { + mlir::Type Ty = CGF.getCIRType(E->getType()); + auto loc = CGF.getLoc(E->getExprLoc()); + auto init = mlir::cir::IntAttr::get(Ty, E->getValue()); + return Builder.create(loc, Ty, init); + } + mlir::Value VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *E) { + mlir::Type Ty = CGF.getCIRType(E->getType()); + return Builder.create( + CGF.getLoc(E->getExprLoc()), Ty, Builder.getCIRBoolAttr(E->getValue())); + } + + mlir::Value VisitCXXScalarValueInitExpr(const CXXScalarValueInitExpr *E) { + if (E->getType()->isVoidType()) + return nullptr; + + return buildNullValue(E->getType(), CGF.getLoc(E->getSourceRange())); + } + mlir::Value VisitGNUNullExpr(const GNUNullExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitOffsetOfExpr(OffsetOfExpr *E) { + // Try folding the offsetof to a constant. + Expr::EvalResult EVResult; + if (E->EvaluateAsInt(EVResult, CGF.getContext())) { + llvm::APSInt Value = EVResult.Val.getInt(); + return Builder.getConstInt(CGF.getLoc(E->getExprLoc()), Value); + } + + llvm_unreachable("NYI"); + } + + mlir::Value VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E); + mlir::Value VisitAddrLabelExpr(const AddrLabelExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitSizeOfPackExpr(SizeOfPackExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitPseudoObjectExpr(PseudoObjectExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitOpaqueValueExpr(OpaqueValueExpr *E) { + if (E->isGLValue()) + llvm_unreachable("NYI"); + + // Otherwise, assume the mapping is the scalar directly. + return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal(); + } + + /// Emits the address of the l-value, then loads and returns the result. 
+ mlir::Value buildLoadOfLValue(const Expr *E) { + LValue LV = CGF.buildLValue(E); + // FIXME: add some akin to EmitLValueAlignmentAssumption(E, V); + return CGF.buildLoadOfLValue(LV, E->getExprLoc()).getScalarVal(); + } + + mlir::Value buildLoadOfLValue(LValue LV, SourceLocation Loc) { + return CGF.buildLoadOfLValue(LV, Loc).getScalarVal(); + } + + // l-values + mlir::Value VisitDeclRefExpr(DeclRefExpr *E) { + if (CIRGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) { + return CGF.buildScalarConstant(Constant, E); + } + return buildLoadOfLValue(E); + } + + mlir::Value VisitObjCSelectorExpr(ObjCSelectorExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCProtocolExpr(ObjCProtocolExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCIVarRefExpr(ObjCIvarRefExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCMessageExpr(ObjCMessageExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCIsaExpr(ObjCIsaExpr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitArraySubscriptExpr(ArraySubscriptExpr *E) { + // Do we need anything like TestAndClearIgnoreResultAssign()? + + if (E->getBase()->getType()->isVectorType()) { + assert(!MissingFeatures::scalableVectors() && + "NYI: index into scalable vector"); + // Subscript of vector type. This is handled differently, with a custom + // operation. + mlir::Value VecValue = Visit(E->getBase()); + mlir::Value IndexValue = Visit(E->getIdx()); + return CGF.builder.create( + CGF.getLoc(E->getSourceRange()), VecValue, IndexValue); + } + + // Just load the lvalue formed by the subscript expression. + return buildLoadOfLValue(E); + } + + mlir::Value VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitShuffleVectorExpr(ShuffleVectorExpr *E) { + if (E->getNumSubExprs() == 2) { + // The undocumented form of __builtin_shufflevector. + mlir::Value InputVec = Visit(E->getExpr(0)); + mlir::Value IndexVec = Visit(E->getExpr(1)); + return CGF.builder.create( + CGF.getLoc(E->getSourceRange()), InputVec, IndexVec); + } else { + // The documented form of __builtin_shufflevector, where the indices are + // a variable number of integer constants. The constants will be stored + // in an ArrayAttr. + mlir::Value Vec1 = Visit(E->getExpr(0)); + mlir::Value Vec2 = Visit(E->getExpr(1)); + SmallVector Indices; + for (unsigned i = 2; i < E->getNumSubExprs(); ++i) { + Indices.push_back(mlir::cir::IntAttr::get( + CGF.builder.getSInt64Ty(), + E->getExpr(i) + ->EvaluateKnownConstInt(CGF.getContext()) + .getSExtValue())); + } + return CGF.builder.create( + CGF.getLoc(E->getSourceRange()), CGF.getCIRType(E->getType()), Vec1, + Vec2, CGF.builder.getArrayAttr(Indices)); + } + } + mlir::Value VisitConvertVectorExpr(ConvertVectorExpr *E) { + // __builtin_convertvector is an element-wise cast, and is implemented as a + // regular cast. The back end handles casts of vectors correctly. 
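+    // Usage sketch (illustrative) of the builtin being lowered here:
+    //
+    //   typedef int   int4   __attribute__((ext_vector_type(4)));
+    //   typedef float float4 __attribute__((ext_vector_type(4)));
+    //   float4 f(int4 v) { return __builtin_convertvector(v, float4); }
+    //
+    // which comes out as an ordinary element-wise scalar conversion below.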
+ return buildScalarConversion(Visit(E->getSrcExpr()), + E->getSrcExpr()->getType(), E->getType(), + E->getSourceRange().getBegin()); + } + + mlir::Value VisitExtVectorElementExpr(Expr *E) { + return buildLoadOfLValue(E); + } + + mlir::Value VisitMemberExpr(MemberExpr *E); + mlir::Value VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { + return buildLoadOfLValue(E); + } + + mlir::Value VisitInitListExpr(InitListExpr *E); + + mlir::Value VisitArrayInitIndexExpr(ArrayInitIndexExpr *E) { + llvm_unreachable("NYI"); + } + + mlir::Value VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E) { + return buildNullValue(E->getType(), CGF.getLoc(E->getSourceRange())); + } + mlir::Value VisitExplicitCastExpr(ExplicitCastExpr *E) { + return VisitCastExpr(E); + } + mlir::Value VisitCastExpr(CastExpr *E); + mlir::Value VisitCallExpr(const CallExpr *E); + + mlir::Value VisitStmtExpr(StmtExpr *E) { + assert(!MissingFeatures::stmtExprEvaluation() && "NYI"); + Address retAlloca = + CGF.buildCompoundStmt(*E->getSubStmt(), !E->getType()->isVoidType()); + if (!retAlloca.isValid()) + return {}; + + // FIXME(cir): This is a work around the ScopeOp builder. If we build the + // ScopeOp before its body, we would be able to create the retAlloca + // direclty in the parent scope removing the need to hoist it. + assert(retAlloca.getDefiningOp() && "expected a alloca op"); + CGF.getBuilder().hoistAllocaToParentRegion( + cast(retAlloca.getDefiningOp())); + + return CGF.buildLoadOfScalar(CGF.makeAddrLValue(retAlloca, E->getType()), + E->getExprLoc()); + } + + // Unary Operators. + mlir::Value VisitUnaryPostDec(const UnaryOperator *E) { + LValue LV = buildLValue(E->getSubExpr()); + return buildScalarPrePostIncDec(E, LV, false, false); + } + mlir::Value VisitUnaryPostInc(const UnaryOperator *E) { + LValue LV = buildLValue(E->getSubExpr()); + return buildScalarPrePostIncDec(E, LV, true, false); + } + mlir::Value VisitUnaryPreDec(const UnaryOperator *E) { + LValue LV = buildLValue(E->getSubExpr()); + return buildScalarPrePostIncDec(E, LV, false, true); + } + mlir::Value VisitUnaryPreInc(const UnaryOperator *E) { + LValue LV = buildLValue(E->getSubExpr()); + return buildScalarPrePostIncDec(E, LV, true, true); + } + mlir::Value buildScalarPrePostIncDec(const UnaryOperator *E, LValue LV, + bool isInc, bool isPre) { + assert(!CGF.getLangOpts().OpenMP && "Not implemented"); + QualType type = E->getSubExpr()->getType(); + + int amount = (isInc ? 1 : -1); + bool atomicPHI = false; + mlir::Value value{}; + mlir::Value input{}; + + if (const AtomicType *atomicTy = type->getAs()) { + llvm_unreachable("no atomics inc/dec yet"); + } else { + value = buildLoadOfLValue(LV, E->getExprLoc()); + input = value; + } + + // NOTE: When possible, more frequent cases are handled first. + + // Special case of integer increment that we have to check first: bool++. + // Due to promotion rules, we get: + // bool++ -> bool = bool + 1 + // -> bool = (int)bool + 1 + // -> bool = ((int)bool + 1 != 0) + // An interesting aspect of this is that increment is always true. + // Decrement does not have this property. 
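+    // For example (illustrative, C _Bool or pre-C++17 bool):
+    //
+    //   bool b = false;
+    //   b++;   // yields true: (int)false + 1 == 1, and 1 != 0
+    //   b++;   // still true:  (int)true  + 1 == 2, and 2 != 0
+    //
+    // so the increment below is emitted directly as a `true` constant.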
+ if (isInc && type->isBooleanType()) { + value = Builder.create( + CGF.getLoc(E->getExprLoc()), CGF.getCIRType(type), + Builder.getCIRBoolAttr(true)); + } else if (type->isIntegerType()) { + QualType promotedType; + bool canPerformLossyDemotionCheck = false; + if (CGF.getContext().isPromotableIntegerType(type)) { + promotedType = CGF.getContext().getPromotedIntegerType(type); + assert(promotedType != type && "Shouldn't promote to the same type."); + canPerformLossyDemotionCheck = true; + canPerformLossyDemotionCheck &= + CGF.getContext().getCanonicalType(type) != + CGF.getContext().getCanonicalType(promotedType); + canPerformLossyDemotionCheck &= + PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + type, promotedType); + + // TODO(cir): Currently, we store bitwidths in CIR types only for + // integers. This might also be required for other types. + auto srcCirTy = mlir::dyn_cast(ConvertType(type)); + auto promotedCirTy = + mlir::dyn_cast(ConvertType(type)); + assert(srcCirTy && promotedCirTy && "Expected integer type"); + + assert( + (!canPerformLossyDemotionCheck || + type->isSignedIntegerOrEnumerationType() || + promotedType->isSignedIntegerOrEnumerationType() || + srcCirTy.getWidth() == promotedCirTy.getWidth()) && + "The following check expects that if we do promotion to different " + "underlying canonical type, at least one of the types (either " + "base or promoted) will be signed, or the bitwidths will match."); + } + + if (CGF.SanOpts.hasOneOf( + SanitizerKind::ImplicitIntegerArithmeticValueChange) && + canPerformLossyDemotionCheck) { + llvm_unreachable( + "perform lossy demotion case for inc/dec not implemented yet"); + } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { + value = buildIncDecConsiderOverflowBehavior(E, value, isInc); + } else if (E->canOverflow() && type->isUnsignedIntegerType() && + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { + llvm_unreachable( + "unsigned integer overflow sanitized inc/dec not implemented"); + } else { + auto Kind = E->isIncrementOp() ? mlir::cir::UnaryOpKind::Inc + : mlir::cir::UnaryOpKind::Dec; + // NOTE(CIR): clang calls CreateAdd but folds this to a unary op + value = buildUnaryOp(E, Kind, input); + } + // Next most common: pointer increment. + } else if (const PointerType *ptr = type->getAs()) { + QualType type = ptr->getPointeeType(); + if (const VariableArrayType *vla = + CGF.getContext().getAsVariableArrayType(type)) { + // VLA types don't have constant size. + llvm_unreachable("NYI"); + } else if (type->isFunctionType()) { + // Arithmetic on function pointers (!) is just +-1. + llvm_unreachable("NYI"); + } else { + // For everything else, we can just do a simple increment. + auto loc = CGF.getLoc(E->getSourceRange()); + auto &builder = CGF.getBuilder(); + auto amt = builder.getSInt32(amount, loc); + if (CGF.getLangOpts().isSignedOverflowDefined()) { + value = builder.create(loc, value.getType(), + value, amt); + } else { + value = builder.create(loc, value.getType(), + value, amt); + assert(!MissingFeatures::emitCheckedInBoundsGEP()); + } + } + } else if (type->isVectorType()) { + llvm_unreachable("no vector inc/dec yet"); + } else if (type->isRealFloatingType()) { + // TODO(cir): CGFPOptionsRAII + assert(!MissingFeatures::CGFPOptionsRAII()); + + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) + llvm_unreachable("__fp16 type NYI"); + + if (mlir::isa( + value.getType())) { + // Create the inc/dec operation. 
+ // NOTE(CIR): clang calls CreateAdd but folds this to a unary op + auto kind = + (isInc ? mlir::cir::UnaryOpKind::Inc : mlir::cir::UnaryOpKind::Dec); + value = buildUnaryOp(E, kind, input); + } else { + // Remaining types are Half, Bfloat16, LongDouble, __ibm128 or + // __float128. Convert from float. + + llvm::APFloat F(static_cast(amount)); + bool ignored; + const llvm::fltSemantics *FS; + // Don't use getFloatTypeSemantics because Half isn't + // necessarily represented using the "half" LLVM type. + if (mlir::isa(value.getType())) + FS = &CGF.getTarget().getLongDoubleFormat(); + else if (mlir::isa(value.getType())) + FS = &CGF.getTarget().getHalfFormat(); + else if (mlir::isa(value.getType())) + FS = &CGF.getTarget().getBFloat16Format(); + else + llvm_unreachable("fp128 / ppc_fp128 NYI"); + F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored); + + auto loc = CGF.getLoc(E->getExprLoc()); + auto amt = Builder.getConstant( + loc, mlir::cir::FPAttr::get(value.getType(), F)); + value = Builder.createBinop(value, mlir::cir::BinOpKind::Add, amt); + } + + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) + llvm_unreachable("NYI"); + + } else if (type->isFixedPointType()) { + llvm_unreachable("no fixed point inc/dec yet"); + } else { + assert(type->castAs()); + llvm_unreachable("no objc pointer type inc/dec yet"); + } + + if (atomicPHI) { + llvm_unreachable("NYI"); + } + + CIRGenFunction::SourceLocRAIIObject sourceloc{ + CGF, CGF.getLoc(E->getSourceRange())}; + + // Store the updated result through the lvalue + if (LV.isBitField()) + CGF.buildStoreThroughBitfieldLValue(RValue::get(value), LV, value); + else + CGF.buildStoreThroughLValue(RValue::get(value), LV); + + // If this is a postinc, return the value read from memory, otherwise use + // the updated value. + return isPre ? value : input; + } + + mlir::Value buildIncDecConsiderOverflowBehavior(const UnaryOperator *E, + mlir::Value InVal, + bool IsInc) { + // NOTE(CIR): The SignedOverflowBehavior is attached to the global ModuleOp + // and the nsw behavior is handled during lowering. + auto Kind = E->isIncrementOp() ? mlir::cir::UnaryOpKind::Inc + : mlir::cir::UnaryOpKind::Dec; + switch (CGF.getLangOpts().getSignedOverflowBehavior()) { + case LangOptions::SOB_Defined: + return buildUnaryOp(E, Kind, InVal); + case LangOptions::SOB_Undefined: + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return buildUnaryOp(E, Kind, InVal); + llvm_unreachable( + "inc/dec overflow behavior SOB_Undefined not implemented yet"); + break; + case LangOptions::SOB_Trapping: + if (!E->canOverflow()) + return buildUnaryOp(E, Kind, InVal); + llvm_unreachable( + "inc/dec overflow behavior SOB_Trapping not implemented yet"); + break; + } + } + + mlir::Value VisitUnaryAddrOf(const UnaryOperator *E) { + if (llvm::isa(E->getType())) + return CGF.CGM.buildMemberPointerConstant(E); + + return CGF.buildLValue(E->getSubExpr()).getPointer(); + } + + mlir::Value VisitUnaryDeref(const UnaryOperator *E) { + if (E->getType()->isVoidType()) + return Visit(E->getSubExpr()); // the actual value should be unused + return buildLoadOfLValue(E); + } + mlir::Value VisitUnaryPlus(const UnaryOperator *E, + QualType PromotionType = QualType()) { + QualType promotionTy = PromotionType.isNull() + ? 
getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + auto result = VisitPlus(E, promotionTy); + if (result && !promotionTy.isNull()) + return buildUnPromotedValue(result, E->getType()); + return result; + } + + mlir::Value VisitPlus(const UnaryOperator *E, + QualType PromotionType = QualType()) { + // This differs from gcc, though, most likely due to a bug in gcc. + TestAndClearIgnoreResultAssign(); + + mlir::Value operand; + if (!PromotionType.isNull()) + operand = CGF.buildPromotedScalarExpr(E->getSubExpr(), PromotionType); + else + operand = Visit(E->getSubExpr()); + + return buildUnaryOp(E, mlir::cir::UnaryOpKind::Plus, operand); + } + + mlir::Value VisitUnaryMinus(const UnaryOperator *E, + QualType PromotionType = QualType()) { + QualType promotionTy = PromotionType.isNull() + ? getPromotionType(E->getSubExpr()->getType()) + : PromotionType; + auto result = VisitMinus(E, promotionTy); + if (result && !promotionTy.isNull()) + return buildUnPromotedValue(result, E->getType()); + return result; + } + + mlir::Value VisitMinus(const UnaryOperator *E, QualType PromotionType) { + TestAndClearIgnoreResultAssign(); + + mlir::Value operand; + if (!PromotionType.isNull()) + operand = CGF.buildPromotedScalarExpr(E->getSubExpr(), PromotionType); + else + operand = Visit(E->getSubExpr()); + + // NOTE: LLVM codegen will lower this directly to either a FNeg + // or a Sub instruction. In CIR this will be handled later in LowerToLLVM. + return buildUnaryOp(E, mlir::cir::UnaryOpKind::Minus, operand); + } + + mlir::Value VisitUnaryNot(const UnaryOperator *E) { + TestAndClearIgnoreResultAssign(); + mlir::Value op = Visit(E->getSubExpr()); + return buildUnaryOp(E, mlir::cir::UnaryOpKind::Not, op); + } + + mlir::Value VisitUnaryLNot(const UnaryOperator *E); + mlir::Value VisitUnaryReal(const UnaryOperator *E) { return VisitReal(E); } + mlir::Value VisitUnaryImag(const UnaryOperator *E) { return VisitImag(E); } + + mlir::Value VisitReal(const UnaryOperator *E); + mlir::Value VisitImag(const UnaryOperator *E); + + mlir::Value VisitUnaryExtension(const UnaryOperator *E) { + // __extension__ doesn't requred any codegen + // just forward the value + return Visit(E->getSubExpr()); + } + + mlir::Value buildUnaryOp(const UnaryOperator *E, mlir::cir::UnaryOpKind kind, + mlir::Value input) { + return Builder.create( + CGF.getLoc(E->getSourceRange().getBegin()), input.getType(), kind, + input); + } + + // C++ + mlir::Value VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitSourceLocExpr(SourceLocExpr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CIRGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); + return Visit(DAE->getExpr()); + } + mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { + CIRGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); + return Visit(DIE->getExpr()); + } + + mlir::Value VisitCXXThisExpr(CXXThisExpr *TE) { return CGF.LoadCXXThis(); } + + mlir::Value VisitExprWithCleanups(ExprWithCleanups *E); + mlir::Value VisitCXXNewExpr(const CXXNewExpr *E) { + return CGF.buildCXXNewExpr(E); + } + mlir::Value VisitCXXDeleteExpr(const CXXDeleteExpr *E) { + CGF.buildCXXDeleteExpr(E); + return {}; + } + mlir::Value VisitTypeTraitExpr(const TypeTraitExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value + VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitRequiresExpr(const RequiresExpr *E) { + 
llvm_unreachable("NYI"); + } + mlir::Value VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitExpressionTraitExpr(const ExpressionTraitExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E) { + return buildNullValue(E->getType(), CGF.getLoc(E->getSourceRange())); + } + mlir::Value VisitCXXThrowExpr(CXXThrowExpr *E) { + CGF.buildCXXThrowExpr(E); + return nullptr; + } + mlir::Value VisitCXXNoexceptExpr(CXXNoexceptExpr *E) { + llvm_unreachable("NYI"); + } + + /// Perform a pointer to boolean conversion. + mlir::Value buildPointerToBoolConversion(mlir::Value V, QualType QT) { + // TODO(cir): comparing the ptr to null is done when lowering CIR to LLVM. + // We might want to have a separate pass for these types of conversions. + return CGF.getBuilder().createPtrToBoolCast(V); + } + + // Comparisons. +#define VISITCOMP(CODE) \ + mlir::Value VisitBin##CODE(const BinaryOperator *E) { return buildCmp(E); } + VISITCOMP(LT) + VISITCOMP(GT) + VISITCOMP(LE) + VISITCOMP(GE) + VISITCOMP(EQ) + VISITCOMP(NE) +#undef VISITCOMP + + mlir::Value VisitBinAssign(const BinaryOperator *E); + mlir::Value VisitBinLAnd(const BinaryOperator *B); + mlir::Value VisitBinLOr(const BinaryOperator *B); + mlir::Value VisitBinComma(const BinaryOperator *E) { + CGF.buildIgnoredExpr(E->getLHS()); + // NOTE: We don't need to EnsureInsertPoint() like LLVM codegen. + return Visit(E->getRHS()); + } + + mlir::Value VisitBinPtrMemD(const BinaryOperator *E) { + return buildLoadOfLValue(E); + } + + mlir::Value VisitBinPtrMemI(const BinaryOperator *E) { + return buildLoadOfLValue(E); + } + + mlir::Value VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + return Visit(E->getSemanticForm()); + } + + // Other Operators. + mlir::Value VisitBlockExpr(const BlockExpr *E) { llvm_unreachable("NYI"); } + mlir::Value + VisitAbstractConditionalOperator(const AbstractConditionalOperator *E); + mlir::Value VisitChooseExpr(ChooseExpr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitVAArgExpr(VAArgExpr *VE); + mlir::Value VisitObjCStringLiteral(const ObjCStringLiteral *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCBoxedExpr(ObjCBoxedExpr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitObjCArrayLiteral(ObjCArrayLiteral *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitObjCDictionaryLiteral(ObjCDictionaryLiteral *E) { + llvm_unreachable("NYI"); + } + mlir::Value VisitAsTypeExpr(AsTypeExpr *E) { llvm_unreachable("NYI"); } + mlir::Value VisitAtomicExpr(AtomicExpr *E) { + return CGF.buildAtomicExpr(E).getScalarVal(); + } + + // Emit a conversion from the specified type to the specified destination + // type, both of which are CIR scalar types. 
+ struct ScalarConversionOpts { + bool TreatBooleanAsSigned; + bool EmitImplicitIntegerTruncationChecks; + bool EmitImplicitIntegerSignChangeChecks; + + ScalarConversionOpts() + : TreatBooleanAsSigned(false), + EmitImplicitIntegerTruncationChecks(false), + EmitImplicitIntegerSignChangeChecks(false) {} + + ScalarConversionOpts(clang::SanitizerSet SanOpts) + : TreatBooleanAsSigned(false), + EmitImplicitIntegerTruncationChecks( + SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)), + EmitImplicitIntegerSignChangeChecks( + SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) {} + }; + mlir::Value buildScalarCast(mlir::Value Src, QualType SrcType, + QualType DstType, mlir::Type SrcTy, + mlir::Type DstTy, ScalarConversionOpts Opts); + + BinOpInfo buildBinOps(const BinaryOperator *E, + QualType PromotionType = QualType()) { + BinOpInfo Result; + Result.LHS = CGF.buildPromotedScalarExpr(E->getLHS(), PromotionType); + Result.RHS = CGF.buildPromotedScalarExpr(E->getRHS(), PromotionType); + if (!PromotionType.isNull()) + Result.FullType = PromotionType; + else + Result.FullType = E->getType(); + Result.CompType = Result.FullType; + if (const auto *VecType = dyn_cast_or_null(Result.FullType)) { + Result.CompType = VecType->getElementType(); + } + Result.Opcode = E->getOpcode(); + Result.Loc = E->getSourceRange(); + // TODO: Result.FPFeatures + assert(!MissingFeatures::getFPFeaturesInEffect()); + Result.E = E; + return Result; + } + + mlir::Value buildMul(const BinOpInfo &Ops); + mlir::Value buildDiv(const BinOpInfo &Ops); + mlir::Value buildRem(const BinOpInfo &Ops); + mlir::Value buildAdd(const BinOpInfo &Ops); + mlir::Value buildSub(const BinOpInfo &Ops); + mlir::Value buildShl(const BinOpInfo &Ops); + mlir::Value buildShr(const BinOpInfo &Ops); + mlir::Value buildAnd(const BinOpInfo &Ops); + mlir::Value buildXor(const BinOpInfo &Ops); + mlir::Value buildOr(const BinOpInfo &Ops); + + LValue buildCompoundAssignLValue( + const CompoundAssignOperator *E, + mlir::Value (ScalarExprEmitter::*F)(const BinOpInfo &), + mlir::Value &Result); + mlir::Value + buildCompoundAssign(const CompoundAssignOperator *E, + mlir::Value (ScalarExprEmitter::*F)(const BinOpInfo &)); + + // TODO(cir): Candidate to be in a common AST helper between CIR and LLVM + // codegen. + QualType getPromotionType(QualType Ty) { + if (auto *CT = Ty->getAs()) { + llvm_unreachable("NYI"); + } + if (Ty.UseExcessPrecision(CGF.getContext())) { + if (auto *VT = Ty->getAs()) + llvm_unreachable("NYI"); + return CGF.getContext().FloatTy; + } + return QualType(); + } + + // Binary operators and binary compound assignment operators. 
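+  // For reference: each HANDLEBINOP(OP) instantiation below defines both
+  // VisitBin##OP (e.g. VisitBinMul for `a * b`) and VisitBin##OP##Assign
+  // (e.g. VisitBinMulAssign for `a *= b`), the latter routed through
+  // buildCompoundAssign.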
+#define HANDLEBINOP(OP) \ + mlir::Value VisitBin##OP(const BinaryOperator *E) { \ + QualType promotionTy = getPromotionType(E->getType()); \ + auto result = build##OP(buildBinOps(E, promotionTy)); \ + if (result && !promotionTy.isNull()) \ + result = buildUnPromotedValue(result, E->getType()); \ + return result; \ + } \ + mlir::Value VisitBin##OP##Assign(const CompoundAssignOperator *E) { \ + return buildCompoundAssign(E, &ScalarExprEmitter::build##OP); \ + } + + HANDLEBINOP(Mul) + HANDLEBINOP(Div) + HANDLEBINOP(Rem) + HANDLEBINOP(Add) + HANDLEBINOP(Sub) + HANDLEBINOP(Shl) + HANDLEBINOP(Shr) + HANDLEBINOP(And) + HANDLEBINOP(Xor) + HANDLEBINOP(Or) +#undef HANDLEBINOP + + mlir::Value buildCmp(const BinaryOperator *E) { + mlir::Value Result; + QualType LHSTy = E->getLHS()->getType(); + QualType RHSTy = E->getRHS()->getType(); + + auto ClangCmpToCIRCmp = [](auto ClangCmp) -> mlir::cir::CmpOpKind { + switch (ClangCmp) { + case BO_LT: + return mlir::cir::CmpOpKind::lt; + case BO_GT: + return mlir::cir::CmpOpKind::gt; + case BO_LE: + return mlir::cir::CmpOpKind::le; + case BO_GE: + return mlir::cir::CmpOpKind::ge; + case BO_EQ: + return mlir::cir::CmpOpKind::eq; + case BO_NE: + return mlir::cir::CmpOpKind::ne; + default: + llvm_unreachable("unsupported comparison kind"); + return mlir::cir::CmpOpKind(-1); + } + }; + + if (const MemberPointerType *MPT = LHSTy->getAs()) { + assert(0 && "not implemented"); + } else if (!LHSTy->isAnyComplexType() && !RHSTy->isAnyComplexType()) { + BinOpInfo BOInfo = buildBinOps(E); + mlir::Value LHS = BOInfo.LHS; + mlir::Value RHS = BOInfo.RHS; + + if (LHSTy->isVectorType()) { + if (!E->getType()->isVectorType()) { + // If AltiVec, the comparison results in a numeric type, so we use + // intrinsics comparing vectors and giving 0 or 1 as a result + llvm_unreachable("NYI: AltiVec comparison"); + } else { + // Other kinds of vectors. Element-wise comparison returning + // a vector. + mlir::cir::CmpOpKind Kind = ClangCmpToCIRCmp(E->getOpcode()); + return Builder.create( + CGF.getLoc(BOInfo.Loc), CGF.getCIRType(BOInfo.FullType), Kind, + BOInfo.LHS, BOInfo.RHS); + } + } + if (BOInfo.isFixedPointOp()) { + assert(0 && "not implemented"); + } else { + // FIXME(cir): handle another if above for CIR equivalent on + // LHSTy->hasSignedIntegerRepresentation() + + // Unsigned integers and pointers. + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers && + mlir::isa(LHS.getType()) && + mlir::isa(RHS.getType())) { + llvm_unreachable("NYI"); + } + + mlir::cir::CmpOpKind Kind = ClangCmpToCIRCmp(E->getOpcode()); + return Builder.create(CGF.getLoc(BOInfo.Loc), + CGF.getCIRType(BOInfo.FullType), + Kind, BOInfo.LHS, BOInfo.RHS); + } + } else { // Complex Comparison: can only be an equality comparison. + assert(0 && "not implemented"); + } + + return buildScalarConversion(Result, CGF.getContext().BoolTy, E->getType(), + E->getExprLoc()); + } + + mlir::Value buildFloatToBoolConversion(mlir::Value src, mlir::Location loc) { + auto boolTy = Builder.getBoolTy(); + return Builder.create( + loc, boolTy, mlir::cir::CastKind::float_to_bool, src); + } + + mlir::Value buildIntToBoolConversion(mlir::Value srcVal, mlir::Location loc) { + // Because of the type rules of C, we often end up computing a + // logical value, then zero extending it to int, then wanting it + // as a logical value again. + // TODO: optimize this common case here or leave it for later + // CIR passes? 
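+    // A typical source pattern (illustrative) producing such a chain:
+    //
+    //   int a, b;
+    //   _Bool r = (a && b);  // operands tested against zero, the && result
+    //                        // is an int, then converted back to _Bool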
+ mlir::Type boolTy = CGF.getCIRType(CGF.getContext().BoolTy); + return Builder.create( + loc, boolTy, mlir::cir::CastKind::int_to_bool, srcVal); + } + + /// Convert the specified expression value to a boolean (!cir.bool) truth + /// value. This is equivalent to "Val != 0". + mlir::Value buildConversionToBool(mlir::Value Src, QualType SrcType, + mlir::Location loc) { + assert(SrcType.isCanonical() && "EmitScalarConversion strips typedefs"); + + if (SrcType->isRealFloatingType()) + return buildFloatToBoolConversion(Src, loc); + + if (auto *MPT = llvm::dyn_cast(SrcType)) + assert(0 && "not implemented"); + + if (SrcType->isIntegerType()) + return buildIntToBoolConversion(Src, loc); + + assert(::mlir::isa<::mlir::cir::PointerType>(Src.getType())); + return buildPointerToBoolConversion(Src, SrcType); + } + + /// Emit a conversion from the specified type to the specified destination + /// type, both of which are CIR scalar types. + /// TODO: do we need ScalarConversionOpts here? Should be done in another + /// pass. + mlir::Value + buildScalarConversion(mlir::Value Src, QualType SrcType, QualType DstType, + SourceLocation Loc, + ScalarConversionOpts Opts = ScalarConversionOpts()) { + // All conversions involving fixed point types should be handled by the + // buildFixedPoint family functions. This is done to prevent bloating up + // this function more, and although fixed point numbers are represented by + // integers, we do not want to follow any logic that assumes they should be + // treated as integers. + // TODO(leonardchan): When necessary, add another if statement checking for + // conversions to fixed point types from other types. + if (SrcType->isFixedPointType()) { + llvm_unreachable("not implemented"); + } else if (DstType->isFixedPointType()) { + llvm_unreachable("not implemented"); + } + + SrcType = CGF.getContext().getCanonicalType(SrcType); + DstType = CGF.getContext().getCanonicalType(DstType); + if (SrcType == DstType) + return Src; + + if (DstType->isVoidType()) + return nullptr; + + mlir::Type SrcTy = Src.getType(); + + // Handle conversions to bool first, they are special: comparisons against + // 0. + if (DstType->isBooleanType()) + return buildConversionToBool(Src, SrcType, CGF.getLoc(Loc)); + + mlir::Type DstTy = ConvertType(DstType); + + // Cast from half through float if half isn't a native type. + if (SrcType->isHalfType() && + !CGF.getContext().getLangOpts().NativeHalfType) { + llvm_unreachable("not implemented"); + } + + // TODO(cir): LLVM codegen ignore conversions like int -> uint, + // is there anything to be done for CIR here? + if (SrcTy == DstTy) { + if (Opts.EmitImplicitIntegerSignChangeChecks) + llvm_unreachable("not implemented"); + return Src; + } + + // Handle pointer conversions next: pointers can only be converted to/from + // other pointers and integers. Check for pointer types in terms of LLVM, as + // some native types (like Obj-C id) may map to a pointer type. + if (auto DstPT = dyn_cast(DstTy)) { + llvm_unreachable("NYI"); + } + + if (isa(SrcTy)) { + // Must be an ptr to int cast. 
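+    // Illustrative form of this case: a pointer value converted to an
+    // integer type, e.g. `(intptr_t)p`.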
+ assert(isa(DstTy) && "not ptr->int?"); + return Builder.createPtrToInt(Src, DstTy); + } + + // A scalar can be splatted to an extended vector of the same element type + if (DstType->isExtVectorType() && !SrcType->isVectorType()) { + // Sema should add casts to make sure that the source expression's type + // is the same as the vector's element type (sans qualifiers) + assert(DstType->castAs()->getElementType().getTypePtr() == + SrcType.getTypePtr() && + "Splatted expr doesn't match with vector element type?"); + + llvm_unreachable("not implemented"); + } + + if (SrcType->isMatrixType() && DstType->isMatrixType()) + llvm_unreachable("NYI: matrix type to matrix type conversion"); + assert(!SrcType->isMatrixType() && !DstType->isMatrixType() && + "Internal error: conversion between matrix type and scalar type"); + + // Finally, we have the arithmetic types or vectors of arithmetic types. + mlir::Value Res = nullptr; + mlir::Type ResTy = DstTy; + + // An overflowing conversion has undefined behavior if eitehr the source + // type or the destination type is a floating-point type. However, we + // consider the range of representable values for all floating-point types + // to be [-inf,+inf], so no overflow can ever happen when the destination + // type is a floating-point type. + if (CGF.SanOpts.has(SanitizerKind::FloatCastOverflow)) + llvm_unreachable("NYI"); + + // Cast to half through float if half isn't a native type. + if (DstType->isHalfType() && + !CGF.getContext().getLangOpts().NativeHalfType) { + llvm_unreachable("NYI"); + } + + Res = buildScalarCast(Src, SrcType, DstType, SrcTy, DstTy, Opts); + + if (DstTy != ResTy) { + llvm_unreachable("NYI"); + } + + if (Opts.EmitImplicitIntegerTruncationChecks) + llvm_unreachable("NYI"); + + if (Opts.EmitImplicitIntegerSignChangeChecks) + llvm_unreachable("NYI"); + + return Res; + } +}; + +} // namespace + +/// Emit the computation of the specified expression of scalar type, +/// ignoring the result. +mlir::Value CIRGenFunction::buildScalarExpr(const Expr *E) { + assert(E && hasScalarEvaluationKind(E->getType()) && + "Invalid scalar expression to emit"); + + return ScalarExprEmitter(*this, builder).Visit(const_cast(E)); +} + +mlir::Value CIRGenFunction::buildPromotedScalarExpr(const Expr *E, + QualType PromotionType) { + if (!PromotionType.isNull()) + return ScalarExprEmitter(*this, builder).buildPromoted(E, PromotionType); + return ScalarExprEmitter(*this, builder).Visit(const_cast(E)); +} + +[[maybe_unused]] static bool MustVisitNullValue(const Expr *E) { + // If a null pointer expression's type is the C++0x nullptr_t, then + // it's not necessarily a simple constant and it must be evaluated + // for its potential side effects. + return E->getType()->isNullPtrType(); +} + +/// If \p E is a widened promoted integer, get its base (unpromoted) type. +static std::optional getUnwidenedIntegerType(const ASTContext &Ctx, + const Expr *E) { + const Expr *Base = E->IgnoreImpCasts(); + if (E == Base) + return std::nullopt; + + QualType BaseTy = Base->getType(); + if (!Ctx.isPromotableIntegerType(BaseTy) || + Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType())) + return std::nullopt; + + return BaseTy; +} + +/// Check if \p E is a widened promoted integer. +[[maybe_unused]] static bool IsWidenedIntegerOp(const ASTContext &Ctx, + const Expr *E) { + return getUnwidenedIntegerType(Ctx, E).has_value(); +} + +/// Check if we can skip the overflow check for \p Op. 
+[[maybe_unused]] static bool CanElideOverflowCheck(const ASTContext &Ctx, + const BinOpInfo &Op) { + assert((isa(Op.E) || isa(Op.E)) && + "Expected a unary or binary operator"); + + // If the binop has constant inputs and we can prove there is no overflow, + // we can elide the overflow check. + if (!Op.mayHaveIntegerOverflow()) + return true; + + // If a unary op has a widened operand, the op cannot overflow. + if (const auto *UO = dyn_cast(Op.E)) + return !UO->canOverflow(); + + // We usually don't need overflow checks for binops with widened operands. + // Multiplication with promoted unsigned operands is a special case. + const auto *BO = cast(Op.E); + auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); + if (!OptionalLHSTy) + return false; + + auto OptionalRHSTy = getUnwidenedIntegerType(Ctx, BO->getRHS()); + if (!OptionalRHSTy) + return false; + + QualType LHSTy = *OptionalLHSTy; + QualType RHSTy = *OptionalRHSTy; + + // This is the simple case: binops without unsigned multiplication, and with + // widened operands. No overflow check is needed here. + if ((Op.Opcode != BO_Mul && Op.Opcode != BO_MulAssign) || + !LHSTy->isUnsignedIntegerType() || !RHSTy->isUnsignedIntegerType()) + return true; + + // For unsigned multiplication the overflow check can be elided if either one + // of the unpromoted types are less than half the size of the promoted type. + unsigned PromotedSize = Ctx.getTypeSize(Op.E->getType()); + return (2 * Ctx.getTypeSize(LHSTy)) < PromotedSize || + (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize; +} + +/// Emit pointer + index arithmetic. +static mlir::Value buildPointerArithmetic(CIRGenFunction &CGF, + const BinOpInfo &op, + bool isSubtraction) { + // Must have binary (not unary) expr here. Unary pointer + // increment/decrement doesn't use this path. + const BinaryOperator *expr = cast(op.E); + + mlir::Value pointer = op.LHS; + Expr *pointerOperand = expr->getLHS(); + mlir::Value index = op.RHS; + Expr *indexOperand = expr->getRHS(); + + // In a subtraction, the LHS is always the pointer. + if (!isSubtraction && !mlir::isa(pointer.getType())) { + std::swap(pointer, index); + std::swap(pointerOperand, indexOperand); + } + + bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); + + // Some versions of glibc and gcc use idioms (particularly in their malloc + // routines) that add a pointer-sized integer (known to be a pointer value) + // to a null pointer in order to cast the value back to an integer or as + // part of a pointer alignment algorithm. This is undefined behavior, but + // we'd like to be able to compile programs that use it. + // + // Normally, we'd generate a GEP with a null-pointer base here in response + // to that code, but it's also UB to dereference a pointer created that + // way. Instead (as an acknowledged hack to tolerate the idiom) we will + // generate a direct cast of the integer value to a pointer. + // + // The idiom (p = nullptr + N) is not met if any of the following are true: + // + // The operation is subtraction. + // The index is not pointer-sized. + // The pointer type is not byte-sized. + // + if (BinaryOperator::isNullPointerArithmeticExtension( + CGF.getContext(), op.Opcode, expr->getLHS(), expr->getRHS())) + return CGF.getBuilder().createIntToPtr(index, pointer.getType()); + + // Differently from LLVM codegen, ABI bits for index sizes is handled during + // LLVM lowering. + + // If this is subtraction, negate the index. 
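// Source-level sketch of the "nullptr + N" idiom tolerated above (a
// hypothetical helper; formally undefined behavior, which is why it is
// lowered as a plain int-to-pointer cast rather than pointer arithmetic):
#include <cstdint>
void *addrFromInteger(std::uintptr_t bits) {
  // Null base, pointer-sized integer index, byte-sized pointee: the pattern
  // isNullPointerArithmeticExtension() recognizes.
  return static_cast<char *>(nullptr) + bits;
}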
+ if (isSubtraction) + index = CGF.getBuilder().createNeg(index); + + if (CGF.SanOpts.has(SanitizerKind::ArrayBounds)) + llvm_unreachable("array bounds sanitizer is NYI"); + + const PointerType *pointerType = + pointerOperand->getType()->getAs(); + if (!pointerType) + llvm_unreachable("ObjC is NYI"); + + QualType elementType = pointerType->getPointeeType(); + if (const VariableArrayType *vla = + CGF.getContext().getAsVariableArrayType(elementType)) { + + // The element count here is the total number of non-VLA elements. + mlir::Value numElements = CGF.getVLASize(vla).NumElts; + + // GEP indexes are signed, and scaling an index isn't permitted to + // signed-overflow, so we use the same semantics for our explicit + // multiply. We suppress this if overflow is not undefined behavior. + mlir::Type elemTy = CGF.convertTypeForMem(vla->getElementType()); + + index = CGF.getBuilder().createCast(mlir::cir::CastKind::integral, index, + numElements.getType()); + index = CGF.getBuilder().createMul(index, numElements); + + if (CGF.getLangOpts().isSignedOverflowDefined()) { + pointer = CGF.getBuilder().create( + CGF.getLoc(op.E->getExprLoc()), pointer.getType(), pointer, index); + } else { + pointer = CGF.buildCheckedInBoundsGEP(elemTy, pointer, index, isSigned, + isSubtraction, op.E->getExprLoc()); + } + return pointer; + } + // Explicitly handle GNU void* and function pointer arithmetic extensions. The + // GNU void* casts amount to no-ops since our void* type is i8*, but this is + // future proof. + mlir::Type elemTy; + if (elementType->isVoidType() || elementType->isFunctionType()) + elemTy = CGF.UInt8Ty; + else + elemTy = CGF.convertTypeForMem(elementType); + + if (CGF.getLangOpts().isSignedOverflowDefined()) + return CGF.getBuilder().create( + CGF.getLoc(op.E->getExprLoc()), pointer.getType(), pointer, index); + + return CGF.buildCheckedInBoundsGEP(elemTy, pointer, index, isSigned, + isSubtraction, op.E->getExprLoc()); +} + +mlir::Value ScalarExprEmitter::buildMul(const BinOpInfo &Ops) { + if (Ops.CompType->isSignedIntegerOrEnumerationType()) { + switch (CGF.getLangOpts().getSignedOverflowBehavior()) { + case LangOptions::SOB_Defined: + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return Builder.createMul(Ops.LHS, Ops.RHS); + [[fallthrough]]; + case LangOptions::SOB_Undefined: + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return Builder.createNSWMul(Ops.LHS, Ops.RHS); + [[fallthrough]]; + case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), Ops)) + return Builder.createNSWMul(Ops.LHS, Ops.RHS); + llvm_unreachable("NYI"); + } + } + if (Ops.FullType->isConstantMatrixType()) { + llvm_unreachable("NYI"); + } + if (Ops.CompType->isUnsignedIntegerType() && + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), Ops)) + llvm_unreachable("NYI"); + + if (mlir::cir::isFPOrFPVectorTy(Ops.LHS.getType())) { + CIRGenFunction::CIRGenFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); + return Builder.createFMul(Ops.LHS, Ops.RHS); + } + + if (Ops.isFixedPointOp()) + llvm_unreachable("NYI"); + + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Mul, Ops.LHS, Ops.RHS); +} +mlir::Value ScalarExprEmitter::buildDiv(const BinOpInfo &Ops) { + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Div, Ops.LHS, Ops.RHS); +} +mlir::Value ScalarExprEmitter::buildRem(const BinOpInfo &Ops) { + return Builder.create( + 
CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Rem, Ops.LHS, Ops.RHS); +} + +mlir::Value ScalarExprEmitter::buildAdd(const BinOpInfo &Ops) { + if (mlir::isa(Ops.LHS.getType()) || + mlir::isa(Ops.RHS.getType())) + return buildPointerArithmetic(CGF, Ops, /*isSubtraction=*/false); + if (Ops.CompType->isSignedIntegerOrEnumerationType()) { + switch (CGF.getLangOpts().getSignedOverflowBehavior()) { + case LangOptions::SOB_Defined: + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return Builder.createAdd(Ops.LHS, Ops.RHS); + [[fallthrough]]; + case LangOptions::SOB_Undefined: + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return Builder.createNSWAdd(Ops.LHS, Ops.RHS); + [[fallthrough]]; + case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), Ops)) + return Builder.createNSWAdd(Ops.LHS, Ops.RHS); + + llvm_unreachable("NYI"); + } + } + if (Ops.FullType->isConstantMatrixType()) { + llvm_unreachable("NYI"); + } + + if (Ops.CompType->isUnsignedIntegerType() && + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), Ops)) + llvm_unreachable("NYI"); + + if (mlir::cir::isFPOrFPVectorTy(Ops.LHS.getType())) { + CIRGenFunction::CIRGenFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); + return Builder.createFAdd(Ops.LHS, Ops.RHS); + } + + if (Ops.isFixedPointOp()) + llvm_unreachable("NYI"); + + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Add, Ops.LHS, Ops.RHS); +} + +mlir::Value ScalarExprEmitter::buildSub(const BinOpInfo &Ops) { + // The LHS is always a pointer if either side is. + if (!mlir::isa(Ops.LHS.getType())) { + if (Ops.CompType->isSignedIntegerOrEnumerationType()) { + switch (CGF.getLangOpts().getSignedOverflowBehavior()) { + case LangOptions::SOB_Defined: { + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return Builder.createSub(Ops.LHS, Ops.RHS); + [[fallthrough]]; + } + case LangOptions::SOB_Undefined: + if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) + return Builder.createNSWSub(Ops.LHS, Ops.RHS); + [[fallthrough]]; + case LangOptions::SOB_Trapping: + if (CanElideOverflowCheck(CGF.getContext(), Ops)) + return Builder.createNSWSub(Ops.LHS, Ops.RHS); + llvm_unreachable("NYI"); + } + } + + if (Ops.FullType->isConstantMatrixType()) { + llvm_unreachable("NYI"); + } + + if (Ops.CompType->isUnsignedIntegerType() && + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !CanElideOverflowCheck(CGF.getContext(), Ops)) + llvm_unreachable("NYI"); + + if (mlir::cir::isFPOrFPVectorTy(Ops.LHS.getType())) { + CIRGenFunction::CIRGenFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); + return Builder.createFSub(Ops.LHS, Ops.RHS); + } + + if (Ops.isFixedPointOp()) + llvm_unreachable("NYI"); + + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Sub, Ops.LHS, Ops.RHS); + } + + // If the RHS is not a pointer, then we have normal pointer + // arithmetic. + if (!mlir::isa(Ops.RHS.getType())) + return buildPointerArithmetic(CGF, Ops, /*isSubtraction=*/true); + + // Otherwise, this is a pointer subtraction + + // Do the raw subtraction part. + // + // TODO(cir): note for LLVM lowering out of this; when expanding this into + // LLVM we shall take VLA's, division by element size, etc. + // + // See more in `EmitSub` in CGExprScalar.cpp. 
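// Source-level illustration (hypothetical example, not code from this patch)
// of what the raw pointer-difference op just below must ultimately mean:
// subtracting two T* values yields an element count, i.e. the byte distance
// divided by sizeof(T); that division is the part deferred to LLVM lowering.
#include <cassert>
#include <cstddef>
int main() {
  int buf[8];
  int *first = buf;
  int *pastEnd = buf + 8;
  std::ptrdiff_t elems = pastEnd - first; // 8 elements, not 8 * sizeof(int)
  std::ptrdiff_t bytes = reinterpret_cast<char *>(pastEnd) -
                         reinterpret_cast<char *>(first);
  assert(elems == 8 &&
         bytes == elems * static_cast<std::ptrdiff_t>(sizeof(int)));
  return 0;
}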
+ assert(!MissingFeatures::llvmLoweringPtrDiffConsidersPointee()); + return Builder.create(CGF.getLoc(Ops.Loc), + CGF.PtrDiffTy, Ops.LHS, Ops.RHS); +} + +mlir::Value ScalarExprEmitter::buildShl(const BinOpInfo &Ops) { + // TODO: This misses out on the sanitizer check below. + if (Ops.isFixedPointOp()) + llvm_unreachable("NYI"); + + // CIR accepts shift between different types, meaning nothing special + // to be done here. OTOH, LLVM requires the LHS and RHS to be the same type: + // promote or truncate the RHS to the same size as the LHS. + + bool SanitizeSignedBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) && + Ops.CompType->hasSignedIntegerRepresentation() && + !CGF.getLangOpts().isSignedOverflowDefined() && + !CGF.getLangOpts().CPlusPlus20; + bool SanitizeUnsignedBase = + CGF.SanOpts.has(SanitizerKind::UnsignedShiftBase) && + Ops.CompType->hasUnsignedIntegerRepresentation(); + bool SanitizeBase = SanitizeSignedBase || SanitizeUnsignedBase; + bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); + + // OpenCL 6.3j: shift values are effectively % word size of LHS. + if (CGF.getLangOpts().OpenCL) + llvm_unreachable("NYI"); + else if ((SanitizeBase || SanitizeExponent) && + mlir::isa(Ops.LHS.getType())) { + llvm_unreachable("NYI"); + } + + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), Ops.LHS, Ops.RHS, + CGF.getBuilder().getUnitAttr()); +} + +mlir::Value ScalarExprEmitter::buildShr(const BinOpInfo &Ops) { + // TODO: This misses out on the sanitizer check below. + if (Ops.isFixedPointOp()) + llvm_unreachable("NYI"); + + // CIR accepts shift between different types, meaning nothing special + // to be done here. OTOH, LLVM requires the LHS and RHS to be the same type: + // promote or truncate the RHS to the same size as the LHS. + + // OpenCL 6.3j: shift values are effectively % word size of LHS. + if (CGF.getLangOpts().OpenCL) + llvm_unreachable("NYI"); + else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) && + mlir::isa(Ops.LHS.getType())) { + llvm_unreachable("NYI"); + } + + // Note that we don't need to distinguish unsigned treatment at this + // point since it will be handled later by LLVM lowering. + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), Ops.LHS, Ops.RHS); +} + +mlir::Value ScalarExprEmitter::buildAnd(const BinOpInfo &Ops) { + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::And, Ops.LHS, Ops.RHS); +} +mlir::Value ScalarExprEmitter::buildXor(const BinOpInfo &Ops) { + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Xor, Ops.LHS, Ops.RHS); +} +mlir::Value ScalarExprEmitter::buildOr(const BinOpInfo &Ops) { + return Builder.create( + CGF.getLoc(Ops.Loc), CGF.getCIRType(Ops.FullType), + mlir::cir::BinOpKind::Or, Ops.LHS, Ops.RHS); +} + +// Emit code for an explicit or implicit cast. Implicit +// casts have to handle a more broad range of conversions than explicit +// casts, as they handle things like function to ptr-to-function decay +// etc. +mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { + Expr *E = CE->getSubExpr(); + QualType DestTy = CE->getType(); + CastKind Kind = CE->getCastKind(); + + // These cases are generally not written to ignore the result of evaluating + // their sub-expressions, so we clear this now. 
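// Source-level illustration (hypothetical function): C and C++ promote each
// shift operand independently, so the shift amount may legitimately have a
// different width than the shifted value. Per the comments above, CIR keeps
// both types as-is, while LLVM IR would require matching widths.
long long shiftBySmallAmount(long long value, unsigned char amount) {
  return value << amount; // LHS stays long long, RHS promotes only to int
}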
+ bool Ignored = TestAndClearIgnoreResultAssign(); + (void)Ignored; + + // Since almost all cast kinds apply to scalars, this switch doesn't have a + // default case, so the compiler will warn on a missing case. The cases are + // in the same order as in the CastKind enum. + switch (Kind) { + case clang::CK_Dependent: + llvm_unreachable("dependent cast kind in CIR gen!"); + case clang::CK_BuiltinFnToFnPtr: + llvm_unreachable("builtin functions are handled elsewhere"); + + case CK_LValueBitCast: + case CK_ObjCObjectLValueCast: + case CK_LValueToRValueBitCast: { + LValue SourceLVal = CGF.buildLValue(E); + Address SourceAddr = SourceLVal.getAddress(); + + mlir::Type DestElemTy = CGF.convertTypeForMem(DestTy); + mlir::Type DestPtrTy = CGF.getBuilder().getPointerTo(DestElemTy); + mlir::Value DestPtr = CGF.getBuilder().createBitcast( + CGF.getLoc(E->getExprLoc()), SourceAddr.getPointer(), DestPtrTy); + + Address DestAddr = + SourceAddr.withPointer(DestPtr).withElementType(DestElemTy); + LValue DestLVal = CGF.makeAddrLValue(DestAddr, DestTy); + + if (Kind == CK_LValueToRValueBitCast) + assert(!MissingFeatures::tbaa()); + + return buildLoadOfLValue(DestLVal, CE->getExprLoc()); + } + + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_BitCast: { + auto Src = Visit(const_cast(E)); + mlir::Type DstTy = CGF.convertType(DestTy); + + assert(!MissingFeatures::addressSpace()); + if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { + llvm_unreachable("NYI"); + } + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) { + llvm_unreachable("NYI"); + } + + // Update heapallocsite metadata when there is an explicit pointer cast. + assert(!MissingFeatures::addHeapAllocSiteMetadata()); + + // If Src is a fixed vector and Dst is a scalable vector, and both have the + // same element type, use the llvm.vector.insert intrinsic to perform the + // bitcast. + assert(!MissingFeatures::scalableVectors()); + + // If Src is a scalable vector and Dst is a fixed vector, and both have the + // same element type, use the llvm.vector.extract intrinsic to perform the + // bitcast. + assert(!MissingFeatures::scalableVectors()); + + // Perform VLAT <-> VLST bitcast through memory. + // TODO: since the llvm.experimental.vector.{insert,extract} intrinsics + // require the element types of the vectors to be the same, we + // need to keep this around for bitcasts between VLAT <-> VLST where + // the element types of the vectors are not the same, until we figure + // out a better way of doing these casts. + assert(!MissingFeatures::scalableVectors()); + + return CGF.getBuilder().createBitcast(CGF.getLoc(E->getSourceRange()), Src, + DstTy); + } + case CK_AddressSpaceConversion: { + Expr::EvalResult Result; + if (E->EvaluateAsRValue(Result, CGF.getContext()) && + Result.Val.isNullPointer()) { + // If E has side effect, it is emitted even if its final result is a + // null pointer. In that case, a DCE pass should be able to + // eliminate the useless instructions emitted during translating E. + if (Result.HasSideEffects) + Visit(E); + return CGF.CGM.buildNullConstant(DestTy, CGF.getLoc(E->getExprLoc())); + } + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. 
+ auto SrcAS = CGF.builder.getAddrSpaceAttr( + E->getType()->getPointeeType().getAddressSpace()); + auto DestAS = CGF.builder.getAddrSpaceAttr( + DestTy->getPointeeType().getAddressSpace()); + return CGF.CGM.getTargetCIRGenInfo().performAddrSpaceCast( + CGF, Visit(E), SrcAS, DestAS, ConvertType(DestTy)); + } + case CK_AtomicToNonAtomic: + llvm_unreachable("NYI"); + case CK_NonAtomicToAtomic: + case CK_UserDefinedConversion: + return Visit(const_cast(E)); + case CK_NoOp: { + auto V = Visit(const_cast(E)); + if (V) { + // CK_NoOp can model a pointer qualification conversion, which can remove + // an array bound and change the IR type. + // FIXME: Once pointee types are removed from IR, remove this. + auto T = CGF.convertType(DestTy); + if (T != V.getType()) + assert(0 && "NYI"); + } + return V; + } + case CK_BaseToDerived: + llvm_unreachable("NYI"); + case CK_DerivedToBase: { + // The EmitPointerWithAlignment path does this fine; just discard + // the alignment. + return CGF.buildPointerWithAlignment(CE).getPointer(); + } + case CK_Dynamic: { + Address V = CGF.buildPointerWithAlignment(E); + const auto *DCE = cast(CE); + return CGF.buildDynamicCast(V, DCE); + } + case CK_ArrayToPointerDecay: + return CGF.buildArrayToPointerDecay(E).getPointer(); + case CK_FunctionToPointerDecay: + return buildLValue(E).getPointer(); + + case CK_NullToPointer: { + // FIXME: use MustVisitNullValue(E) and evaluate expr. + // Note that DestTy is used as the MLIR type instead of a custom + // nullptr type. + mlir::Type Ty = CGF.getCIRType(DestTy); + return Builder.getNullPtr(Ty, CGF.getLoc(E->getExprLoc())); + } + + case CK_NullToMemberPointer: { + if (MustVisitNullValue(E)) + CGF.buildIgnoredExpr(E); + + assert(!MissingFeatures::cxxABI()); + + const MemberPointerType *MPT = CE->getType()->getAs(); + if (MPT->isMemberFunctionPointerType()) { + auto Ty = mlir::cast(CGF.getCIRType(DestTy)); + return Builder.getNullMethodPtr(Ty, CGF.getLoc(E->getExprLoc())); + } + + auto Ty = mlir::cast(CGF.getCIRType(DestTy)); + return Builder.getNullDataMemberPtr(Ty, CGF.getLoc(E->getExprLoc())); + } + case CK_ReinterpretMemberPointer: + llvm_unreachable("NYI"); + case CK_BaseToDerivedMemberPointer: + llvm_unreachable("NYI"); + case CK_DerivedToBaseMemberPointer: + llvm_unreachable("NYI"); + case CK_ARCProduceObject: + llvm_unreachable("NYI"); + case CK_ARCConsumeObject: + llvm_unreachable("NYI"); + case CK_ARCReclaimReturnedObject: + llvm_unreachable("NYI"); + case CK_ARCExtendBlockObject: + llvm_unreachable("NYI"); + case CK_CopyAndAutoreleaseBlockObject: + llvm_unreachable("NYI"); + + case CK_FloatingRealToComplex: + case CK_FloatingComplexCast: + case CK_IntegralRealToComplex: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: + case CK_FloatingComplexToIntegralComplex: + llvm_unreachable("scalar cast to non-scalar value"); + + case CK_ConstructorConversion: + llvm_unreachable("NYI"); + case CK_ToUnion: + llvm_unreachable("NYI"); + + case CK_LValueToRValue: + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), DestTy)); + assert(E->isGLValue() && "lvalue-to-rvalue applied to r-value!"); + return Visit(const_cast(E)); + + case CK_IntegralToPointer: { + auto DestCIRTy = ConvertType(DestTy); + mlir::Value Src = Visit(const_cast(E)); + + // Properly resize by casting to an int of the same size as the pointer. + // Clang's IntegralToPointer includes 'bool' as the source, but in CIR + // 'bool' is not an integral type. So check the source type to get the + // correct CIR conversion. 
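// Source forms that reach the two CK_NullToMemberPointer branches above
// (hypothetical struct, not code from this patch): a null member-function
// pointer and a null data-member pointer get different CIR null values.
struct Widget {
  int field;
  int method();
};
int (Widget::*nullMethod)() = nullptr; // member-function-pointer branch
int Widget::*nullField = nullptr;      // data-member-pointer branch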
+ auto MiddleTy = CGF.CGM.getDataLayout().getIntPtrType(DestCIRTy); + auto MiddleVal = Builder.createCast(E->getType()->isBooleanType() + ? mlir::cir::CastKind::bool_to_int + : mlir::cir::CastKind::integral, + Src, MiddleTy); + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) + llvm_unreachable("NYI"); + + return Builder.createIntToPtr(MiddleVal, DestCIRTy); + } + case CK_PointerToIntegral: { + assert(!DestTy->isBooleanType() && "bool should use PointerToBool"); + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) + llvm_unreachable("NYI"); + return Builder.createPtrToInt(Visit(E), ConvertType(DestTy)); + } + case CK_ToVoid: { + CGF.buildIgnoredExpr(E); + return nullptr; + } + case CK_MatrixCast: + llvm_unreachable("NYI"); + case CK_VectorSplat: { + // Create a vector object and fill all elements with the same scalar value. + assert(DestTy->isVectorType() && "CK_VectorSplat to non-vector type"); + return CGF.getBuilder().create( + CGF.getLoc(E->getSourceRange()), CGF.getCIRType(DestTy), Visit(E)); + } + case CK_FixedPointCast: + llvm_unreachable("NYI"); + case CK_FixedPointToBoolean: + llvm_unreachable("NYI"); + case CK_FixedPointToIntegral: + llvm_unreachable("NYI"); + case CK_IntegralToFixedPoint: + llvm_unreachable("NYI"); + + case CK_IntegralCast: { + ScalarConversionOpts Opts; + if (auto *ICE = dyn_cast(CE)) { + if (!ICE->isPartOfExplicitCast()) + Opts = ScalarConversionOpts(CGF.SanOpts); + } + return buildScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc(), Opts); + } + + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingCast: + case CK_FixedPointToFloating: + case CK_FloatingToFixedPoint: { + if (Kind == CK_FixedPointToFloating || Kind == CK_FloatingToFixedPoint) + llvm_unreachable("Fixed point casts are NYI."); + CIRGenFunction::CIRGenFPOptionsRAII FPOptsRAII(CGF, CE); + return buildScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + } + case CK_BooleanToSignedIntegral: + llvm_unreachable("NYI"); + + case CK_IntegralToBoolean: { + return buildIntToBoolConversion(Visit(E), CGF.getLoc(CE->getSourceRange())); + } + + case CK_PointerToBoolean: + return buildPointerToBoolConversion(Visit(E), E->getType()); + case CK_FloatingToBoolean: + return buildFloatToBoolConversion(Visit(E), CGF.getLoc(E->getExprLoc())); + case CK_MemberPointerToBoolean: + llvm_unreachable("NYI"); + case CK_FloatingComplexToReal: + case CK_IntegralComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_IntegralComplexToBoolean: { + mlir::Value V = CGF.buildComplexExpr(E); + return buildComplexToScalarConversion(CGF.getLoc(CE->getExprLoc()), V, Kind, + DestTy); + } + case CK_ZeroToOCLOpaqueType: + llvm_unreachable("NYI"); + case CK_IntToOCLSampler: + llvm_unreachable("NYI"); + + default: + emitError(CGF.getLoc(CE->getExprLoc()), "cast kind not implemented: '") + << CE->getCastKindName() << "'"; + return nullptr; + } // end of switch + + llvm_unreachable("unknown scalar cast"); +} + +mlir::Value ScalarExprEmitter::VisitCallExpr(const CallExpr *E) { + if (E->getCallReturnType(CGF.getContext())->isReferenceType()) + return buildLoadOfLValue(E); + + auto V = CGF.buildCallExpr(E).getScalarVal(); + assert(!MissingFeatures::buildLValueAlignmentAssumption()); + return V; +} + +mlir::Value ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) { + // TODO(cir): Folding all this constants sound like work for MLIR optimizers, + // keep assertion for now. 
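// Hypothetical source example for the member-expression folding just below:
// when the member's value is a known integer constant, the access folds to
// that constant, but the base expression is still emitted for its side
// effects (the buildIgnoredExpr call on E->getBase()).
struct Config {
  static constexpr int kVersion = 3;
};
Config makeConfig(); // assume calling this has observable side effects
int readVersion() {
  return makeConfig().kVersion; // folds to 3; makeConfig() is still emitted
}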
+ assert(!MissingFeatures::tryEmitAsConstant()); + Expr::EvalResult Result; + if (E->EvaluateAsInt(Result, CGF.getContext(), Expr::SE_AllowSideEffects)) { + llvm::APSInt Value = Result.Val.getInt(); + CGF.buildIgnoredExpr(E->getBase()); + return Builder.getConstInt(CGF.getLoc(E->getExprLoc()), Value); + } + return buildLoadOfLValue(E); +} + +/// Emit a conversion from the specified type to the specified destination +/// type, both of which are CIR scalar types. +mlir::Value CIRGenFunction::buildScalarConversion(mlir::Value Src, + QualType SrcTy, + QualType DstTy, + SourceLocation Loc) { + assert(CIRGenFunction::hasScalarEvaluationKind(SrcTy) && + CIRGenFunction::hasScalarEvaluationKind(DstTy) && + "Invalid scalar expression to emit"); + return ScalarExprEmitter(*this, builder) + .buildScalarConversion(Src, SrcTy, DstTy, Loc); +} + +mlir::Value CIRGenFunction::buildComplexToScalarConversion(mlir::Value Src, + QualType SrcTy, + QualType DstTy, + SourceLocation Loc) { + assert(SrcTy->isAnyComplexType() && hasScalarEvaluationKind(DstTy) && + "Invalid complex -> scalar conversion"); + + auto ComplexElemTy = SrcTy->castAs()->getElementType(); + if (DstTy->isBooleanType()) { + auto Kind = ComplexElemTy->isFloatingType() + ? mlir::cir::CastKind::float_complex_to_bool + : mlir::cir::CastKind::int_complex_to_bool; + return builder.createCast(getLoc(Loc), Kind, Src, ConvertType(DstTy)); + } + + auto Kind = ComplexElemTy->isFloatingType() + ? mlir::cir::CastKind::float_complex_to_real + : mlir::cir::CastKind::int_complex_to_real; + auto Real = + builder.createCast(getLoc(Loc), Kind, Src, ConvertType(ComplexElemTy)); + return buildScalarConversion(Real, ComplexElemTy, DstTy, Loc); +} + +/// If the specified expression does not fold +/// to a constant, or if it does but contains a label, return false. If it +/// constant folds return true and set the boolean result in Result. +bool CIRGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, + bool &ResultBool, + bool AllowLabels) { + llvm::APSInt ResultInt; + if (!ConstantFoldsToSimpleInteger(Cond, ResultInt, AllowLabels)) + return false; + + ResultBool = ResultInt.getBoolValue(); + return true; +} + +mlir::Value ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { + bool Ignore = TestAndClearIgnoreResultAssign(); + (void)Ignore; + assert(Ignore == false && "init list ignored"); + unsigned NumInitElements = E->getNumInits(); + + if (E->hadArrayRangeDesignator()) + llvm_unreachable("NYI"); + + if (E->getType()->isVectorType()) { + assert(!MissingFeatures::scalableVectors() && "NYI: scalable vector init"); + assert(!MissingFeatures::vectorConstants() && "NYI: vector constants"); + auto VectorType = + mlir::dyn_cast(CGF.getCIRType(E->getType())); + SmallVector Elements; + for (Expr *init : E->inits()) { + Elements.push_back(Visit(init)); + } + // Zero-initialize any remaining values. + if (NumInitElements < VectorType.getSize()) { + mlir::Value ZeroValue = CGF.getBuilder().create( + CGF.getLoc(E->getSourceRange()), VectorType.getEltType(), + CGF.getBuilder().getZeroInitAttr(VectorType.getEltType())); + for (uint64_t i = NumInitElements; i < VectorType.getSize(); ++i) { + Elements.push_back(ZeroValue); + } + } + return CGF.getBuilder().create( + CGF.getLoc(E->getSourceRange()), VectorType, Elements); + } + + if (NumInitElements == 0) { + // C++11 value-initialization for the scalar. 
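// Hypothetical source examples (Clang vector extension) for two init-list
// cases handled above: missing vector initializers are zero-filled, and an
// empty list on a scalar is C++11 value-initialization (buildNullValue).
typedef int int4 __attribute__((vector_size(16)));
int4 shortInit = {1, 2}; // remaining lanes zero-filled: {1, 2, 0, 0}
int emptyInit = {};      // value-initialized scalar: 0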
+ return buildNullValue(E->getType(), CGF.getLoc(E->getExprLoc())); + } + + return Visit(E->getInit(0)); +} + +mlir::Value ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { + // Perform vector logical not on comparison with zero vector. + if (E->getType()->isVectorType() && + E->getType()->castAs()->getVectorKind() == + VectorKind::Generic) { + llvm_unreachable("NYI"); + } + + // Compare operand to zero. + mlir::Value boolVal = CGF.evaluateExprAsBool(E->getSubExpr()); + + // Invert value. + boolVal = Builder.createNot(boolVal); + + // ZExt result to the expr type. + auto dstTy = ConvertType(E->getType()); + if (mlir::isa(dstTy)) + return Builder.createBoolToInt(boolVal, dstTy); + if (mlir::isa(dstTy)) + return boolVal; + + llvm_unreachable("destination type for logical-not unary operator is NYI"); +} + +mlir::Value ScalarExprEmitter::VisitReal(const UnaryOperator *E) { + // TODO(cir): handle scalar promotion. + + Expr *Op = E->getSubExpr(); + if (Op->getType()->isAnyComplexType()) { + // If it's an l-value, load through the appropriate subobject l-value. + // Note that we have to ask E because Op might be an l-value that + // this won't work for, e.g. an Obj-C property. + if (E->isGLValue()) + return CGF.buildLoadOfLValue(CGF.buildLValue(E), E->getExprLoc()) + .getScalarVal(); + // Otherwise, calculate and project. + llvm_unreachable("NYI"); + } + + return Visit(Op); +} + +mlir::Value ScalarExprEmitter::VisitImag(const UnaryOperator *E) { + // TODO(cir): handle scalar promotion. + + Expr *Op = E->getSubExpr(); + if (Op->getType()->isAnyComplexType()) { + // If it's an l-value, load through the appropriate subobject l-value. + // Note that we have to ask E because Op might be an l-value that + // this won't work for, e.g. an Obj-C property. + if (E->isGLValue()) + return CGF.buildLoadOfLValue(CGF.buildLValue(E), E->getExprLoc()) + .getScalarVal(); + // Otherwise, calculate and project. + llvm_unreachable("NYI"); + } + + return Visit(Op); +} + +// Conversion from bool, integral, or floating-point to integral or +// floating-point. Conversions involving other types are handled elsewhere. +// Conversion to bool is handled elsewhere because that's a comparison against +// zero, not a simple cast. This handles both individual scalars and vectors. +mlir::Value ScalarExprEmitter::buildScalarCast( + mlir::Value Src, QualType SrcType, QualType DstType, mlir::Type SrcTy, + mlir::Type DstTy, ScalarConversionOpts Opts) { + assert(!SrcType->isMatrixType() && !DstType->isMatrixType() && + "Internal error: matrix types not handled by this function."); + if (mlir::isa(SrcTy) || + mlir::isa(DstTy)) + llvm_unreachable("Obsolete code. Don't use mlir::IntegerType with CIR."); + + mlir::Type FullDstTy = DstTy; + if (mlir::isa(SrcTy) && + mlir::isa(DstTy)) { + // Use the element types of the vectors to figure out the CastKind. 
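// Hypothetical source example for VisitUnaryLNot above: the operand is
// evaluated as a truth value and inverted; in C the result of !p already has
// type int, so the boolean is widened here (createBoolToInt), while in C++
// the expression type is bool and no widening is needed at this point.
int logicalNotAsInt(int *p) { return !p; } // always 0 or 1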
+ SrcTy = mlir::dyn_cast(SrcTy).getEltType(); + DstTy = mlir::dyn_cast(DstTy).getEltType(); + } + assert(!mlir::isa(SrcTy) && + !mlir::isa(DstTy) && + "buildScalarCast given a vector type and a non-vector type"); + + std::optional CastKind; + + if (mlir::isa(SrcTy)) { + if (Opts.TreatBooleanAsSigned) + llvm_unreachable("NYI: signed bool"); + if (CGF.getBuilder().isInt(DstTy)) { + CastKind = mlir::cir::CastKind::bool_to_int; + } else if (mlir::isa(DstTy)) { + CastKind = mlir::cir::CastKind::bool_to_float; + } else { + llvm_unreachable("Internal error: Cast to unexpected type"); + } + } else if (CGF.getBuilder().isInt(SrcTy)) { + if (CGF.getBuilder().isInt(DstTy)) { + CastKind = mlir::cir::CastKind::integral; + } else if (mlir::isa(DstTy)) { + CastKind = mlir::cir::CastKind::int_to_float; + } else { + llvm_unreachable("Internal error: Cast to unexpected type"); + } + } else if (mlir::isa(SrcTy)) { + if (CGF.getBuilder().isInt(DstTy)) { + // If we can't recognize overflow as undefined behavior, assume that + // overflow saturates. This protects against normal optimizations if we + // are compiling with non-standard FP semantics. + if (!CGF.CGM.getCodeGenOpts().StrictFloatCastOverflow) + llvm_unreachable("NYI"); + if (Builder.getIsFPConstrained()) + llvm_unreachable("NYI"); + CastKind = mlir::cir::CastKind::float_to_int; + } else if (mlir::isa(DstTy)) { + // TODO: split this to createFPExt/createFPTrunc + return Builder.createFloatingCast(Src, FullDstTy); + } else { + llvm_unreachable("Internal error: Cast to unexpected type"); + } + } else { + llvm_unreachable("Internal error: Cast from unexpected type"); + } + + assert(CastKind.has_value() && "Internal error: CastKind not set."); + return Builder.create(Src.getLoc(), FullDstTy, *CastKind, + Src); +} + +LValue +CIRGenFunction::buildCompoundAssignmentLValue(const CompoundAssignOperator *E) { + ScalarExprEmitter Scalar(*this, builder); + mlir::Value Result; + switch (E->getOpcode()) { +#define COMPOUND_OP(Op) \ + case BO_##Op##Assign: \ + return Scalar.buildCompoundAssignLValue(E, &ScalarExprEmitter::build##Op, \ + Result) + COMPOUND_OP(Mul); + COMPOUND_OP(Div); + COMPOUND_OP(Rem); + COMPOUND_OP(Add); + COMPOUND_OP(Sub); + COMPOUND_OP(Shl); + COMPOUND_OP(Shr); + COMPOUND_OP(And); + COMPOUND_OP(Xor); + COMPOUND_OP(Or); +#undef COMPOUND_OP + + case BO_PtrMemD: + case BO_PtrMemI: + case BO_Mul: + case BO_Div: + case BO_Rem: + case BO_Add: + case BO_Sub: + case BO_Shl: + case BO_Shr: + case BO_LT: + case BO_GT: + case BO_LE: + case BO_GE: + case BO_EQ: + case BO_NE: + case BO_Cmp: + case BO_And: + case BO_Xor: + case BO_Or: + case BO_LAnd: + case BO_LOr: + case BO_Assign: + case BO_Comma: + llvm_unreachable("Not valid compound assignment operators"); + } + llvm_unreachable("Unhandled compound assignment operator"); +} + +LValue ScalarExprEmitter::buildCompoundAssignLValue( + const CompoundAssignOperator *E, + mlir::Value (ScalarExprEmitter::*Func)(const BinOpInfo &), + mlir::Value &Result) { + QualType LHSTy = E->getLHS()->getType(); + BinOpInfo OpInfo; + + if (E->getComputationResultType()->isAnyComplexType()) + assert(0 && "not implemented"); + + // Emit the RHS first. __block variables need to have the rhs evaluated + // first, plus this should improve codegen a little. 
+ + QualType PromotionTypeCR = getPromotionType(E->getComputationResultType()); + if (PromotionTypeCR.isNull()) + PromotionTypeCR = E->getComputationResultType(); + + QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType()); + QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType()); + + if (!PromotionTypeRHS.isNull()) + OpInfo.RHS = CGF.buildPromotedScalarExpr(E->getRHS(), PromotionTypeRHS); + else + OpInfo.RHS = Visit(E->getRHS()); + + OpInfo.FullType = PromotionTypeCR; + OpInfo.CompType = OpInfo.FullType; + if (auto VecType = dyn_cast_or_null(OpInfo.FullType)) { + OpInfo.CompType = VecType->getElementType(); + } + OpInfo.Opcode = E->getOpcode(); + OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); + OpInfo.E = E; + OpInfo.Loc = E->getSourceRange(); + + // Load/convert the LHS + LValue LHSLV = CGF.buildLValue(E->getLHS()); + + if (const AtomicType *atomicTy = LHSTy->getAs()) { + assert(0 && "not implemented"); + } + + OpInfo.LHS = buildLoadOfLValue(LHSLV, E->getExprLoc()); + + CIRGenFunction::SourceLocRAIIObject sourceloc{ + CGF, CGF.getLoc(E->getSourceRange())}; + SourceLocation Loc = E->getExprLoc(); + if (!PromotionTypeLHS.isNull()) + OpInfo.LHS = buildScalarConversion(OpInfo.LHS, LHSTy, PromotionTypeLHS, + E->getExprLoc()); + else + OpInfo.LHS = buildScalarConversion(OpInfo.LHS, LHSTy, + E->getComputationLHSType(), Loc); + + // Expand the binary operator. + Result = (this->*Func)(OpInfo); + + // Convert the result back to the LHS type, + // potentially with Implicit Conversion sanitizer check. + Result = buildScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); + + // Store the result value into the LHS lvalue. Bit-fields are handled + // specially because the result is altered by the store, i.e., [C99 6.5.16p1] + // 'An assignment expression has the value of the left operand after the + // assignment...'. 
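// Hypothetical, self-contained example of the compound-assignment sequence
// above (load the LHS, convert to the computation type, apply the operator,
// convert back, store); the values are chosen to make the widening visible.
#include <cassert>
int main() {
  short s = 30000;
  // The computation type is int: s is widened, 30000 + 10000 is computed
  // without overflow, and only the final conversion back to short for the
  // store narrows the value again.
  s += 10000;
  assert(s == static_cast<short>(40000));
  return 0;
}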
+ if (LHSLV.isBitField()) + CGF.buildStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, Result); + else + CGF.buildStoreThroughLValue(RValue::get(Result), LHSLV); + + if (CGF.getLangOpts().OpenMP) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, + E->getLHS()); + return LHSLV; +} + +mlir::Value ScalarExprEmitter::buildComplexToScalarConversion( + mlir::Location Loc, mlir::Value V, CastKind Kind, QualType DestTy) { + mlir::cir::CastKind CastOpKind; + switch (Kind) { + case CK_FloatingComplexToReal: + CastOpKind = mlir::cir::CastKind::float_complex_to_real; + break; + case CK_IntegralComplexToReal: + CastOpKind = mlir::cir::CastKind::int_complex_to_real; + break; + case CK_FloatingComplexToBoolean: + CastOpKind = mlir::cir::CastKind::float_complex_to_bool; + break; + case CK_IntegralComplexToBoolean: + CastOpKind = mlir::cir::CastKind::int_complex_to_bool; + break; + default: + llvm_unreachable("invalid complex-to-scalar cast kind"); + } + + return Builder.createCast(Loc, CastOpKind, V, CGF.ConvertType(DestTy)); +} + +mlir::Value ScalarExprEmitter::buildNullValue(QualType Ty, mlir::Location loc) { + return CGF.buildFromMemory(CGF.CGM.buildNullConstant(Ty, loc), Ty); +} + +mlir::Value ScalarExprEmitter::buildPromoted(const Expr *E, + QualType PromotionType) { + E = E->IgnoreParens(); + if (const auto *BO = dyn_cast(E)) { + switch (BO->getOpcode()) { +#define HANDLE_BINOP(OP) \ + case BO_##OP: \ + return build##OP(buildBinOps(BO, PromotionType)); + HANDLE_BINOP(Add) + HANDLE_BINOP(Sub) + HANDLE_BINOP(Mul) + HANDLE_BINOP(Div) +#undef HANDLE_BINOP + default: + break; + } + } else if (const auto *UO = dyn_cast(E)) { + switch (UO->getOpcode()) { + case UO_Imag: + case UO_Real: + llvm_unreachable("NYI"); + case UO_Minus: + return VisitMinus(UO, PromotionType); + case UO_Plus: + return VisitPlus(UO, PromotionType); + default: + break; + } + } + auto result = Visit(const_cast(E)); + if (result) { + if (!PromotionType.isNull()) + return buildPromotedValue(result, PromotionType); + return buildUnPromotedValue(result, E->getType()); + } + return result; +} + +mlir::Value ScalarExprEmitter::buildCompoundAssign( + const CompoundAssignOperator *E, + mlir::Value (ScalarExprEmitter::*Func)(const BinOpInfo &)) { + + bool Ignore = TestAndClearIgnoreResultAssign(); + mlir::Value RHS; + LValue LHS = buildCompoundAssignLValue(E, Func, RHS); + + // If the result is clearly ignored, return now. + if (Ignore) + return {}; + + // The result of an assignment in C is the assigned r-value. + if (!CGF.getLangOpts().CPlusPlus) + return RHS; + + // If the lvalue is non-volatile, return the computed value of the assignment. + if (!LHS.isVolatileQualified()) + return RHS; + + // Otherwise, reload the value. + return buildLoadOfLValue(LHS, E->getExprLoc()); +} + +mlir::Value ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { + auto scopeLoc = CGF.getLoc(E->getSourceRange()); + auto &builder = CGF.builder; + + auto scope = builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Type &yieldTy, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{CGF, loc, + builder.getInsertionBlock()}; + auto scopeYieldVal = Visit(E->getSubExpr()); + if (scopeYieldVal) { + builder.create(loc, scopeYieldVal); + yieldTy = scopeYieldVal.getType(); + } + }); + + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + // TODO(cir): Scope.ForceCleanup({&V}); + return scope.getNumResults() > 0 ? 
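// Hypothetical, self-contained example of why bit-fields are special-cased
// in both assignment paths above: the store may truncate, and C/C++ define
// the value of the assignment expression as the member's value after the
// store.
#include <cassert>
struct Flags {
  unsigned tag : 3; // holds 0..7
};
int main() {
  Flags f{};
  int wide = 12;                 // does not fit in 3 bits
  int observed = (f.tag = wide); // value read back after the store
  assert(f.tag == 4 && observed == 4);
  return 0;
}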
scope->getResult(0) : nullptr; +} + +mlir::Value ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { + bool Ignore = TestAndClearIgnoreResultAssign(); + + mlir::Value RHS; + LValue LHS; + + switch (E->getLHS()->getType().getObjCLifetime()) { + case Qualifiers::OCL_Strong: + llvm_unreachable("NYI"); + case Qualifiers::OCL_Autoreleasing: + llvm_unreachable("NYI"); + case Qualifiers::OCL_ExplicitNone: + llvm_unreachable("NYI"); + case Qualifiers::OCL_Weak: + llvm_unreachable("NYI"); + case Qualifiers::OCL_None: + // __block variables need to have the rhs evaluated first, plus this should + // improve codegen just a little. + RHS = Visit(E->getRHS()); + LHS = buildCheckedLValue(E->getLHS(), CIRGenFunction::TCK_Store); + + // Store the value into the LHS. Bit-fields are handled specially because + // the result is altered by the store, i.e., [C99 6.5.16p1] + // 'An assignment expression has the value of the left operand after the + // assignment...'. + if (LHS.isBitField()) { + CGF.buildStoreThroughBitfieldLValue(RValue::get(RHS), LHS, RHS); + } else { + CGF.buildNullabilityCheck(LHS, RHS, E->getExprLoc()); + CIRGenFunction::SourceLocRAIIObject loc{CGF, + CGF.getLoc(E->getSourceRange())}; + CGF.buildStoreThroughLValue(RValue::get(RHS), LHS); + } + } + + // If the result is clearly ignored, return now. + if (Ignore) + return nullptr; + + // The result of an assignment in C is the assigned r-value. + if (!CGF.getLangOpts().CPlusPlus) + return RHS; + + // If the lvalue is non-volatile, return the computed value of the assignment. + if (!LHS.isVolatileQualified()) + return RHS; + + // Otherwise, reload the value. + return buildLoadOfLValue(LHS, E->getExprLoc()); +} + +/// Return true if the specified expression is cheap enough and side-effect-free +/// enough to evaluate unconditionally instead of conditionally. This is used +/// to convert control flow into selects in some cases. +/// TODO(cir): can be shared with LLVM codegen. +static bool isCheapEnoughToEvaluateUnconditionally(const Expr *E, + CIRGenFunction &CGF) { + // Anything that is an integer or floating point constant is fine. + return E->IgnoreParens()->isEvaluatable(CGF.getContext()); + + // Even non-volatile automatic variables can't be evaluated unconditionally. + // Referencing a thread_local may cause non-trivial initialization work to + // occur. If we're inside a lambda and one of the variables is from the scope + // outside the lambda, that function may have returned already. Reading its + // locals is a bad idea. Also, these reads may introduce races there didn't + // exist in the source-level program. +} + +mlir::Value ScalarExprEmitter::VisitAbstractConditionalOperator( + const AbstractConditionalOperator *E) { + auto &builder = CGF.getBuilder(); + auto loc = CGF.getLoc(E->getSourceRange()); + TestAndClearIgnoreResultAssign(); + + // Bind the common expression if necessary. + CIRGenFunction::OpaqueValueMapping binding(CGF, E); + + Expr *condExpr = E->getCond(); + Expr *lhsExpr = E->getTrueExpr(); + Expr *rhsExpr = E->getFalseExpr(); + + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm. + bool CondExprBool; + if (CGF.ConstantFoldsToSimpleInteger(condExpr, CondExprBool)) { + Expr *live = lhsExpr, *dead = rhsExpr; + if (!CondExprBool) + std::swap(live, dead); + + // If the dead side doesn't have labels we need, just emit the Live part. 
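// Hypothetical source examples for the two shortcuts in the conditional
// operator lowering below; sideEffect() stands in for any non-trivial call.
int sideEffect();
// The condition folds to a constant, so only the live arm is emitted and
// sideEffect() disappears with the dead arm (unless that arm contains a
// label that is jumped to).
int foldedCondition(int x) { return 1 ? x : sideEffect(); }
// Both arms are cheap, constant-foldable, side-effect-free expressions, so
// this can be emitted as a simple select instead of real control flow.
int cheapArms(bool c) { return c ? 4 : 5; }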
+ if (!CGF.ContainsLabel(dead)) { + if (CondExprBool) + assert(!MissingFeatures::incrementProfileCounter()); + auto Result = Visit(live); + + // If the live part is a throw expression, it acts like it has a void + // type, so evaluating it returns a null Value. However, a conditional + // with non-void type must return a non-null Value. + if (!Result && !E->getType()->isVoidType()) { + llvm_unreachable("NYI"); + } + + return Result; + } + } + + // OpenCL: If the condition is a vector, we can treat this condition like + // the select function. + if ((CGF.getLangOpts().OpenCL && condExpr->getType()->isVectorType()) || + condExpr->getType()->isExtVectorType()) { + llvm_unreachable("NYI"); + } + + if (condExpr->getType()->isVectorType() || + condExpr->getType()->isSveVLSBuiltinType()) { + assert(condExpr->getType()->isVectorType() && "?: op for SVE vector NYI"); + mlir::Value condValue = Visit(condExpr); + mlir::Value lhsValue = Visit(lhsExpr); + mlir::Value rhsValue = Visit(rhsExpr); + return builder.create(loc, condValue, lhsValue, + rhsValue); + } + + // If this is a really simple expression (like x ? 4 : 5), emit this as a + // select instead of as control flow. We can only do this if it is cheap and + // safe to evaluate the LHS and RHS unconditionally. + if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) && + isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) { + bool lhsIsVoid = false; + auto condV = CGF.evaluateExprAsBool(condExpr); + assert(!MissingFeatures::incrementProfileCounter()); + + return builder + .create( + loc, condV, /*thenBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto lhs = Visit(lhsExpr); + if (!lhs) { + lhs = builder.getNullValue(CGF.VoidTy, loc); + lhsIsVoid = true; + } + builder.create(loc, lhs); + }, + /*elseBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto rhs = Visit(rhsExpr); + if (lhsIsVoid) { + assert(!rhs && "lhs and rhs types must match"); + rhs = builder.getNullValue(CGF.VoidTy, loc); + } + builder.create(loc, rhs); + }) + .getResult(); + } + + mlir::Value condV = CGF.buildOpOnBoolExpr(loc, condExpr); + CIRGenFunction::ConditionalEvaluation eval(CGF); + SmallVector insertPoints{}; + mlir::Type yieldTy{}; + + auto patchVoidOrThrowSites = [&]() { + if (insertPoints.empty()) + return; + // If both arms are void, so be it. + if (!yieldTy) + yieldTy = CGF.VoidTy; + + // Insert required yields. + for (auto &toInsert : insertPoints) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.restoreInsertionPoint(toInsert); + + // Block does not return: build empty yield. + if (mlir::isa(yieldTy)) { + builder.create(loc); + } else { // Block returns: set null yield value. + mlir::Value op0 = builder.getNullValue(yieldTy, loc); + builder.create(loc, op0); + } + } + }; + + return builder + .create( + loc, condV, /*trueBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{CGF, loc, + b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + + assert(!MissingFeatures::incrementProfileCounter()); + eval.begin(CGF); + auto lhs = Visit(lhsExpr); + eval.end(CGF); + + if (lhs) { + yieldTy = lhs.getType(); + b.create(loc, lhs); + return; + } + // If LHS or RHS is a throw or void expression we need to patch arms + // as to properly match yield types. 
+ insertPoints.push_back(b.saveInsertionPoint()); + }, + /*falseBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{CGF, loc, + b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + + assert(!MissingFeatures::incrementProfileCounter()); + eval.begin(CGF); + auto rhs = Visit(rhsExpr); + eval.end(CGF); + + if (rhs) { + yieldTy = rhs.getType(); + b.create(loc, rhs); + } else { + // If LHS or RHS is a throw or void expression we need to patch + // arms as to properly match yield types. + insertPoints.push_back(b.saveInsertionPoint()); + } + + patchVoidOrThrowSites(); + }) + .getResult(); +} + +mlir::Value CIRGenFunction::buildScalarPrePostIncDec(const UnaryOperator *E, + LValue LV, bool isInc, + bool isPre) { + return ScalarExprEmitter(*this, builder) + .buildScalarPrePostIncDec(E, LV, isInc, isPre); +} + +mlir::Value ScalarExprEmitter::VisitBinLAnd(const clang::BinaryOperator *E) { + if (E->getType()->isVectorType()) { + llvm_unreachable("NYI"); + } + + bool InstrumentRegions = CGF.CGM.getCodeGenOpts().hasProfileClangInstr(); + mlir::Type ResTy = ConvertType(E->getType()); + mlir::Location Loc = CGF.getLoc(E->getExprLoc()); + + // If we have 0 && RHS, see if we can elide RHS, if so, just return 0. + // If we have 1 && X, just emit X without inserting the control flow. + bool LHSCondVal; + if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) { + if (LHSCondVal) { // If we have 1 && X, just emit X. + + mlir::Value RHSCond = CGF.evaluateExprAsBool(E->getRHS()); + + if (InstrumentRegions) { + llvm_unreachable("NYI"); + } + // ZExt result to int or bool. + return Builder.createZExtOrBitCast(RHSCond.getLoc(), RHSCond, ResTy); + } + // 0 && RHS: If it is safe, just elide the RHS, and return 0/false. 
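// Hypothetical source examples for the constant-folding fast paths of the
// logical AND lowering above; rhsWithSideEffects() is a placeholder.
bool rhsWithSideEffects();
constexpr bool kEnabled = true;
constexpr bool kDisabled = false;
// kEnabled && RHS: the branching is skipped; the RHS is simply evaluated as
// a boolean and then zero-extended to the expression type when that is int.
bool andWithTrue() { return kEnabled && rhsWithSideEffects(); }
// kDisabled && RHS: the whole expression folds to false and the RHS is never
// emitted, provided it contains no label.
bool andWithFalse() { return kDisabled && rhsWithSideEffects(); }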
+ if (!CGF.ContainsLabel(E->getRHS())) + return Builder.getNullValue(ResTy, Loc); + } + + CIRGenFunction::ConditionalEvaluation eval(CGF); + + mlir::Value LHSCondV = CGF.evaluateExprAsBool(E->getLHS()); + auto ResOp = Builder.create( + Loc, LHSCondV, /*trueBuilder=*/ + [&](mlir::OpBuilder &B, mlir::Location Loc) { + CIRGenFunction::LexicalScope LexScope{CGF, Loc, B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + mlir::Value RHSCondV = CGF.evaluateExprAsBool(E->getRHS()); + auto res = B.create( + Loc, RHSCondV, /*trueBuilder*/ + [&](mlir::OpBuilder &B, mlir::Location Loc) { + CIRGenFunction::LexicalScope lexScope{CGF, Loc, + B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + auto res = B.create( + Loc, Builder.getBoolTy(), + Builder.getAttr(Builder.getBoolTy(), + true)); + B.create(Loc, res.getRes()); + }, + /*falseBuilder*/ + [&](mlir::OpBuilder &b, mlir::Location Loc) { + CIRGenFunction::LexicalScope lexScope{CGF, Loc, + b.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + auto res = b.create( + Loc, Builder.getBoolTy(), + Builder.getAttr(Builder.getBoolTy(), + false)); + b.create(Loc, res.getRes()); + }); + B.create(Loc, res.getResult()); + }, + /*falseBuilder*/ + [&](mlir::OpBuilder &B, mlir::Location Loc) { + CIRGenFunction::LexicalScope lexScope{CGF, Loc, B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + auto res = B.create( + Loc, Builder.getBoolTy(), + Builder.getAttr(Builder.getBoolTy(), false)); + B.create(Loc, res.getRes()); + }); + return Builder.createZExtOrBitCast(ResOp.getLoc(), ResOp.getResult(), ResTy); +} + +mlir::Value ScalarExprEmitter::VisitBinLOr(const clang::BinaryOperator *E) { + if (E->getType()->isVectorType()) { + llvm_unreachable("NYI"); + } + + bool InstrumentRegions = CGF.CGM.getCodeGenOpts().hasProfileClangInstr(); + mlir::Type ResTy = ConvertType(E->getType()); + mlir::Location Loc = CGF.getLoc(E->getExprLoc()); + + // If we have 1 || RHS, see if we can elide RHS, if so, just return 1. + // If we have 0 || X, just emit X without inserting the control flow. + bool LHSCondVal; + if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) { + if (!LHSCondVal) { // If we have 0 || X, just emit X. + + mlir::Value RHSCond = CGF.evaluateExprAsBool(E->getRHS()); + + if (InstrumentRegions) { + llvm_unreachable("NYI"); + } + // ZExt result to int or bool. + return Builder.createZExtOrBitCast(RHSCond.getLoc(), RHSCond, ResTy); + } + // 1 || RHS: If it is safe, just elide the RHS, and return 1/true. 
+ if (!CGF.ContainsLabel(E->getRHS())) { + if (auto intTy = mlir::dyn_cast(ResTy)) + return Builder.getConstInt(Loc, intTy, 1); + else + return Builder.getBool(true, Loc); + } + } + + CIRGenFunction::ConditionalEvaluation eval(CGF); + + mlir::Value LHSCondV = CGF.evaluateExprAsBool(E->getLHS()); + auto ResOp = Builder.create( + Loc, LHSCondV, /*trueBuilder=*/ + [&](mlir::OpBuilder &B, mlir::Location Loc) { + CIRGenFunction::LexicalScope lexScope{CGF, Loc, B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + auto res = B.create( + Loc, Builder.getBoolTy(), + Builder.getAttr(Builder.getBoolTy(), true)); + B.create(Loc, res.getRes()); + }, + /*falseBuilder*/ + [&](mlir::OpBuilder &B, mlir::Location Loc) { + CIRGenFunction::LexicalScope LexScope{CGF, Loc, B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + mlir::Value RHSCondV = CGF.evaluateExprAsBool(E->getRHS()); + auto res = B.create( + Loc, RHSCondV, /*trueBuilder*/ + [&](mlir::OpBuilder &B, mlir::Location Loc) { + SmallVector Locs; + if (mlir::isa(Loc)) { + Locs.push_back(Loc); + Locs.push_back(Loc); + } else if (mlir::isa(Loc)) { + auto fusedLoc = mlir::cast(Loc); + Locs.push_back(fusedLoc.getLocations()[0]); + Locs.push_back(fusedLoc.getLocations()[1]); + } + CIRGenFunction::LexicalScope lexScope{CGF, Loc, + B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + auto res = B.create( + Loc, Builder.getBoolTy(), + Builder.getAttr(Builder.getBoolTy(), + true)); + B.create(Loc, res.getRes()); + }, + /*falseBuilder*/ + [&](mlir::OpBuilder &b, mlir::Location Loc) { + SmallVector Locs; + if (mlir::isa(Loc)) { + Locs.push_back(Loc); + Locs.push_back(Loc); + } else if (mlir::isa(Loc)) { + auto fusedLoc = mlir::cast(Loc); + Locs.push_back(fusedLoc.getLocations()[0]); + Locs.push_back(fusedLoc.getLocations()[1]); + } + CIRGenFunction::LexicalScope lexScope{CGF, Loc, + B.getInsertionBlock()}; + CGF.currLexScope->setAsTernary(); + auto res = b.create( + Loc, Builder.getBoolTy(), + Builder.getAttr(Builder.getBoolTy(), + false)); + b.create(Loc, res.getRes()); + }); + B.create(Loc, res.getResult()); + }); + + return Builder.createZExtOrBitCast(ResOp.getLoc(), ResOp.getResult(), ResTy); +} + +mlir::Value ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { + QualType Ty = VE->getType(); + + if (Ty->isVariablyModifiedType()) + assert(!MissingFeatures::variablyModifiedTypeEmission() && "NYI"); + + Address ArgValue = Address::invalid(); + mlir::Value Val = CGF.buildVAArg(VE, ArgValue); + + return Val; +} + +/// Return the size or alignment of the type of argument of the sizeof +/// expression as an integer. +mlir::Value ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( + const UnaryExprOrTypeTraitExpr *E) { + QualType TypeToSize = E->getTypeOfArgument(); + if (E->getKind() == UETT_SizeOf) { + if (const VariableArrayType *VAT = + CGF.getContext().getAsVariableArrayType(TypeToSize)) { + + if (E->isArgumentType()) { + // sizeof(type) - make sure to emit the VLA size. + CGF.buildVariablyModifiedType(TypeToSize); + } else { + // C99 6.5.3.4p2: If the argument is an expression of type + // VLA, it is evaluated. + CGF.buildIgnoredExpr(E->getArgumentExpr()); + } + + auto VlaSize = CGF.getVLASize(VAT); + mlir::Value size = VlaSize.NumElts; + + // Scale the number of non-VLA elements by the non-VLA element size. 
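// Hypothetical example for the VLA branch of the sizeof handling above
// (VLAs are C99; Clang accepts them in C++ only as an extension, so this
// sketch is best read as C): sizeof applied to a VLA is a run-time value,
// NumElts * sizeof(element), which is the multiply emitted from getVLASize.
#include <stddef.h>
size_t vlaSize(int n) {
  int a[n];        // variable length array
  return sizeof a; // n * sizeof(int), evaluated at run time
}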
+ CharUnits eltSize = CGF.getContext().getTypeSizeInChars(VlaSize.Type); + if (!eltSize.isOne()) + size = Builder.createMul(size, CGF.CGM.getSize(eltSize).getValue()); + + return size; + } + } else if (E->getKind() == UETT_OpenMPRequiredSimdAlign) { + llvm_unreachable("NYI"); + } + + // If this isn't sizeof(vla), the result must be constant; use the constant + // folding logic so we don't have to duplicate it here. + return Builder.getConstInt(CGF.getLoc(E->getSourceRange()), + E->EvaluateKnownConstInt(CGF.getContext())); +} + +mlir::Value CIRGenFunction::buildCheckedInBoundsGEP( + mlir::Type ElemTy, mlir::Value Ptr, ArrayRef IdxList, + bool SignedIndices, bool IsSubtraction, SourceLocation Loc) { + mlir::Type PtrTy = Ptr.getType(); + assert(IdxList.size() == 1 && "multi-index ptr arithmetic NYI"); + mlir::Value GEPVal = builder.create( + CGM.getLoc(Loc), PtrTy, Ptr, IdxList[0]); + + // If the pointer overflow sanitizer isn't enabled, do nothing. + if (!SanOpts.has(SanitizerKind::PointerOverflow)) + return GEPVal; + + // TODO(cir): the unreachable code below hides a substantial amount of code + // from the original codegen related with pointer overflow sanitizer. + assert(MissingFeatures::pointerOverflowSanitizer()); + llvm_unreachable("pointer overflow sanitizer NYI"); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp new file mode 100644 index 000000000000..e686c7b0e63b --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -0,0 +1,1881 @@ +//===- CIRGenFunction.cpp - Emit CIR from ASTs for a Function -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This coordinates the per-function state used while generating code +// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "CIRGenCXXABI.h" +#include "CIRGenModule.h" +#include "CIRGenOpenMPRuntime.h" +#include "clang/CIR/MissingFeatures.h" + +#include "clang/AST/ASTLambda.h" +#include "clang/AST/ExprObjC.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/DiagnosticCategories.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/FPEnv.h" +#include "clang/Frontend/FrontendDiagnostic.h" + +#include "CIRGenTBAA.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Support/LogicalResult.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; + +CIRGenFunction::CIRGenFunction(CIRGenModule &CGM, CIRGenBuilderTy &builder, + bool suppressNewContext) + : CIRGenTypeCache(CGM), CGM{CGM}, builder(builder), + SanOpts(CGM.getLangOpts().Sanitize), CurFPFeatures(CGM.getLangOpts()), + ShouldEmitLifetimeMarkers(false) { + if (!suppressNewContext) + CGM.getCXXABI().getMangleContext().startNewFunction(); + EHStack.setCGF(this); + + // TODO(CIR): SetFastMathFlags(CurFPFeatures); +} + +CIRGenFunction::~CIRGenFunction() { + assert(LifetimeExtendedCleanupStack.empty() && "failed to emit a cleanup"); + assert(DeferredDeactivationCleanupStack.empty() && + "missed to deactivate a cleanup"); + + // TODO(cir): set function is finished. + assert(!MissingFeatures::openMPRuntime()); + + // If we have an OpenMPIRBuilder we want to finalize functions (incl. 
+ // outlining etc) at some point. Doing it once the function codegen is done + // seems to be a reasonable spot. We do it here, as opposed to the deletion + // time of the CodeGenModule, because we have to ensure the IR has not yet + // been "emitted" to the outside, thus, modifications are still sensible. + assert(!MissingFeatures::openMPRuntime()); +} + +clang::ASTContext &CIRGenFunction::getContext() const { + return CGM.getASTContext(); +} + +mlir::Type CIRGenFunction::ConvertType(QualType T) { + return CGM.getTypes().ConvertType(T); +} + +TypeEvaluationKind CIRGenFunction::getEvaluationKind(QualType type) { + type = type.getCanonicalType(); + while (true) { + switch (type->getTypeClass()) { +#define TYPE(name, parent) +#define ABSTRACT_TYPE(name, parent) +#define NON_CANONICAL_TYPE(name, parent) case Type::name: +#define DEPENDENT_TYPE(name, parent) case Type::name: +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(name, parent) case Type::name: +#include "clang/AST/TypeNodes.inc" + llvm_unreachable("non-canonical or dependent type in IR-generation"); + + case Type::ArrayParameter: + llvm_unreachable("NYI"); + + case Type::Auto: + case Type::DeducedTemplateSpecialization: + llvm_unreachable("undeduced type in IR-generation"); + + // Various scalar types. + case Type::Builtin: + case Type::Pointer: + case Type::BlockPointer: + case Type::LValueReference: + case Type::RValueReference: + case Type::MemberPointer: + case Type::Vector: + case Type::ExtVector: + case Type::ConstantMatrix: + case Type::FunctionProto: + case Type::FunctionNoProto: + case Type::Enum: + case Type::ObjCObjectPointer: + case Type::Pipe: + case Type::BitInt: + return TEK_Scalar; + + // Complexes. + case Type::Complex: + return TEK_Complex; + + // Arrays, records, and Objective-C objects. + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + case Type::Record: + case Type::ObjCObject: + case Type::ObjCInterface: + return TEK_Aggregate; + + // We operate on atomic values according to their underlying type. + case Type::Atomic: + type = cast(type)->getValueType(); + continue; + } + llvm_unreachable("unknown type kind!"); + } +} + +mlir::Type CIRGenFunction::convertTypeForMem(QualType T) { + return CGM.getTypes().convertTypeForMem(T); +} + +mlir::Type CIRGenFunction::convertType(QualType T) { + return CGM.getTypes().ConvertType(T); +} + +mlir::Location CIRGenFunction::getLoc(SourceLocation SLoc) { + // Some AST nodes might contain invalid source locations (e.g. + // CXXDefaultArgExpr), workaround that to still get something out. + if (SLoc.isValid()) { + const SourceManager &SM = getContext().getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(SLoc); + StringRef Filename = PLoc.getFilename(); + return mlir::FileLineColLoc::get(builder.getStringAttr(Filename), + PLoc.getLine(), PLoc.getColumn()); + } else { + // Do our best... + assert(currSrcLoc && "expected to inherit some source location"); + return *currSrcLoc; + } +} + +mlir::Location CIRGenFunction::getLoc(SourceRange SLoc) { + // Some AST nodes might contain invalid source locations (e.g. + // CXXDefaultArgExpr), workaround that to still get something out. + if (SLoc.isValid()) { + mlir::Location B = getLoc(SLoc.getBegin()); + mlir::Location E = getLoc(SLoc.getEnd()); + SmallVector locs = {B, E}; + mlir::Attribute metadata; + return mlir::FusedLoc::get(locs, metadata, builder.getContext()); + } else if (currSrcLoc) { + return *currSrcLoc; + } + + // We're brave, but time to give up. 
+ return builder.getUnknownLoc(); +} + +mlir::Location CIRGenFunction::getLoc(mlir::Location lhs, mlir::Location rhs) { + SmallVector locs = {lhs, rhs}; + mlir::Attribute metadata; + return mlir::FusedLoc::get(locs, metadata, builder.getContext()); +} + +/// Return true if the statement contains a label in it. If +/// this statement is not executed normally, it not containing a label means +/// that we can just remove the code. +bool CIRGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) { + // Null statement, not a label! + if (!S) + return false; + + // If this is a label, we have to emit the code, consider something like: + // if (0) { ... foo: bar(); } goto foo; + // + // TODO: If anyone cared, we could track __label__'s, since we know that you + // can't jump to one from outside their declared region. + if (isa(S)) + return true; + + // If this is a case/default statement, and we haven't seen a switch, we + // have to emit the code. + if (isa(S) && !IgnoreCaseStmts) + return true; + + // If this is a switch statement, we want to ignore cases below it. + if (isa(S)) + IgnoreCaseStmts = true; + + // Scan subexpressions for verboten labels. + for (const Stmt *SubStmt : S->children()) + if (ContainsLabel(SubStmt, IgnoreCaseStmts)) + return true; + + return false; +} + +bool CIRGenFunction::sanitizePerformTypeCheck() const { + return SanOpts.has(SanitizerKind::Null) || + SanOpts.has(SanitizerKind::Alignment) || + SanOpts.has(SanitizerKind::ObjectSize) || + SanOpts.has(SanitizerKind::Vptr); +} + +void CIRGenFunction::buildTypeCheck(TypeCheckKind TCK, + clang::SourceLocation Loc, mlir::Value V, + clang::QualType Type, + clang::CharUnits Alignment, + clang::SanitizerSet SkippedChecks, + std::optional ArraySize) { + if (!sanitizePerformTypeCheck()) + return; + + assert(false && "type check NYI"); +} + +/// If the specified expression does not fold +/// to a constant, or if it does but contains a label, return false. If it +/// constant folds return true and set the folded value. +bool CIRGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, + llvm::APSInt &ResultInt, + bool AllowLabels) { + // FIXME: Rename and handle conversion of other evaluatable things + // to bool. + Expr::EvalResult Result; + if (!Cond->EvaluateAsInt(Result, getContext())) + return false; // Not foldable, not integer or not fully evaluatable. + + llvm::APSInt Int = Result.Val.getInt(); + if (!AllowLabels && ContainsLabel(Cond)) + return false; // Contains a label. + + ResultInt = Int; + return true; +} + +mlir::Type CIRGenFunction::getCIRType(const QualType &type) { + return CGM.getCIRType(type); +} + +/// Determine whether the function F ends with a return stmt. +static bool endsWithReturn(const Decl *F) { + const Stmt *Body = nullptr; + if (auto *FD = dyn_cast_or_null(F)) + Body = FD->getBody(); + else if (auto *OMD = dyn_cast_or_null(F)) + llvm_unreachable("NYI"); + + if (auto *CS = dyn_cast_or_null(Body)) { + auto LastStmt = CS->body_rbegin(); + if (LastStmt != CS->body_rend()) + return isa(*LastStmt); + } + return false; +} + +void CIRGenFunction::buildAndUpdateRetAlloca(QualType ty, mlir::Location loc, + CharUnits alignment) { + + if (ty->isVoidType()) { + // Void type; nothing to return. + ReturnValue = Address::invalid(); + + // Count the implicit return. 
+ if (!endsWithReturn(CurFuncDecl)) + ++NumReturnExprs; + } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) { + // TODO(CIR): Consider this implementation in CIRtoLLVM + llvm_unreachable("NYI"); + // TODO(CIR): Consider this implementation in CIRtoLLVM + } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca) { + llvm_unreachable("NYI"); + } else { + auto addr = buildAlloca("__retval", ty, loc, alignment); + FnRetAlloca = addr; + ReturnValue = Address(addr, alignment); + + // Tell the epilog emitter to autorelease the result. We do this now so + // that various specialized functions can suppress it during their IR - + // generation + if (getLangOpts().ObjCAutoRefCount) + llvm_unreachable("NYI"); + } +} + +mlir::LogicalResult CIRGenFunction::declare(const Decl *var, QualType ty, + mlir::Location loc, + CharUnits alignment, + mlir::Value &addr, bool isParam) { + const auto *namedVar = dyn_cast_or_null(var); + assert(namedVar && "Needs a named decl"); + assert(!symbolTable.count(var) && "not supposed to be available just yet"); + + addr = buildAlloca(namedVar->getName(), ty, loc, alignment); + if (isParam) { + auto allocaOp = cast(addr.getDefiningOp()); + allocaOp.setInitAttr(mlir::UnitAttr::get(builder.getContext())); + } + + symbolTable.insert(var, addr); + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::declare(Address addr, const Decl *var, + QualType ty, mlir::Location loc, + CharUnits alignment, + mlir::Value &addrVal, + bool isParam) { + const auto *namedVar = dyn_cast_or_null(var); + assert(namedVar && "Needs a named decl"); + assert(!symbolTable.count(var) && "not supposed to be available just yet"); + + addrVal = addr.getPointer(); + if (isParam) { + auto allocaOp = cast(addrVal.getDefiningOp()); + allocaOp.setInitAttr(mlir::UnitAttr::get(builder.getContext())); + } + + symbolTable.insert(var, addrVal); + return mlir::success(); +} + +/// All scope related cleanup needed: +/// - Patching up unsolved goto's. +/// - Build all cleanup code and insert yield/returns. +void CIRGenFunction::LexicalScope::cleanup() { + auto &builder = CGF.builder; + auto *localScope = CGF.currLexScope; + + auto applyCleanup = [&]() { + if (PerformCleanup) { + // ApplyDebugLocation + assert(!MissingFeatures::generateDebugInfo()); + ForceCleanup(); + } + }; + + // Cleanup are done right before codegen resume a scope. This is where + // objects are destroyed. + unsigned curLoc = 0; + for (auto *retBlock : localScope->getRetBlocks()) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(retBlock); + mlir::Location retLoc = *localScope->getRetLocs()[curLoc]; + curLoc++; + (void)buildReturn(retLoc); + } + + auto insertCleanupAndLeave = [&](mlir::Block *InsPt) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(InsPt); + + // Leverage and defers to RunCleanupsScope's dtor and scope handling. + applyCleanup(); + + if (localScope->Depth == 0) { + buildImplicitReturn(); + return; + } + + // End of any local scope != function + // Ternary ops have to deal with matching arms for yielding types + // and do return a value, it must do its own cir.yield insertion. + if (!localScope->isTernary()) { + !retVal ? builder.create(localScope->EndLoc) + : builder.create(localScope->EndLoc, retVal); + } + }; + + // If a cleanup block has been created at some point, branch to it + // and set the insertion point to continue at the cleanup block. 
+ // Terminators are then inserted either in the cleanup block or + // inline in this current block. + auto *cleanupBlock = localScope->getCleanupBlock(builder); + if (cleanupBlock) + insertCleanupAndLeave(cleanupBlock); + + // Now deal with any pending block wrap up like implicit end of + // scope. + + // If a terminator is already present in the current block, nothing + // else to do here. + auto *currBlock = builder.getBlock(); + if (isGlobalInit() && !currBlock) + return; + if (currBlock->mightHaveTerminator() && currBlock->getTerminator()) + return; + + // An empty non-entry block has nothing to offer, and since this is + // synthetic, losing information does not affect anything. + bool entryBlock = builder.getInsertionBlock()->isEntryBlock(); + if (!entryBlock && currBlock->empty()) { + currBlock->erase(); + // Remove unused cleanup blocks. + if (cleanupBlock && cleanupBlock->hasNoPredecessors()) + cleanupBlock->erase(); + // FIXME(cir): ideally we should call applyCleanup() before we + // get into this condition and emit the proper cleanup. This is + // needed to get nrvo to interop with dtor logic. + PerformCleanup = false; + return; + } + + // If there's a cleanup block, branch to it, nothing else to do. + if (cleanupBlock) { + builder.create(currBlock->back().getLoc(), cleanupBlock); + return; + } + + // No pre-existent cleanup block, emit cleanup code and yield/return. + insertCleanupAndLeave(currBlock); +} + +mlir::cir::ReturnOp +CIRGenFunction::LexicalScope::buildReturn(mlir::Location loc) { + auto &builder = CGF.getBuilder(); + + // If we are on a coroutine, add the coro_end builtin call. + auto Fn = dyn_cast(CGF.CurFn); + assert(Fn && "other callables NYI"); + if (Fn.getCoroutine()) + CGF.buildCoroEndBuiltinCall( + loc, builder.getNullPtr(builder.getVoidPtrTy(), loc)); + + if (CGF.FnRetCIRTy.has_value()) { + // If there's anything to return, load it first. + auto val = builder.create(loc, *CGF.FnRetCIRTy, *CGF.FnRetAlloca); + return builder.create(loc, llvm::ArrayRef(val.getResult())); + } + return builder.create(loc); +} + +void CIRGenFunction::LexicalScope::buildImplicitReturn() { + auto &builder = CGF.getBuilder(); + auto *localScope = CGF.currLexScope; + + const auto *FD = cast(CGF.CurGD.getDecl()); + + // C++11 [stmt.return]p2: + // Flowing off the end of a function [...] results in undefined behavior + // in a value-returning function. + // C11 6.9.1p12: + // If the '}' that terminates a function is reached, and the value of the + // function call is used by the caller, the behavior is undefined. 
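+  //
+  // Illustrative example (assumed, for clarity):
+  //   int f(bool b) { if (b) return 1; }
+  // A call such as f(false) flows off the end of a value-returning function,
+  // which is why a trap or unreachable marker may be emitted below instead
+  // of a normal implicit return.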
+ if (CGF.getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && + !CGF.SawAsmBlock && !FD->getReturnType()->isVoidType() && + builder.getInsertionBlock()) { + bool shouldEmitUnreachable = CGF.CGM.getCodeGenOpts().StrictReturn || + !CGF.CGM.MayDropFunctionReturn( + FD->getASTContext(), FD->getReturnType()); + + if (CGF.SanOpts.has(SanitizerKind::Return)) { + assert(!MissingFeatures::sanitizerReturn()); + llvm_unreachable("NYI"); + } else if (shouldEmitUnreachable) { + if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) { + builder.create(localScope->EndLoc); + builder.clearInsertionPoint(); + return; + } + } + + if (CGF.SanOpts.has(SanitizerKind::Return) || shouldEmitUnreachable) { + builder.create(localScope->EndLoc); + builder.clearInsertionPoint(); + return; + } + } + + (void)buildReturn(localScope->EndLoc); +} + +mlir::cir::TryOp CIRGenFunction::LexicalScope::getClosestTryParent() { + auto *scope = this; + while (scope) { + if (scope->isTry()) + return scope->getTry(); + scope = scope->ParentScope; + } + return nullptr; +} + +void CIRGenFunction::finishFunction(SourceLocation EndLoc) { + // CIRGen doesn't use a BreakContinueStack or evaluates OnlySimpleReturnStmts. + + // Usually the return expression is evaluated before the cleanup + // code. If the function contains only a simple return statement, + // such as a constant, the location before the cleanup code becomes + // the last useful breakpoint in the function, because the simple + // return expression will be evaluated after the cleanup code. To be + // safe, set the debug location for cleanup code to the location of + // the return statement. Otherwise the cleanup code should be at the + // end of the function's lexical scope. + // + // If there are multiple branches to the return block, the branch + // instructions will get the location of the return statements and + // all will be fine. + if (auto *DI = getDebugInfo()) + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + + // Pop any cleanups that might have been associated with the + // parameters. Do this in whatever block we're currently in; it's + // important to do this before we enter the return block or return + // edges will be *really* confused. + bool HasCleanups = EHStack.stable_begin() != PrologueCleanupDepth; + if (HasCleanups) { + // Make sure the line table doesn't jump back into the body for + // the ret after it's been at EndLoc. + if (auto *DI = getDebugInfo()) + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + // FIXME(cir): vla.c test currently crashes here. + // PopCleanupBlocks(PrologueCleanupDepth); + } + + // Emit function epilog (to return). + + // Original LLVM codegen does EmitReturnBlock() here, CIRGen handles + // this as part of LexicalScope instead, given CIR might have multiple + // blocks with `cir.return`. + if (ShouldInstrumentFunction()) { + assert(!MissingFeatures::shouldInstrumentFunction() && "NYI"); + } + + // Emit debug descriptor for function end. + if (auto *DI = getDebugInfo()) + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + + // Reset the debug location to that of the simple 'return' expression, if any + // rather than that of the end of the function's scope '}'. + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + + assert(!MissingFeatures::emitFunctionEpilog() && "NYI"); + assert(!MissingFeatures::emitEndEHSpec() && "NYI"); + + // FIXME(cir): vla.c test currently crashes here. 
+ // assert(EHStack.empty() && "did not remove all scopes from cleanup stack!"); + + // If someone did an indirect goto, emit the indirect goto block at the end of + // the function. + assert(!MissingFeatures::indirectBranch() && "NYI"); + + // If some of our locals escaped, insert a call to llvm.localescape in the + // entry block. + assert(!MissingFeatures::escapedLocals() && "NYI"); + + // If someone took the address of a label but never did an indirect goto, we + // made a zero entry PHI node, which is illegal, zap it now. + assert(!MissingFeatures::indirectBranch() && "NYI"); + + // CIRGen doesn't need to emit EHResumeBlock, TerminateLandingPad, + // TerminateHandler, UnreachableBlock, TerminateFunclets, NormalCleanupDest + // here because the basic blocks aren't shared. + + assert(!MissingFeatures::emitDeclMetadata() && "NYI"); + assert(!MissingFeatures::deferredReplacements() && "NYI"); + + // Add the min-legal-vector-width attribute. This contains the max width from: + // 1. min-vector-width attribute used in the source program. + // 2. Any builtins used that have a vector width specified. + // 3. Values passed in and out of inline assembly. + // 4. Width of vector arguments and return types for this function. + // 5. Width of vector arguments and return types for functions called by + // this function. + assert(!MissingFeatures::minLegalVectorWidthAttr() && "NYI"); + + // Add vscale_range attribute if appropriate. + assert(!MissingFeatures::vscaleRangeAttr() && "NYI"); + + // In traditional LLVM codegen, if clang generated an unreachable return + // block, it'd be deleted now. Same for unused ret allocas from ReturnValue +} + +mlir::cir::FuncOp +CIRGenFunction::generateCode(clang::GlobalDecl GD, mlir::cir::FuncOp Fn, + const CIRGenFunctionInfo &FnInfo) { + assert(Fn && "generating code for a null function"); + const auto FD = cast(GD.getDecl()); + CurGD = GD; + + FnRetQualTy = FD->getReturnType(); + if (!FnRetQualTy->isVoidType()) + FnRetCIRTy = getCIRType(FnRetQualTy); + + FunctionArgList Args; + QualType ResTy = buildFunctionArgList(GD, Args); + + if (FD->isInlineBuiltinDeclaration()) { + llvm_unreachable("NYI"); + } else { + // Detect the unusual situation where an inline version is shadowed by a + // non-inline version. In that case we should pick the external one + // everywhere. That's GCC behavior too. Unfortunately, I cannot find a way + // to detect that situation before we reach codegen, so do some late + // replacement. + for (const auto *PD = FD->getPreviousDecl(); PD; + PD = PD->getPreviousDecl()) { + if (LLVM_UNLIKELY(PD->isInlineBuiltinDeclaration())) { + llvm_unreachable("NYI"); + } + } + } + + // Check if we should generate debug info for this function. + if (FD->hasAttr()) { + assert(!MissingFeatures::noDebugInfo()); + } + + // The function might not have a body if we're generating thunks for a + // function declaration. + SourceRange BodyRange; + if (Stmt *Body = FD->getBody()) + BodyRange = Body->getSourceRange(); + else + BodyRange = FD->getLocation(); + // TODO: CurEHLocation + + // Use the location of the start of the function to determine where the + // function definition is located. By default we use the location of the + // declaration as the location for the subprogram. A function may lack a + // declaration in the source code if it is created by code gen. (examples: + // _GLOBAL__I_a, __cxx_global_array_dtor, thunk). 
+ SourceLocation Loc = FD->getLocation(); + + // If this is a function specialization then use the pattern body as the + // location for the function. + if (const auto *SpecDecl = FD->getTemplateInstantiationPattern()) + if (SpecDecl->hasBody(SpecDecl)) + Loc = SpecDecl->getLocation(); + + Stmt *Body = FD->getBody(); + + if (Body) { + // LLVM codegen: Coroutines always emit lifetime markers + // Hide this under request for lifetime emission so that we can write + // tests when the time comes, but CIR should be intrinsically scope + // accurate, so no need to tie coroutines to such markers. + if (isa(Body)) + assert(!MissingFeatures::shouldEmitLifetimeMarkers() && "NYI"); + + // Initialize helper which will detect jumps which can cause invalid + // lifetime markers. + if (ShouldEmitLifetimeMarkers) + assert(!MissingFeatures::shouldEmitLifetimeMarkers() && "NYI"); + } + + // Create a scope in the symbol table to hold variable declarations. + SymTableScopeTy varScope(symbolTable); + // Compiler synthetized functions might have invalid slocs... + auto bSrcLoc = FD->getBody()->getBeginLoc(); + auto eSrcLoc = FD->getBody()->getEndLoc(); + auto unknownLoc = builder.getUnknownLoc(); + + auto FnBeginLoc = bSrcLoc.isValid() ? getLoc(bSrcLoc) : unknownLoc; + auto FnEndLoc = eSrcLoc.isValid() ? getLoc(eSrcLoc) : unknownLoc; + const auto fusedLoc = + mlir::FusedLoc::get(builder.getContext(), {FnBeginLoc, FnEndLoc}); + SourceLocRAIIObject fnLoc{*this, Loc.isValid() ? getLoc(Loc) : unknownLoc}; + + assert(Fn.isDeclaration() && "Function already has body?"); + mlir::Block *EntryBB = Fn.addEntryBlock(); + builder.setInsertionPointToStart(EntryBB); + + { + // Initialize lexical scope information. + LexicalScope lexScope{*this, fusedLoc, EntryBB}; + + // Emit the standard function prologue. + StartFunction(GD, ResTy, Fn, FnInfo, Args, Loc, BodyRange.getBegin()); + + // Save parameters for coroutine function. + if (Body && isa_and_nonnull(Body)) + llvm::append_range(FnArgs, FD->parameters()); + + // Generate the body of the function. + // TODO: PGO.assignRegionCounters + if (isa(FD)) + buildDestructorBody(Args); + else if (isa(FD)) + buildConstructorBody(Args); + else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice && + FD->hasAttr()) + llvm_unreachable("NYI"); + else if (isa(FD) && + cast(FD)->isLambdaStaticInvoker()) { + // The lambda static invoker function is special, because it forwards or + // clones the body of the function call operator (but is actually + // static). + buildLambdaStaticInvokeBody(cast(FD)); + } else if (FD->isDefaulted() && isa(FD) && + (cast(FD)->isCopyAssignmentOperator() || + cast(FD)->isMoveAssignmentOperator())) { + // Implicit copy-assignment gets the same special treatment as implicit + // copy-constructors. + buildImplicitAssignmentOperatorBody(Args); + } else if (Body) { + if (mlir::failed(buildFunctionBody(Body))) { + Fn.erase(); + return nullptr; + } + } else + llvm_unreachable("no definition for emitted function"); + + assert(builder.getInsertionBlock() && "Should be valid"); + } + + if (mlir::failed(Fn.verifyBody())) + return nullptr; + + // Emit the standard function epilogue. + finishFunction(BodyRange.getEnd()); + + // If we haven't marked the function nothrow through other means, do a quick + // pass now to see if we can. 
+ assert(!MissingFeatures::tryMarkNoThrow()); + + return Fn; +} + +mlir::Value CIRGenFunction::createLoad(const VarDecl *VD, const char *Name) { + auto addr = GetAddrOfLocalVar(VD); + return builder.create(getLoc(VD->getLocation()), + addr.getElementType(), addr.getPointer()); +} + +static bool isMemcpyEquivalentSpecialMember(const CXXMethodDecl *D) { + auto *CD = llvm::dyn_cast(D); + if (!(CD && CD->isCopyOrMoveConstructor()) && + !D->isCopyAssignmentOperator() && !D->isMoveAssignmentOperator()) + return false; + + // We can emit a memcpy for a trivial copy or move constructor/assignment + if (D->isTrivial() && !D->getParent()->mayInsertExtraPadding()) + return true; + + if (D->getParent()->isUnion() && D->isDefaulted()) + return true; + + return false; +} + +void CIRGenFunction::buildCXXConstructorCall(const clang::CXXConstructorDecl *D, + clang::CXXCtorType Type, + bool ForVirtualBase, + bool Delegating, + AggValueSlot ThisAVS, + const clang::CXXConstructExpr *E) { + CallArgList Args; + Address This = ThisAVS.getAddress(); + LangAS SlotAS = ThisAVS.getQualifiers().getAddressSpace(); + QualType ThisType = D->getThisType(); + LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); + mlir::Value ThisPtr = This.getPointer(); + + assert(SlotAS == ThisAS && "This edge case NYI"); + + Args.add(RValue::get(ThisPtr), D->getThisType()); + + // In LLVM Codegen: If this is a trivial constructor, just emit what's needed. + // If this is a union copy constructor, we must emit a memcpy, because the AST + // does not model that copy. + if (isMemcpyEquivalentSpecialMember(D)) { + assert(!MissingFeatures::isMemcpyEquivalentSpecialMember()); + } + + const FunctionProtoType *FPT = D->getType()->castAs(); + EvaluationOrder Order = E->isListInitialization() + ? EvaluationOrder::ForceLeftToRight + : EvaluationOrder::Default; + + buildCallArgs(Args, FPT, E->arguments(), E->getConstructor(), + /*ParamsToSkip*/ 0, Order); + + buildCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args, + ThisAVS.mayOverlap(), E->getExprLoc(), + ThisAVS.isSanitizerChecked()); +} + +void CIRGenFunction::buildCXXConstructorCall( + const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, + bool Delegating, Address This, CallArgList &Args, + AggValueSlot::Overlap_t Overlap, SourceLocation Loc, + bool NewPointerIsChecked) { + + const auto *ClassDecl = D->getParent(); + + if (!NewPointerIsChecked) + buildTypeCheck(CIRGenFunction::TCK_ConstructorCall, Loc, This.getPointer(), + getContext().getRecordType(ClassDecl), CharUnits::Zero()); + + // If this is a call to a trivial default constructor: + // In LLVM: do nothing. + // In CIR: emit as a regular call, other later passes should lower the + // ctor call into trivial initialization. + assert(!MissingFeatures::isTrivialAndisDefaultConstructor()); + + if (isMemcpyEquivalentSpecialMember(D)) { + assert(!MissingFeatures::isMemcpyEquivalentSpecialMember()); + } + + bool PassPrototypeArgs = true; + + assert(!D->getInheritedConstructor() && "inheritance NYI"); + + // Insert any ABI-specific implicit constructor arguments. + CIRGenCXXABI::AddedStructorArgCounts ExtraArgs = + CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase, + Delegating, Args); + + // Emit the call. 
+  auto CalleePtr = CGM.getAddrOfCXXStructor(GlobalDecl(D, Type));
+  const CIRGenFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
+      Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
+  CIRGenCallee Callee = CIRGenCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
+  mlir::cir::CIRCallOpInterface C;
+  buildCall(Info, Callee, ReturnValueSlot(), Args, &C, false, getLoc(Loc));
+
+  assert(CGM.getCodeGenOpts().OptimizationLevel == 0 ||
+         ClassDecl->isDynamicClass() || Type == Ctor_Base ||
+         !CGM.getCodeGenOpts().StrictVTablePointers &&
+             "vtable assumption loads NYI");
+}
+
+void CIRGenFunction::buildConstructorBody(FunctionArgList &Args) {
+  // TODO: EmitAsanPrologueOrEpilogue(true);
+  const auto *Ctor = cast<CXXConstructorDecl>(CurGD.getDecl());
+  auto CtorType = CurGD.getCtorType();
+
+  assert((CGM.getTarget().getCXXABI().hasConstructorVariants() ||
+          CtorType == Ctor_Complete) &&
+         "can only generate complete ctor for this ABI");
+
+  // Before we go any further, try the complete->base constructor delegation
+  // optimization.
+  if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) &&
+      CGM.getTarget().getCXXABI().hasConstructorVariants()) {
+    buildDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getEndLoc());
+    return;
+  }
+
+  const FunctionDecl *Definition = nullptr;
+  Stmt *Body = Ctor->getBody(Definition);
+  assert(Definition == Ctor && "emitting wrong constructor body");
+
+  // Enter the function-try-block before the constructor prologue if
+  // applicable.
+  bool IsTryBody = (Body && isa<CXXTryStmt>(Body));
+  if (IsTryBody)
+    llvm_unreachable("NYI");
+
+  // TODO: incrementProfileCounter
+
+  // TODO: RunCleanupsScope RunCleanups(*this);
+
+  // TODO: in restricted cases, we can emit the vbase initializers of a
+  // complete ctor and then delegate to the base ctor.
+
+  // Emit the constructor prologue, i.e. the base and member initializers.
+  buildCtorPrologue(Ctor, CtorType, Args);
+
+  // Emit the body of the statement.
+  if (IsTryBody)
+    llvm_unreachable("NYI");
+  else {
+    // TODO: propagate this result via mlir::LogicalResult. Just unreachable
+    // for now, just to have it handled.
+    if (mlir::failed(buildStmt(Body, true)))
+      llvm_unreachable("NYI");
+  }
+
+  // Emit any cleanup blocks associated with the member or base initializers,
+  // which includes (along the exceptional path) the destructors for those
+  // members and bases that were fully constructed.
+  /// TODO: RunCleanups.ForceCleanup();
+
+  if (IsTryBody)
+    llvm_unreachable("NYI");
+}
+
+/// Given a value of type T* that may not point to a complete object,
+/// construct an l-value with the natural pointee alignment of T.
+LValue CIRGenFunction::MakeNaturalAlignPointeeAddrLValue(mlir::Value val,
+                                                         QualType ty) {
+  // FIXME(cir): is it safe to assume Op->getResult(0) is valid? Perhaps
+  // assert on the result type first.
+ LValueBaseInfo baseInfo; + TBAAAccessInfo tbaaInfo; + CharUnits align = CGM.getNaturalTypeAlignment(ty, &baseInfo, &tbaaInfo, + /* for PointeeType= */ true); + return makeAddrLValue(Address(val, align), ty, baseInfo); +} + +LValue CIRGenFunction::MakeNaturalAlignAddrLValue(mlir::Value val, + QualType ty) { + LValueBaseInfo baseInfo; + TBAAAccessInfo tbaaInfo; + assert(!MissingFeatures::tbaa()); + CharUnits alignment = CGM.getNaturalTypeAlignment(ty, &baseInfo, &tbaaInfo); + Address addr(val, getTypes().convertTypeForMem(ty), alignment); + return LValue::makeAddr(addr, ty, getContext(), baseInfo, tbaaInfo); +} + +// Map the LangOption for exception behavior into the corresponding enum in +// the IR. +cir::fp::ExceptionBehavior +ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) { + switch (Kind) { + case LangOptions::FPE_Ignore: + return cir::fp::ebIgnore; + case LangOptions::FPE_MayTrap: + return cir::fp::ebMayTrap; + case LangOptions::FPE_Strict: + return cir::fp::ebStrict; + default: + llvm_unreachable("Unsupported FP Exception Behavior"); + } +} + +bool CIRGenFunction::ShouldSkipSanitizerInstrumentation() { + if (!CurFuncDecl) + return false; + return CurFuncDecl->hasAttr(); +} + +/// Return true if the current function should be instrumented with XRay nop +/// sleds. +bool CIRGenFunction::ShouldXRayInstrumentFunction() const { + return CGM.getCodeGenOpts().XRayInstrumentFunctions; +} + +static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) { + auto *MD = dyn_cast_or_null(D); + if (!MD || !MD->getDeclName().getAsIdentifierInfo() || + !MD->getDeclName().getAsIdentifierInfo()->isStr("allocate") || + (MD->getNumParams() != 1 && MD->getNumParams() != 2)) + return false; + + if (MD->parameters()[0]->getType().getCanonicalType() != Ctx.getSizeType()) + return false; + + if (MD->getNumParams() == 2) { + auto *PT = MD->parameters()[1]->getType()->getAs(); + if (!PT || !PT->isVoidPointerType() || + !PT->getPointeeType().isConstQualified()) + return false; + } + + return true; +} + +void CIRGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, + mlir::cir::FuncOp Fn, + const CIRGenFunctionInfo &FnInfo, + const FunctionArgList &Args, + SourceLocation Loc, + SourceLocation StartLoc) { + assert(!CurFn && + "Do not use a CIRGenFunction object for more than one function"); + + const auto *D = GD.getDecl(); + + DidCallStackSave = false; + CurCodeDecl = D; + const auto *FD = dyn_cast_or_null(D); + if (FD && FD->usesSEHTry()) + CurSEHParent = GD; + CurFuncDecl = (D ? D->getNonClosureContext() : nullptr); + FnRetTy = RetTy; + CurFn = Fn; + CurFnInfo = &FnInfo; + + // If this function is ignored for any of the enabled sanitizers, disable + // the sanitizer for the function. + do { +#define SANITIZER(NAME, ID) \ + if (SanOpts.empty()) \ + break; \ + if (SanOpts.has(SanitizerKind::ID)) \ + if (CGM.isInNoSanitizeList(SanitizerKind::ID, Fn, Loc)) \ + SanOpts.set(SanitizerKind::ID, false); + +#include "clang/Basic/Sanitizers.def" +#undef SANITIZER + } while (0); + + if (D) { + const bool SanitizeBounds = SanOpts.hasOneOf(SanitizerKind::Bounds); + SanitizerMask no_sanitize_mask; + bool NoSanitizeCoverage = false; + + for (auto *Attr : D->specific_attrs()) { + no_sanitize_mask |= Attr->getMask(); + // SanitizeCoverage is not handled by SanOpts. + if (Attr->hasCoverage()) + NoSanitizeCoverage = true; + } + + // Apply the no_sanitize* attributes to SanOpts. 
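+    //
+    // Illustrative example (assumed, for clarity): for
+    //   __attribute__((no_sanitize("address"))) void f();
+    // the Address bit is set in no_sanitize_mask, so it (and its kernel
+    // counterpart) is cleared from SanOpts for this function below.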
+ SanOpts.Mask &= ~no_sanitize_mask; + if (no_sanitize_mask & SanitizerKind::Address) + SanOpts.set(SanitizerKind::KernelAddress, false); + if (no_sanitize_mask & SanitizerKind::KernelAddress) + SanOpts.set(SanitizerKind::Address, false); + if (no_sanitize_mask & SanitizerKind::HWAddress) + SanOpts.set(SanitizerKind::KernelHWAddress, false); + if (no_sanitize_mask & SanitizerKind::KernelHWAddress) + SanOpts.set(SanitizerKind::HWAddress, false); + + // TODO(cir): set llvm::Attribute::NoSanitizeBounds + if (SanitizeBounds && !SanOpts.hasOneOf(SanitizerKind::Bounds)) + assert(!MissingFeatures::sanitizeOther()); + + // TODO(cir): set llvm::Attribute::NoSanitizeCoverage + if (NoSanitizeCoverage && CGM.getCodeGenOpts().hasSanitizeCoverage()) + assert(!MissingFeatures::sanitizeOther()); + + // Some passes need the non-negated no_sanitize attribute. Pass them on. + if (CGM.getCodeGenOpts().hasSanitizeBinaryMetadata()) { + // TODO(cir): set no_sanitize_thread + if (no_sanitize_mask & SanitizerKind::Thread) + assert(!MissingFeatures::sanitizeOther()); + } + } + + if (ShouldSkipSanitizerInstrumentation()) { + assert(!MissingFeatures::sanitizeOther()); + } else { + // Apply sanitizer attributes to the function. + if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress)) + assert(!MissingFeatures::sanitizeOther()); + if (SanOpts.hasOneOf(SanitizerKind::HWAddress | + SanitizerKind::KernelHWAddress)) + assert(!MissingFeatures::sanitizeOther()); + if (SanOpts.has(SanitizerKind::MemtagStack)) + assert(!MissingFeatures::sanitizeOther()); + if (SanOpts.has(SanitizerKind::Thread)) + assert(!MissingFeatures::sanitizeOther()); + if (SanOpts.has(SanitizerKind::NumericalStability)) + assert(!MissingFeatures::sanitizeOther()); + if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) + assert(!MissingFeatures::sanitizeOther()); + } + if (SanOpts.has(SanitizerKind::SafeStack)) + assert(!MissingFeatures::sanitizeOther()); + if (SanOpts.has(SanitizerKind::ShadowCallStack)) + assert(!MissingFeatures::sanitizeOther()); + + // Apply fuzzing attribute to the function. + if (SanOpts.hasOneOf(SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink)) + assert(!MissingFeatures::sanitizeOther()); + + // Ignore TSan memory acesses from within ObjC/ObjC++ dealloc, initialize, + // .cxx_destruct, __destroy_helper_block_ and all of their calees at run time. + if (SanOpts.has(SanitizerKind::Thread)) { + if (const auto *OMD = dyn_cast_or_null(D)) { + llvm_unreachable("NYI"); + } + } + + // Ignore unrelated casts in STL allocate() since the allocator must cast + // from void* to T* before object initialization completes. Don't match on the + // namespace because not all allocators are in std:: + if (D && SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { + if (matchesStlAllocatorFn(D, getContext())) + SanOpts.Mask &= ~SanitizerKind::CFIUnrelatedCast; + } + + // Ignore null checks in coroutine functions since the coroutines passes + // are not aware of how to move the extra UBSan instructions across the split + // coroutine boundaries. + if (D && SanOpts.has(SanitizerKind::Null)) + if (FD && FD->getBody() && + FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass) + SanOpts.Mask &= ~SanitizerKind::Null; + + // Apply xray attributes to the function (as a string, for now) + if (const auto *XRayAttr = D ? 
D->getAttr() : nullptr) { + assert(!MissingFeatures::xray()); + } else { + assert(!MissingFeatures::xray()); + } + + if (ShouldXRayInstrumentFunction()) { + assert(!MissingFeatures::xray()); + } + + if (CGM.getCodeGenOpts().getProfileInstr() != CodeGenOptions::ProfileNone) { + assert(!MissingFeatures::getProfileCount()); + } + + unsigned Count, Offset; + if (const auto *Attr = + D ? D->getAttr() : nullptr) { + llvm_unreachable("NYI"); + } else { + Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount; + Offset = CGM.getCodeGenOpts().PatchableFunctionEntryOffset; + } + if (Count && Offset <= Count) { + llvm_unreachable("NYI"); + } + + // Add no-jump-tables value. + if (CGM.getCodeGenOpts().NoUseJumpTables) + llvm_unreachable("NYI"); + + // Add no-inline-line-tables value. + if (CGM.getCodeGenOpts().NoInlineLineTables) + llvm_unreachable("NYI"); + + // Add profile-sample-accurate value. + if (CGM.getCodeGenOpts().ProfileSampleAccurate) + llvm_unreachable("NYI"); + + if (!CGM.getCodeGenOpts().SampleProfileFile.empty()) + llvm_unreachable("NYI"); + + if (D && D->hasAttr()) + llvm_unreachable("NYI"); + + if (D && D->hasAttr()) + llvm_unreachable("NYI"); + + if (FD && getLangOpts().OpenCL) { + buildKernelMetadata(FD, Fn); + } + + // If we are checking function types, emit a function type signature as + // prologue data. + if (FD && getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) { + llvm_unreachable("NYI"); + } + + // If we're checking nullability, we need to know whether we can check the + // return value. Initialize the falg to 'true' and refine it in + // buildParmDecl. + if (SanOpts.has(SanitizerKind::NullabilityReturn)) { + llvm_unreachable("NYI"); + } + + // If we're in C++ mode and the function name is "main", it is guaranteed to + // be norecurse by the standard (3.6.1.3 "The function main shall not be + // used within a program"). + // + // OpenCL C 2.0 v2.2-11 s6.9.i: + // Recursion is not supported. + // + // SYCL v1.2.1 s3.10: + // kernels cannot include RTTI information, exception cases, recursive + // code, virtual functions or make use of C++ libraries that are not + // compiled for the device. + if (FD && + ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL || + getLangOpts().SYCLIsDevice | + (getLangOpts().CUDA && FD->hasAttr()))) + ; // TODO: support norecurse attr + + llvm::RoundingMode RM = getLangOpts().getDefaultRoundingMode(); + cir::fp::ExceptionBehavior FPExceptionBehavior = + ToConstrainedExceptMD(getLangOpts().getDefaultExceptionMode()); + builder.setDefaultConstrainedRounding(RM); + builder.setDefaultConstrainedExcept(FPExceptionBehavior); + if ((FD && (FD->UsesFPIntrin() || FD->hasAttr())) || + (!FD && (FPExceptionBehavior != cir::fp::ebIgnore || + RM != llvm::RoundingMode::NearestTiesToEven))) { + llvm_unreachable("NYI"); + } + + // TODO: stackrealign attr + + mlir::Block *EntryBB = &Fn.getBlocks().front(); + + // TODO: allocapt insertion? probably don't need for CIR + + // TODO: return value checking + + if (getDebugInfo()) { + llvm_unreachable("NYI"); + } + + if (ShouldInstrumentFunction()) { + llvm_unreachable("NYI"); + } + + // Since emitting the mcount call here impacts optimizations such as + // function inlining, we just add an attribute to insert a mcount call in + // backend. The attribute "counting-function" is set to mcount function name + // which is architecture dependent. 
+  if (CGM.getCodeGenOpts().InstrumentForProfiling) {
+    llvm_unreachable("NYI");
+  }
+
+  if (CGM.getCodeGenOpts().PackedStack) {
+    llvm_unreachable("NYI");
+  }
+
+  if (CGM.getCodeGenOpts().WarnStackSize != UINT_MAX) {
+    llvm_unreachable("NYI");
+  }
+
+  assert(!MissingFeatures::emitStartEHSpec() && "NYI");
+  // FIXME(cir): vla.c test currently crashes here.
+  // PrologueCleanupDepth = EHStack.stable_begin();
+
+  if (getLangOpts().OpenMP && CurCodeDecl)
+    CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl);
+
+  // TODO: buildFunctionProlog
+
+  {
+    // Set the insertion point in the builder to the beginning of the
+    // function body, it will be used throughout the codegen to create
+    // operations in this function.
+    builder.setInsertionPointToStart(EntryBB);
+
+    // TODO: this should live in `buildFunctionProlog`.
+    // Declare all the function arguments in the symbol table.
+    for (const auto nameValue : llvm::zip(Args, EntryBB->getArguments())) {
+      auto *paramVar = std::get<0>(nameValue);
+      auto paramVal = std::get<1>(nameValue);
+      auto alignment = getContext().getDeclAlign(paramVar);
+      auto paramLoc = getLoc(paramVar->getSourceRange());
+      paramVal.setLoc(paramLoc);
+
+      mlir::Value addr;
+      if (failed(declare(paramVar, paramVar->getType(), paramLoc, alignment,
+                         addr, true /*param*/)))
+        return;
+
+      auto address = Address(addr, alignment);
+      setAddrOfLocalVar(paramVar, address);
+
+      // Location of the store to the param storage tracked as beginning of
+      // the function body.
+      auto fnBodyBegin = getLoc(FD->getBody()->getBeginLoc());
+      builder.CIRBaseBuilderTy::createStore(fnBodyBegin, paramVal, addr);
+    }
+    assert(builder.getInsertionBlock() && "Should be valid");
+
+    auto FnEndLoc = getLoc(FD->getBody()->getEndLoc());
+
+    // When the current function is not void, create an address to store the
+    // result value.
+    if (FnRetCIRTy.has_value())
+      buildAndUpdateRetAlloca(FnRetQualTy, FnEndLoc,
+                              CGM.getNaturalTypeAlignment(FnRetQualTy));
+  }
+
+  if (D && isa<CXXMethodDecl>(D) && cast<CXXMethodDecl>(D)->isInstance()) {
+    CGM.getCXXABI().buildInstanceFunctionProlog(*this);
+
+    const auto *MD = cast<CXXMethodDecl>(D);
+    if (MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call) {
+      // We're in a lambda.
+      auto Fn = dyn_cast<mlir::cir::FuncOp>(CurFn);
+      assert(Fn && "other callables NYI");
+      Fn.setLambdaAttr(mlir::UnitAttr::get(builder.getContext()));
+
+      // Figure out the captures.
+      MD->getParent()->getCaptureFields(LambdaCaptureFields,
+                                        LambdaThisCaptureField);
+      if (LambdaThisCaptureField) {
+        llvm_unreachable("NYI");
+      }
+      for (auto *FD : MD->getParent()->fields()) {
+        if (FD->hasCapturedVLAType()) {
+          llvm_unreachable("NYI");
+        }
+      }
+
+    } else {
+      // Not in a lambda; just use 'this' from the method.
+      // FIXME: Should we generate a new load for each use of 'this'? The fast
+      // register allocator would be happier...
+      CXXThisValue = CXXABIThisValue;
+    }
+
+    // Check the 'this' pointer once per function, if it's available.
+    if (CXXABIThisValue) {
+      SanitizerSet SkippedChecks;
+      SkippedChecks.set(SanitizerKind::ObjectSize, true);
+      QualType ThisTy = MD->getThisType();
+      (void)ThisTy;
+
+      // If this is the call operator of a lambda with no capture-default, it
+      // may have a static invoker function, which may call this operator with
+      // a null 'this' pointer.
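+      //
+      // Illustrative example (assumed, for clarity):
+      //   auto l = [](int x) { return x + 1; };
+      //   int (*fp)(int) = l; // conversion goes through the static invoker
+      // The invoker forwards to operator() without a real object, so the
+      // null check on 'this' is skipped below.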
+ if (isLambdaCallOperator(MD) && + MD->getParent()->getLambdaCaptureDefault() == LCD_None) + SkippedChecks.set(SanitizerKind::Null, true); + + assert(!MissingFeatures::buildTypeCheck() && "NYI"); + } + } + + // If any of the arguments have a variably modified type, make sure to emit + // the type size. + for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end(); i != e; + ++i) { + const VarDecl *VD = *i; + + // Dig out the type as written from ParmVarDecls; it's unclear whether the + // standard (C99 6.9.1p10) requires this, but we're following the + // precedent set by gcc. + QualType Ty; + if (const auto *PVD = dyn_cast(VD)) + Ty = PVD->getOriginalType(); + else + Ty = VD->getType(); + + if (Ty->isVariablyModifiedType()) + buildVariablyModifiedType(Ty); + } + // Emit a location at the end of the prologue. + if (getDebugInfo()) + llvm_unreachable("NYI"); + + // TODO: Do we need to handle this in two places like we do with + // target-features/target-cpu? + if (CurFuncDecl) + if (const auto *VecWidth = CurFuncDecl->getAttr()) + llvm_unreachable("NYI"); +} + +/// Return true if the current function should be instrumented with +/// __cyg_profile_func_* calls +bool CIRGenFunction::ShouldInstrumentFunction() { + if (!CGM.getCodeGenOpts().InstrumentFunctions && + !CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining && + !CGM.getCodeGenOpts().InstrumentFunctionEntryBare) + return false; + + llvm_unreachable("NYI"); +} + +mlir::LogicalResult CIRGenFunction::buildFunctionBody(const clang::Stmt *Body) { + // TODO: incrementProfileCounter(Body); + + // We start with function level scope for variables. + SymTableScopeTy varScope(symbolTable); + + auto result = mlir::LogicalResult::success(); + if (const CompoundStmt *S = dyn_cast(Body)) + buildCompoundStmtWithoutScope(*S); + else + result = buildStmt(Body, /*useCurrentScope*/ true); + + // This is checked after emitting the function body so we know if there are + // any permitted infinite loops. + // TODO: if (checkIfFunctionMustProgress()) + // CurFn->addFnAttr(llvm::Attribute::MustProgress); + return result; +} + +clang::QualType CIRGenFunction::buildFunctionArgList(clang::GlobalDecl GD, + FunctionArgList &Args) { + const auto *FD = cast(GD.getDecl()); + QualType ResTy = FD->getReturnType(); + + const auto *MD = dyn_cast(FD); + if (MD && MD->isInstance()) { + if (CGM.getCXXABI().HasThisReturn(GD)) + llvm_unreachable("NYI"); + else if (CGM.getCXXABI().hasMostDerivedReturn(GD)) + llvm_unreachable("NYI"); + CGM.getCXXABI().buildThisParam(*this, Args); + } + + // The base version of an inheriting constructor whose constructed base is a + // virtual base is not passed any arguments (because it doesn't actually + // call the inherited constructor). 
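+  //
+  // Illustrative example (assumed, for clarity):
+  //   struct A { A(int); };
+  //   struct B : virtual A { using A::A; };
+  // The base variant of B's inheriting constructor receives no arguments,
+  // since only the complete-object variant actually calls A(int).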
+ bool PassedParams = true; + if (const auto *CD = dyn_cast(FD)) + if (auto Inherited = CD->getInheritedConstructor()) + PassedParams = + getTypes().inheritingCtorHasParams(Inherited, GD.getCtorType()); + + if (PassedParams) { + for (auto *Param : FD->parameters()) { + Args.push_back(Param); + if (!Param->hasAttr()) + continue; + + auto *Implicit = ImplicitParamDecl::Create( + getContext(), Param->getDeclContext(), Param->getLocation(), + /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamKind::Other); + SizeArguments[Param] = Implicit; + Args.push_back(Implicit); + } + } + + if (MD && (isa(MD) || isa(MD))) + CGM.getCXXABI().addImplicitStructorParams(*this, ResTy, Args); + + return ResTy; +} + +static std::string getVersionedTmpName(llvm::StringRef name, unsigned cnt) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << name << cnt; + return std::string(Out.str()); +} + +std::string CIRGenFunction::getCounterAggTmpAsString() { + return getVersionedTmpName("agg.tmp", CounterAggTmp++); +} + +std::string CIRGenFunction::getCounterRefTmpAsString() { + return getVersionedTmpName("ref.tmp", CounterRefTmp++); +} + +void CIRGenFunction::buildNullInitialization(mlir::Location loc, + Address DestPtr, QualType Ty) { + // Ignore empty classes in C++. + if (getLangOpts().CPlusPlus) { + if (const RecordType *RT = Ty->getAs()) { + if (cast(RT->getDecl())->isEmpty()) + return; + } + } + + // Cast the dest ptr to the appropriate i8 pointer type. + if (builder.isInt8Ty(DestPtr.getElementType())) { + llvm_unreachable("NYI"); + } + + // Get size and alignment info for this aggregate. + CharUnits size = getContext().getTypeSizeInChars(Ty); + [[maybe_unused]] mlir::Attribute SizeVal{}; + [[maybe_unused]] const VariableArrayType *vla = nullptr; + + // Don't bother emitting a zero-byte memset. + if (size.isZero()) { + // But note that getTypeInfo returns 0 for a VLA. + if (const VariableArrayType *vlaType = dyn_cast_or_null( + getContext().getAsArrayType(Ty))) { + llvm_unreachable("NYI"); + } else { + return; + } + } else { + SizeVal = CGM.getSize(size); + } + + // If the type contains a pointer to data member we can't memset it to zero. + // Instead, create a null constant and copy it to the destination. + // TODO: there are other patterns besides zero that we can usefully memset, + // like -1, which happens to be the pattern used by member-pointers. + if (!CGM.getTypes().isZeroInitializable(Ty)) { + llvm_unreachable("NYI"); + } + + // In LLVM Codegen: otherwise, just memset the whole thing to zero using + // Builder.CreateMemSet. In CIR just emit a store of #cir.zero to the + // respective address. 
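+  //
+  // Rough sketch of the intended CIR (assumed, syntax approximate):
+  //   %zero = cir.const #cir.zero : !T
+  //   cir.store %zero, %dest : !T, !cir.ptr<!T>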
+ // Builder.CreateMemSet(DestPtr, Builder.getInt8(0), SizeVal, false); + builder.createStore(loc, builder.getZero(loc, getTypes().ConvertType(Ty)), + DestPtr); +} + +CIRGenFunction::CIRGenFPOptionsRAII::CIRGenFPOptionsRAII(CIRGenFunction &CGF, + const clang::Expr *E) + : CGF(CGF) { + ConstructorHelper(E->getFPFeaturesInEffect(CGF.getLangOpts())); +} + +CIRGenFunction::CIRGenFPOptionsRAII::CIRGenFPOptionsRAII(CIRGenFunction &CGF, + FPOptions FPFeatures) + : CGF(CGF) { + ConstructorHelper(FPFeatures); +} + +void CIRGenFunction::CIRGenFPOptionsRAII::ConstructorHelper( + FPOptions FPFeatures) { + OldFPFeatures = CGF.CurFPFeatures; + CGF.CurFPFeatures = FPFeatures; + + OldExcept = CGF.builder.getDefaultConstrainedExcept(); + OldRounding = CGF.builder.getDefaultConstrainedRounding(); + + if (OldFPFeatures == FPFeatures) + return; + + // TODO(cir): create guard to restore fast math configurations. + assert(!MissingFeatures::fastMathGuard()); + + llvm::RoundingMode NewRoundingBehavior = FPFeatures.getRoundingMode(); + // TODO(cir): override rounding behaviour once FM configs are guarded. + auto NewExceptionBehavior = + ToConstrainedExceptMD(static_cast( + FPFeatures.getExceptionMode())); + // TODO(cir): override exception behaviour once FM configs are guarded. + + // TODO(cir): override FP flags once FM configs are guarded. + assert(!MissingFeatures::fastMathFlags()); + + assert((CGF.CurFuncDecl == nullptr || CGF.builder.getIsFPConstrained() || + isa(CGF.CurFuncDecl) || + isa(CGF.CurFuncDecl) || + (NewExceptionBehavior == fp::ebIgnore && + NewRoundingBehavior == llvm::RoundingMode::NearestTiesToEven)) && + "FPConstrained should be enabled on entire function"); + + // TODO(cir): mark CIR function with fast math attributes. + assert(!MissingFeatures::fastMathFuncAttributes()); +} + +CIRGenFunction::CIRGenFPOptionsRAII::~CIRGenFPOptionsRAII() { + CGF.CurFPFeatures = OldFPFeatures; + CGF.builder.setDefaultConstrainedExcept(OldExcept); + CGF.builder.setDefaultConstrainedRounding(OldRounding); +} + +// TODO(cir): should be shared with LLVM codegen. +bool CIRGenFunction::shouldNullCheckClassCastValue(const CastExpr *CE) { + const Expr *E = CE->getSubExpr(); + + if (CE->getCastKind() == CK_UncheckedDerivedToBase) + return false; + + if (isa(E->IgnoreParens())) { + // We always assume that 'this' is never null. + return false; + } + + if (const ImplicitCastExpr *ICE = dyn_cast(CE)) { + // And that glvalue casts are never null. + if (ICE->isGLValue()) + return false; + } + + return true; +} + +void CIRGenFunction::buildDeclRefExprDbgValue(const DeclRefExpr *E, + const APValue &Init) { + assert(!MissingFeatures::generateDebugInfo()); +} + +Address CIRGenFunction::buildVAListRef(const Expr *E) { + if (getContext().getBuiltinVaListType()->isArrayType()) + return buildPointerWithAlignment(E); + return buildLValue(E).getAddress(); +} + +// Emits an error if we don't have a valid set of target features for the +// called function. +void CIRGenFunction::checkTargetFeatures(const CallExpr *E, + const FunctionDecl *TargetDecl) { + return checkTargetFeatures(E->getBeginLoc(), TargetDecl); +} + +// Emits an error if we don't have a valid set of target features for the +// called function. +void CIRGenFunction::checkTargetFeatures(SourceLocation Loc, + const FunctionDecl *TargetDecl) { + // Early exit if this is an indirect call. + if (!TargetDecl) + return; + + // Get the current enclosing function if it exists. If it doesn't + // we can't check the target features anyhow. 
+ const FunctionDecl *FD = dyn_cast_or_null(CurCodeDecl); + if (!FD) + return; + + // Grab the required features for the call. For a builtin this is listed in + // the td file with the default cpu, for an always_inline function this is any + // listed cpu and any listed features. + unsigned BuiltinID = TargetDecl->getBuiltinID(); + std::string MissingFeature; + llvm::StringMap CallerFeatureMap; + CGM.getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD); + if (BuiltinID) { + StringRef FeatureList( + getContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); + if (!Builtin::evaluateRequiredTargetFeatures(FeatureList, + CallerFeatureMap)) { + CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) + << TargetDecl->getDeclName() << FeatureList; + } + } else if (!TargetDecl->isMultiVersion() && + TargetDecl->hasAttr()) { + // Get the required features for the callee. + + const TargetAttr *TD = TargetDecl->getAttr(); + ParsedTargetAttr ParsedAttr = getContext().filterFunctionTargetAttrs(TD); + + SmallVector ReqFeatures; + llvm::StringMap CalleeFeatureMap; + getContext().getFunctionFeatureMap(CalleeFeatureMap, TargetDecl); + + for (const auto &F : ParsedAttr.Features) { + if (F[0] == '+' && CalleeFeatureMap.lookup(F.substr(1))) + ReqFeatures.push_back(StringRef(F).substr(1)); + } + + for (const auto &F : CalleeFeatureMap) { + // Only positive features are "required". + if (F.getValue()) + ReqFeatures.push_back(F.getKey()); + } + if (!llvm::all_of(ReqFeatures, [&](StringRef Feature) { + if (!CallerFeatureMap.lookup(Feature)) { + MissingFeature = Feature.str(); + return false; + } + return true; + })) + CGM.getDiags().Report(Loc, diag::err_function_needs_feature) + << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; + } else if (!FD->isMultiVersion() && FD->hasAttr()) { + llvm::StringMap CalleeFeatureMap; + getContext().getFunctionFeatureMap(CalleeFeatureMap, TargetDecl); + + for (const auto &F : CalleeFeatureMap) { + if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) || + !CallerFeatureMap.find(F.getKey())->getValue())) + CGM.getDiags().Report(Loc, diag::err_function_needs_feature) + << FD->getDeclName() << TargetDecl->getDeclName() << F.getKey(); + } + } +} + +CIRGenFunction::VlaSizePair CIRGenFunction::getVLASize(QualType type) { + const VariableArrayType *vla = + CGM.getASTContext().getAsVariableArrayType(type); + assert(vla && "type was not a variable array type!"); + return getVLASize(vla); +} + +CIRGenFunction::VlaSizePair +CIRGenFunction::getVLASize(const VariableArrayType *type) { + // The number of elements so far; always size_t. + mlir::Value numElements; + + QualType elementType; + do { + elementType = type->getElementType(); + mlir::Value vlaSize = VLASizeMap[type->getSizeExpr()]; + assert(vlaSize && "no size for VLA!"); + assert(vlaSize.getType() == SizeTy); + + if (!numElements) { + numElements = vlaSize; + } else { + // It's undefined behavior if this wraps around, so mark it that way. + // FIXME: Teach -fsanitize=undefined to trap this. 
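+      //
+      // Illustrative example (assumed, for clarity): for 'int a[n][m]' this
+      // loop visits both VLA dimensions, so numElements becomes n * m and
+      // the returned element type is 'int'.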
+ + numElements = builder.createMul(numElements, vlaSize); + } + } while ((type = getContext().getAsVariableArrayType(elementType))); + + assert(numElements && "Undefined elements number"); + return {numElements, elementType}; +} + +// TODO(cir): most part of this function can be shared between CIRGen +// and traditional LLVM codegen +void CIRGenFunction::buildVariablyModifiedType(QualType type) { + assert(type->isVariablyModifiedType() && + "Must pass variably modified type to EmitVLASizes!"); + + // We're going to walk down into the type and look for VLA + // expressions. + do { + assert(type->isVariablyModifiedType()); + + const Type *ty = type.getTypePtr(); + switch (ty->getTypeClass()) { + case clang::Type::CountAttributed: + case clang::Type::PackIndexing: + case clang::Type::ArrayParameter: + llvm_unreachable("NYI"); + +#define TYPE(Class, Base) +#define ABSTRACT_TYPE(Class, Base) +#define NON_CANONICAL_TYPE(Class, Base) +#define DEPENDENT_TYPE(Class, Base) case Type::Class: +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) +#include "clang/AST/TypeNodes.inc" + llvm_unreachable("unexpected dependent type!"); + + // These types are never variably-modified. + case Type::Builtin: + case Type::Complex: + case Type::Vector: + case Type::ExtVector: + case Type::ConstantMatrix: + case Type::Record: + case Type::Enum: + case Type::Using: + case Type::TemplateSpecialization: + case Type::ObjCTypeParam: + case Type::ObjCObject: + case Type::ObjCInterface: + case Type::ObjCObjectPointer: + case Type::BitInt: + llvm_unreachable("type class is never variably-modified!"); + + case Type::Elaborated: + type = cast(ty)->getNamedType(); + break; + + case Type::Adjusted: + type = cast(ty)->getAdjustedType(); + break; + + case Type::Decayed: + type = cast(ty)->getPointeeType(); + break; + + case Type::Pointer: + type = cast(ty)->getPointeeType(); + break; + + case Type::BlockPointer: + type = cast(ty)->getPointeeType(); + break; + + case Type::LValueReference: + case Type::RValueReference: + type = cast(ty)->getPointeeType(); + break; + + case Type::MemberPointer: + type = cast(ty)->getPointeeType(); + break; + + case Type::ConstantArray: + case Type::IncompleteArray: + // Losing element qualification here is fine. + type = cast(ty)->getElementType(); + break; + + case Type::VariableArray: { + // Losing element qualification here is fine. + const VariableArrayType *vat = cast(ty); + + // Unknown size indication requires no size computation. + // Otherwise, evaluate and record it. + if (const Expr *sizeExpr = vat->getSizeExpr()) { + // It's possible that we might have emitted this already, + // e.g. with a typedef and a pointer to it. + mlir::Value &entry = VLASizeMap[sizeExpr]; + if (!entry) { + mlir::Value size = buildScalarExpr(sizeExpr); + assert(!MissingFeatures::sanitizeVLABound()); + + // Always zexting here would be wrong if it weren't + // undefined behavior to have a negative bound. + // FIXME: What about when size's type is larger than size_t? + entry = builder.createIntCast(size, SizeTy); + } + } + type = vat->getElementType(); + break; + } + + case Type::FunctionProto: + case Type::FunctionNoProto: + type = cast(ty)->getReturnType(); + break; + + case Type::Paren: + case Type::TypeOf: + case Type::UnaryTransform: + case Type::Attributed: + case Type::BTFTagAttributed: + case Type::SubstTemplateTypeParm: + case Type::MacroQualified: + // Keep walking after single level desugaring. 
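+      //
+      // Illustrative example (assumed, for clarity): '__typeof__(int[n]) a;'
+      // hides the VLA behind a TypeOfType; one desugaring step exposes it so
+      // its size expression can be recorded by the VariableArray case.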
+ type = type.getSingleStepDesugaredType(getContext()); + break; + + case Type::Typedef: + case Type::Decltype: + case Type::Auto: + case Type::DeducedTemplateSpecialization: + // Stop walking: nothing to do. + return; + + case Type::TypeOfExpr: + // Stop walking: emit typeof expression. + buildIgnoredExpr(cast(ty)->getUnderlyingExpr()); + return; + + case Type::Atomic: + type = cast(ty)->getValueType(); + break; + + case Type::Pipe: + type = cast(ty)->getElementType(); + break; + } + } while (type->isVariablyModifiedType()); +} + +/// Computes the length of an array in elements, as well as the base +/// element type and a properly-typed first element pointer. +mlir::Value +CIRGenFunction::buildArrayLength(const clang::ArrayType *origArrayType, + QualType &baseType, Address &addr) { + const auto *arrayType = origArrayType; + + // If it's a VLA, we have to load the stored size. Note that + // this is the size of the VLA in bytes, not its size in elements. + mlir::Value numVLAElements{}; + if (isa(arrayType)) { + llvm_unreachable("NYI"); + } + + uint64_t countFromCLAs = 1; + QualType eltType; + + // llvm::ArrayType *llvmArrayType = + // dyn_cast(addr.getElementType()); + auto cirArrayType = + mlir::dyn_cast(addr.getElementType()); + + while (cirArrayType) { + assert(isa(arrayType)); + countFromCLAs *= cirArrayType.getSize(); + eltType = arrayType->getElementType(); + + cirArrayType = + mlir::dyn_cast(cirArrayType.getEltType()); + + arrayType = getContext().getAsArrayType(arrayType->getElementType()); + assert((!cirArrayType || arrayType) && + "CIR and Clang types are out-of-synch"); + } + + if (arrayType) { + // From this point onwards, the Clang array type has been emitted + // as some other type (probably a packed struct). Compute the array + // size, and just emit the 'begin' expression as a bitcast. + llvm_unreachable("NYI"); + } + + baseType = eltType; + auto numElements = builder.getConstInt(*currSrcLoc, SizeTy, countFromCLAs); + + // If we had any VLA dimensions, factor them in. + if (numVLAElements) + llvm_unreachable("NYI"); + + return numElements; +} + +mlir::Value CIRGenFunction::buildAlignmentAssumption( + mlir::Value ptrValue, QualType ty, SourceLocation loc, + SourceLocation assumptionLoc, mlir::IntegerAttr alignment, + mlir::Value offsetValue) { + if (SanOpts.has(SanitizerKind::Alignment)) + llvm_unreachable("NYI"); + return builder.create( + getLoc(assumptionLoc), ptrValue, alignment, offsetValue); +} + +mlir::Value CIRGenFunction::buildAlignmentAssumption( + mlir::Value ptrValue, const Expr *expr, SourceLocation assumptionLoc, + mlir::IntegerAttr alignment, mlir::Value offsetValue) { + QualType ty = expr->getType(); + SourceLocation loc = expr->getExprLoc(); + return buildAlignmentAssumption(ptrValue, ty, loc, assumptionLoc, alignment, + offsetValue); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h new file mode 100644 index 000000000000..8156d8fad059 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -0,0 +1,2418 @@ +//===-- CIRGenFunction.h - Per-Function state for CIR gen -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the internal per-function state used for CIR translation. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CIRGENFUNCTION_H +#define LLVM_CLANG_LIB_CIR_CIRGENFUNCTION_H + +#include "CIRGenBuilder.h" +#include "CIRGenCall.h" +#include "CIRGenDebugInfo.h" +#include "CIRGenModule.h" +#include "CIRGenTBAA.h" +#include "CIRGenTypeCache.h" +#include "CIRGenValue.h" +#include "EHScopeStack.h" + +#include "clang/AST/BaseSubobject.h" +#include "clang/AST/CurrentSourceLocExprScope.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Type.h" +#include "clang/Basic/ABI.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/TypeEvaluationKind.h" + +#include "mlir/IR/TypeRange.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" + +namespace clang { +class Expr; +} // namespace clang + +namespace mlir { +namespace func { +class CallOp; +} +} // namespace mlir + +namespace { +class ScalarExprEmitter; +class AggExprEmitter; +} // namespace + +namespace cir { + +struct CGCoroData; + +class CIRGenFunction : public CIRGenTypeCache { +public: + CIRGenModule &CGM; + +private: + friend class ::ScalarExprEmitter; + friend class ::AggExprEmitter; + + /// The builder is a helper class to create IR inside a function. The + /// builder is stateful, in particular it keeps an "insertion point": this + /// is where the next operations will be introduced. + CIRGenBuilderTy &builder; + + /// ------- + /// Goto + /// ------- + + /// A jump destination is an abstract label, branching to which may + /// require a jump out through normal cleanups. + struct JumpDest { + JumpDest() = default; + JumpDest(mlir::Block *Block) : Block(Block) {} + + bool isValid() const { return Block != nullptr; } + mlir::Block *getBlock() const { return Block; } + mlir::Block *Block = nullptr; + }; + + /// Track mlir Blocks for each C/C++ label. + llvm::DenseMap LabelMap; + JumpDest &getJumpDestForLabel(const clang::LabelDecl *D); + + // --------------------- + // Opaque value handling + // --------------------- + + /// Keeps track of the current set of opaque value expressions. + llvm::DenseMap OpaqueLValues; + llvm::DenseMap OpaqueRValues; + + // This keeps track of the associated size for each VLA type. + // We track this by the size expression rather than the type itself because + // in certain situations, like a const qualifier applied to an VLA typedef, + // multiple VLA types can share the same size expression. + // FIXME: Maybe this could be a stack of maps that is pushed/popped as we + // enter/leave scopes. + llvm::DenseMap VLASizeMap; + + /// Add OpenCL kernel arg metadata and the kernel attribute metadata to + /// the function metadata. + void buildKernelMetadata(const FunctionDecl *FD, mlir::cir::FuncOp Fn); + +public: + /// A non-RAII class containing all the information about a bound + /// opaque value. OpaqueValueMapping, below, is a RAII wrapper for + /// this which makes individual mappings very simple; using this + /// class directly is useful when you have a variable number of + /// opaque values or don't want the RAII functionality for some + /// reason. + class OpaqueValueMappingData { + const OpaqueValueExpr *OpaqueValue; + bool BoundLValue; + + OpaqueValueMappingData(const OpaqueValueExpr *ov, bool boundLValue) + : OpaqueValue(ov), BoundLValue(boundLValue) {} + + public: + OpaqueValueMappingData() : OpaqueValue(nullptr) {} + + static bool shouldBindAsLValue(const Expr *expr) { + // gl-values should be bound as l-values for obvious reasons. 
+ // Records should be bound as l-values because IR generation + // always keeps them in memory. Expressions of function type + // act exactly like l-values but are formally required to be + // r-values in C. + return expr->isGLValue() || expr->getType()->isFunctionType() || + hasAggregateEvaluationKind(expr->getType()); + } + + static OpaqueValueMappingData + bind(CIRGenFunction &CGF, const OpaqueValueExpr *ov, const Expr *e) { + if (shouldBindAsLValue(ov)) + return bind(CGF, ov, CGF.buildLValue(e)); + return bind(CGF, ov, CGF.buildAnyExpr(e)); + } + + static OpaqueValueMappingData + bind(CIRGenFunction &CGF, const OpaqueValueExpr *ov, const LValue &lv) { + assert(shouldBindAsLValue(ov)); + CGF.OpaqueLValues.insert(std::make_pair(ov, lv)); + return OpaqueValueMappingData(ov, true); + } + + static OpaqueValueMappingData + bind(CIRGenFunction &CGF, const OpaqueValueExpr *ov, const RValue &rv) { + assert(!shouldBindAsLValue(ov)); + CGF.OpaqueRValues.insert(std::make_pair(ov, rv)); + + OpaqueValueMappingData data(ov, false); + + // Work around an extremely aggressive peephole optimization in + // EmitScalarConversion which assumes that all other uses of a + // value are extant. + assert(!MissingFeatures::peepholeProtection() && "NYI"); + return data; + } + + bool isValid() const { return OpaqueValue != nullptr; } + void clear() { OpaqueValue = nullptr; } + + void unbind(CIRGenFunction &CGF) { + assert(OpaqueValue && "no data to unbind!"); + + if (BoundLValue) { + CGF.OpaqueLValues.erase(OpaqueValue); + } else { + CGF.OpaqueRValues.erase(OpaqueValue); + assert(!MissingFeatures::peepholeProtection() && "NYI"); + } + } + }; + + /// An RAII object to set (and then clear) a mapping for an OpaqueValueExpr. + class OpaqueValueMapping { + CIRGenFunction &CGF; + OpaqueValueMappingData Data; + + public: + static bool shouldBindAsLValue(const Expr *expr) { + return OpaqueValueMappingData::shouldBindAsLValue(expr); + } + + /// Build the opaque value mapping for the given conditional + /// operator if it's the GNU ?: extension. This is a common + /// enough pattern that the convenience operator is really + /// helpful. + /// + OpaqueValueMapping(CIRGenFunction &CGF, + const AbstractConditionalOperator *op) + : CGF(CGF) { + if (isa(op)) + // Leave Data empty. + return; + + const BinaryConditionalOperator *e = cast(op); + Data = OpaqueValueMappingData::bind(CGF, e->getOpaqueValue(), + e->getCommon()); + } + + /// Build the opaque value mapping for an OpaqueValueExpr whose source + /// expression is set to the expression the OVE represents. + OpaqueValueMapping(CIRGenFunction &CGF, const OpaqueValueExpr *OV) + : CGF(CGF) { + if (OV) { + assert(OV->getSourceExpr() && "wrong form of OpaqueValueMapping used " + "for OVE with no source expression"); + Data = OpaqueValueMappingData::bind(CGF, OV, OV->getSourceExpr()); + } + } + + OpaqueValueMapping(CIRGenFunction &CGF, const OpaqueValueExpr *opaqueValue, + LValue lvalue) + : CGF(CGF), + Data(OpaqueValueMappingData::bind(CGF, opaqueValue, lvalue)) {} + + OpaqueValueMapping(CIRGenFunction &CGF, const OpaqueValueExpr *opaqueValue, + RValue rvalue) + : CGF(CGF), + Data(OpaqueValueMappingData::bind(CGF, opaqueValue, rvalue)) {} + + void pop() { + Data.unbind(CGF); + Data.clear(); + } + + ~OpaqueValueMapping() { + if (Data.isValid()) + Data.unbind(CGF); + } + }; + +private: + /// Declare a variable in the current scope, return success if the variable + /// wasn't declared yet. 
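For context, the most common client of the OpaqueValueMapping machinery above is the GNU "?:" extension, where the shared operand is wrapped in an OpaqueValueExpr so it is evaluated exactly once. A source-level illustration only; next_id and pick are hypothetical names, not part of this patch:

```cpp
// GNU extension: "a ?: b" evaluates 'a' a single time and reuses that value as
// the result when it is non-zero; the reused value is what OpaqueValueMapping
// binds while the BinaryConditionalOperator is emitted.
int next_id(); // hypothetical helper; may have side effects

int pick(int fallback) {
  // Behaves like: int tmp = next_id(); return tmp ? tmp : fallback;
  // but next_id() is called only once.
  return next_id() ?: fallback;
}
```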
+ mlir::LogicalResult declare(const clang::Decl *var, clang::QualType ty, + mlir::Location loc, clang::CharUnits alignment, + mlir::Value &addr, bool isParam = false); + + /// Declare a variable in the current scope but take an Address as input. + mlir::LogicalResult declare(Address addr, const clang::Decl *var, + clang::QualType ty, mlir::Location loc, + clang::CharUnits alignment, mlir::Value &addrVal, + bool isParam = false); + +public: + // FIXME(cir): move this to CIRGenBuider.h + mlir::Value buildAlloca(llvm::StringRef name, clang::QualType ty, + mlir::Location loc, clang::CharUnits alignment, + bool insertIntoFnEntryBlock = false, + mlir::Value arraySize = nullptr); + mlir::Value buildAlloca(llvm::StringRef name, mlir::Type ty, + mlir::Location loc, clang::CharUnits alignment, + bool insertIntoFnEntryBlock = false, + mlir::Value arraySize = nullptr); + mlir::Value buildAlloca(llvm::StringRef name, mlir::Type ty, + mlir::Location loc, clang::CharUnits alignment, + mlir::OpBuilder::InsertPoint ip, + mlir::Value arraySize = nullptr); + +private: + void buildAndUpdateRetAlloca(clang::QualType ty, mlir::Location loc, + clang::CharUnits alignment); + + // Track current variable initialization (if there's one) + const clang::VarDecl *currVarDecl = nullptr; + class VarDeclContext { + CIRGenFunction &P; + const clang::VarDecl *OldVal = nullptr; + + public: + VarDeclContext(CIRGenFunction &p, const VarDecl *Value) : P(p) { + if (P.currVarDecl) + OldVal = P.currVarDecl; + P.currVarDecl = Value; + } + + /// Can be used to restore the state early, before the dtor + /// is run. + void restore() { P.currVarDecl = OldVal; } + ~VarDeclContext() { restore(); } + }; + + /// ------- + /// Source Location tracking + /// ------- + +public: + /// Use to track source locations across nested visitor traversals. + /// Always use a `SourceLocRAIIObject` to change currSrcLoc. + std::optional currSrcLoc; + class SourceLocRAIIObject { + CIRGenFunction &P; + std::optional OldVal; + + public: + SourceLocRAIIObject(CIRGenFunction &p, mlir::Location Value) : P(p) { + if (P.currSrcLoc) + OldVal = P.currSrcLoc; + P.currSrcLoc = Value; + } + + /// Can be used to restore the state early, before the dtor + /// is run. + void restore() { P.currSrcLoc = OldVal; } + ~SourceLocRAIIObject() { restore(); } + }; + + using SymTableScopeTy = + llvm::ScopedHashTableScope; + + enum class EvaluationOrder { + ///! No langauge constraints on evaluation order. + Default, + ///! Language semantics require left-to-right evaluation + ForceLeftToRight, + ///! Language semantics require right-to-left evaluation. + ForceRightToLeft + }; + + /// Situations in which we might emit a check for the suitability of a pointer + /// or glvalue. Needs to be kept in sync with ubsan_handlers.cpp in + /// compiler-rt. + enum TypeCheckKind { + /// Checking the operand of a load. Must be suitably sized and aligned. + TCK_Load, + /// Checking the destination of a store. Must be suitably sized and aligned. + TCK_Store, + /// Checking the bound value in a reference binding. Must be suitably sized + /// and aligned, but is not required to refer to an object (until the + /// reference is used), per core issue 453. + TCK_ReferenceBinding, + /// Checking the object expression in a non-static data member access. Must + /// be an object within its lifetime. + TCK_MemberAccess, + /// Checking the 'this' pointer for a call to a non-static member function. + /// Must be an object within its lifetime. 
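As a standalone sketch (not code from this patch), the save/set/restore pattern that VarDeclContext and SourceLocRAIIObject above both follow looks roughly like this; SaveAndSet, CurrentDepth and visitChild are illustrative names only:

```cpp
// Minimal sketch of the RAII pattern used above: stash the old value, install
// the new one, and restore on scope exit. restore() may also be called early;
// running it again from the destructor is harmless.
template <typename T> class SaveAndSet {
  T &Slot;
  T OldVal;

public:
  SaveAndSet(T &slot, T newVal) : Slot(slot), OldVal(slot) { Slot = newVal; }
  void restore() { Slot = OldVal; }
  ~SaveAndSet() { restore(); }
};

int CurrentDepth = 0; // illustrative state, analogous to currVarDecl/currSrcLoc

void visitChild() {
  SaveAndSet<int> Guard(CurrentDepth, CurrentDepth + 1);
  // ... recurse; CurrentDepth is restored on every path out of this scope.
}
```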
+ TCK_MemberCall, + /// Checking the 'this' pointer for a constructor call. + TCK_ConstructorCall, + /// Checking the operand of a dynamic_cast or a typeid expression. Must be + /// null or an object within its lifetime. + TCK_DynamicOperation + }; + + // Holds coroutine data if the current function is a coroutine. We use a + // wrapper to manage its lifetime, so that we don't have to define CGCoroData + // in this header. + struct CGCoroInfo { + std::unique_ptr Data; + CGCoroInfo(); + ~CGCoroInfo(); + }; + CGCoroInfo CurCoro; + + bool isCoroutine() const { return CurCoro.Data != nullptr; } + + /// The GlobalDecl for the current function being compiled. + clang::GlobalDecl CurGD; + + /// Unified return block. + /// Not that for LLVM codegen this is a memeber variable instead. + JumpDest ReturnBlock() { + return JumpDest(currLexScope->getOrCreateCleanupBlock(builder)); + } + + /// The temporary alloca to hold the return value. This is + /// invalid iff the function has no return value. + Address ReturnValue = Address::invalid(); + + /// Tracks function scope overall cleanup handling. + EHScopeStack EHStack; + llvm::SmallVector LifetimeExtendedCleanupStack; + + // A stack of cleanups which were added to EHStack but have to be deactivated + // later before being popped or emitted. These are usually deactivated on + // exiting a `CleanupDeactivationScope` scope. For instance, after a + // full-expr. + // + // These are specially useful for correctly emitting cleanups while + // encountering branches out of expression (through stmt-expr or coroutine + // suspensions). + struct DeferredDeactivateCleanup { + EHScopeStack::stable_iterator Cleanup; + mlir::Operation *DominatingIP; + }; + llvm::SmallVector DeferredDeactivationCleanupStack; + + // Enters a new scope for capturing cleanups which are deferred to be + // deactivated, all of which will be deactivated once the scope is exited. + struct CleanupDeactivationScope { + CIRGenFunction &CGF; + size_t OldDeactivateCleanupStackSize; + bool Deactivated; + CleanupDeactivationScope(CIRGenFunction &CGF) + : CGF(CGF), OldDeactivateCleanupStackSize( + CGF.DeferredDeactivationCleanupStack.size()), + Deactivated(false) {} + + void ForceDeactivate() { + assert(!Deactivated && "Deactivating already deactivated scope"); + auto &Stack = CGF.DeferredDeactivationCleanupStack; + for (size_t I = Stack.size(); I > OldDeactivateCleanupStackSize; I--) { + CGF.DeactivateCleanupBlock(Stack[I - 1].Cleanup, + Stack[I - 1].DominatingIP); + Stack[I - 1].DominatingIP->erase(); + } + Stack.resize(OldDeactivateCleanupStackSize); + Deactivated = true; + } + + ~CleanupDeactivationScope() { + if (Deactivated) + return; + ForceDeactivate(); + } + }; + + /// A mapping from NRVO variables to the flags used to indicate + /// when the NRVO has been applied to this variable. + llvm::DenseMap NRVOFlags; + + /// Counts of the number return expressions in the function. + unsigned NumReturnExprs = 0; + + clang::QualType FnRetQualTy; + std::optional FnRetCIRTy; + std::optional FnRetAlloca; + + llvm::DenseMap + LambdaCaptureFields; + clang::FieldDecl *LambdaThisCaptureField = nullptr; + + void buildForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, + CallArgList &CallArgs); + void buildLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); + void buildLambdaStaticInvokeBody(const CXXMethodDecl *MD); + + LValue buildPredefinedLValue(const PredefinedExpr *E); + + /// When generating code for a C++ member function, this will + /// hold the implicit 'this' declaration. 
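For reference, buildLambdaStaticInvokeBody and buildForwardingCallToLambda above correspond to the hidden static "invoker" that makes a captureless lambda convertible to a plain function pointer; the snippet below only shows the source construct that triggers them (apply and demo are illustrative helpers):

```cpp
// A captureless lambda converts to a function pointer through a synthesized
// static invoker whose body simply forwards to the lambda's call operator.
int apply(int (*fn)(int), int v) { return fn(v); }

int demo() {
  auto doubler = [](int x) { return 2 * x; };
  return apply(doubler, 21); // decays to int(*)(int) via the static invoker
}
```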
+ clang::ImplicitParamDecl *CXXABIThisDecl = nullptr; + mlir::Value CXXABIThisValue = nullptr; + mlir::Value CXXThisValue = nullptr; + clang::CharUnits CXXABIThisAlignment; + clang::CharUnits CXXThisAlignment; + + /// When generating code for a constructor or destructor, this will hold the + /// implicit argument (e.g. VTT). + ImplicitParamDecl *CXXStructorImplicitParamDecl{}; + mlir::Value CXXStructorImplicitParamValue{}; + + /// The value of 'this' to sue when evaluating CXXDefaultInitExprs within this + /// expression. + Address CXXDefaultInitExprThis = Address::invalid(); + + // Holds the Decl for the current outermost non-closure context + const clang::Decl *CurFuncDecl = nullptr; + /// This is the inner-most code context, which includes blocks. + const clang::Decl *CurCodeDecl = nullptr; + const CIRGenFunctionInfo *CurFnInfo = nullptr; + clang::QualType FnRetTy; + + /// This is the current function or global initializer that is generated code + /// for. + mlir::Operation *CurFn = nullptr; + + /// Save Parameter Decl for coroutine. + llvm::SmallVector FnArgs; + + // The CallExpr within the current statement that the musttail attribute + // applies to. nullptr if there is no 'musttail' on the current statement. + const clang::CallExpr *MustTailCall = nullptr; + + /// The attributes of cases collected during emitting the body of a switch + /// stmt. + llvm::SmallVector, 2> caseAttrsStack; + + /// The type of the condition for the emitting switch statement. + llvm::SmallVector condTypeStack; + + clang::ASTContext &getContext() const; + + CIRGenBuilderTy &getBuilder() { return builder; } + + CIRGenModule &getCIRGenModule() { return CGM; } + const CIRGenModule &getCIRGenModule() const { return CGM; } + + mlir::Block *getCurFunctionEntryBlock() { + auto Fn = dyn_cast(CurFn); + assert(Fn && "other callables NYI"); + return &Fn.getRegion().front(); + } + + /// Sanitizers enabled for this function. + clang::SanitizerSet SanOpts; + + class CIRGenFPOptionsRAII { + public: + CIRGenFPOptionsRAII(CIRGenFunction &CGF, FPOptions FPFeatures); + CIRGenFPOptionsRAII(CIRGenFunction &CGF, const clang::Expr *E); + ~CIRGenFPOptionsRAII(); + + private: + void ConstructorHelper(clang::FPOptions FPFeatures); + CIRGenFunction &CGF; + clang::FPOptions OldFPFeatures; + fp::ExceptionBehavior OldExcept; + llvm::RoundingMode OldRounding; + }; + clang::FPOptions CurFPFeatures; + + /// The symbol table maps a variable name to a value in the current scope. + /// Entering a function creates a new scope, and the function arguments are + /// added to the mapping. When the processing of a function is terminated, + /// the scope is destroyed and the mappings created in this scope are + /// dropped. + using SymTableTy = llvm::ScopedHashTable; + SymTableTy symbolTable; + /// True if we need to emit the life-time markers. This is initially set in + /// the constructor, but could be overwrriten to true if this is a coroutine. + bool ShouldEmitLifetimeMarkers; + + using DeclMapTy = llvm::DenseMap; + /// This keeps track of the CIR allocas or globals for local C + /// delcs. + DeclMapTy LocalDeclMap; + + CIRGenDebugInfo *debugInfo = nullptr; + + /// Whether llvm.stacksave has been called. Used to avoid + /// calling llvm.stacksave for multiple VLAs in the same scope. + /// TODO: Translate to MLIR + bool DidCallStackSave = false; + + /// Whether we processed a Microsoft-style asm block during CIRGen. These can + /// potentially set the return value. 
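The MustTailCall member above records the call expression a [[clang::musttail]] return statement applies to; at the source level that looks like the following (step and loop are illustrative, but the attribute and its matching-signature requirement are real Clang behavior):

```cpp
int step(int n, int acc); // same signature as loop(), as musttail requires

int loop(int n, int acc) {
  if (n == 0)
    return acc;
  // 'step(n - 1, acc + n)' is the MustTailCall for this return statement and
  // must be emitted as a guaranteed tail call.
  [[clang::musttail]] return step(n - 1, acc + n);
}
```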
+ bool SawAsmBlock = false; + + /// True if CodeGen currently emits code inside preserved access index region. + bool IsInPreservedAIRegion = false; + + /// In C++, whether we are code generating a thunk. This controls whether we + /// should emit cleanups. + bool CurFuncIsThunk = false; + + /// Hold counters for incrementally naming temporaries + unsigned CounterRefTmp = 0; + unsigned CounterAggTmp = 0; + std::string getCounterRefTmpAsString(); + std::string getCounterAggTmpAsString(); + + mlir::Type convertTypeForMem(QualType T); + + mlir::Type ConvertType(clang::QualType T); + mlir::Type ConvertType(const TypeDecl *T) { + return ConvertType(getContext().getTypeDeclType(T)); + } + + /// Return the TypeEvaluationKind of QualType \c T. + static TypeEvaluationKind getEvaluationKind(clang::QualType T); + + static bool hasScalarEvaluationKind(clang::QualType T) { + return getEvaluationKind(T) == TEK_Scalar; + } + + static bool hasAggregateEvaluationKind(clang::QualType T) { + return getEvaluationKind(T) == TEK_Aggregate; + } + + CIRGenFunction(CIRGenModule &CGM, CIRGenBuilderTy &builder, + bool suppressNewContext = false); + ~CIRGenFunction(); + + CIRGenTypes &getTypes() const { return CGM.getTypes(); } + + const TargetInfo &getTarget() const { return CGM.getTarget(); } + + const TargetCIRGenInfo &getTargetHooks() const { + return CGM.getTargetCIRGenInfo(); + } + + /// Helpers to convert Clang's SourceLocation to a MLIR Location. + mlir::Location getLoc(clang::SourceLocation SLoc); + + mlir::Location getLoc(clang::SourceRange SLoc); + + mlir::Location getLoc(mlir::Location lhs, mlir::Location rhs); + + const clang::LangOptions &getLangOpts() const { return CGM.getLangOpts(); } + + CIRGenDebugInfo *getDebugInfo() { return debugInfo; } + + void buildReturnOfRValue(mlir::Location loc, RValue RV, QualType Ty); + + /// Set the address of a local variable. + void setAddrOfLocalVar(const clang::VarDecl *VD, Address Addr) { + assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); + LocalDeclMap.insert({VD, Addr}); + // Add to the symbol table if not there already. + if (symbolTable.count(VD)) + return; + symbolTable.insert(VD, Addr.getPointer()); + } + + /// True if an insertion point is defined. If not, this indicates that the + /// current code being emitted is unreachable. + /// FIXME(cir): we need to inspect this and perhaps use a cleaner mechanism + /// since we don't yet force null insertion point to designate behavior (like + /// LLVM's codegen does) and we probably shouldn't. + bool HaveInsertPoint() const { + return builder.getInsertionBlock() != nullptr; + } + + /// Whether any type-checking sanitizers are enabled. If \c false, calls to + /// buildTypeCheck can be skipped. + bool sanitizePerformTypeCheck() const; + + void buildTypeCheck(TypeCheckKind TCK, clang::SourceLocation Loc, + mlir::Value V, clang::QualType Type, + clang::CharUnits Alignment = clang::CharUnits::Zero(), + clang::SanitizerSet SkippedChecks = clang::SanitizerSet(), + std::optional ArraySize = std::nullopt); + + void buildAggExpr(const clang::Expr *E, AggValueSlot Slot); + + /// Emit the computation of the specified expression of complex type, + /// returning the result. 
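Roughly, and assuming the same classification that classic Clang CodeGen uses, the TypeEvaluationKind buckets queried by getEvaluationKind above map onto source types like this:

```cpp
int i = 0;          // TEK_Scalar: integers, enums, pointers, ...
double d = 0.0;     // TEK_Scalar
_Complex double z;  // TEK_Complex (Clang extension in C++), handled by the
                    // buildComplex* helpers declared below
struct Pair { int a, b; };
Pair p{};           // TEK_Aggregate, handled via AggValueSlot / buildAggExpr
```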
+ mlir::Value buildComplexExpr(const Expr *E); + + void buildComplexExprIntoLValue(const Expr *E, LValue dest, bool isInit); + + void buildStoreOfComplex(mlir::Location Loc, mlir::Value V, LValue dest, + bool isInit); + + Address buildAddrOfRealComponent(mlir::Location loc, Address complex, + QualType complexType); + Address buildAddrOfImagComponent(mlir::Location loc, Address complex, + QualType complexType); + + LValue buildComplexAssignmentLValue(const BinaryOperator *E); + LValue buildComplexCompoundAssignmentLValue(const CompoundAssignOperator *E); + + /// Emits a reference binding to the passed in expression. + RValue buildReferenceBindingToExpr(const Expr *E); + + LValue buildCastLValue(const CastExpr *E); + + void buildCXXConstructExpr(const clang::CXXConstructExpr *E, + AggValueSlot Dest); + + void buildCXXConstructorCall(const clang::CXXConstructorDecl *D, + clang::CXXCtorType Type, bool ForVirtualBase, + bool Delegating, AggValueSlot ThisAVS, + const clang::CXXConstructExpr *E); + + void buildCXXConstructorCall(const clang::CXXConstructorDecl *D, + clang::CXXCtorType Type, bool ForVirtualBase, + bool Delegating, Address This, CallArgList &Args, + AggValueSlot::Overlap_t Overlap, + clang::SourceLocation Loc, + bool NewPointerIsChecked); + + RValue buildCXXMemberOrOperatorCall( + const clang::CXXMethodDecl *Method, const CIRGenCallee &Callee, + ReturnValueSlot ReturnValue, mlir::Value This, mlir::Value ImplicitParam, + clang::QualType ImplicitParamTy, const clang::CallExpr *E, + CallArgList *RtlArgs); + + RValue buildCXXMemberCallExpr(const clang::CXXMemberCallExpr *E, + ReturnValueSlot ReturnValue); + RValue buildCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, + ReturnValueSlot ReturnValue); + RValue buildCXXMemberOrOperatorMemberCallExpr( + const clang::CallExpr *CE, const clang::CXXMethodDecl *MD, + ReturnValueSlot ReturnValue, bool HasQualifier, + clang::NestedNameSpecifier *Qualifier, bool IsArrow, + const clang::Expr *Base); + RValue buildCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, + const CXXMethodDecl *MD, + ReturnValueSlot ReturnValue); + void buildNullInitialization(mlir::Location loc, Address DestPtr, + QualType Ty); + bool shouldNullCheckClassCastValue(const CastExpr *CE); + + void buildCXXTemporary(const CXXTemporary *Temporary, QualType TempType, + Address Ptr); + mlir::Value buildCXXNewExpr(const CXXNewExpr *E); + void buildCXXDeleteExpr(const CXXDeleteExpr *E); + + void buildCXXAggrConstructorCall(const CXXConstructorDecl *D, + const clang::ArrayType *ArrayTy, + Address ArrayPtr, const CXXConstructExpr *E, + bool NewPointerIsChecked, + bool ZeroInitialization = false); + + void buildCXXAggrConstructorCall(const CXXConstructorDecl *ctor, + mlir::Value numElements, Address arrayBase, + const CXXConstructExpr *E, + bool NewPointerIsChecked, + bool zeroInitialize); + + /// Compute the length of an array, even if it's a VLA, and drill down to the + /// base element type. 
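At the source level, the array-length computation described above covers both constant arrays and VLAs (the latter a Clang extension in C++); the functions below are illustrative only:

```cpp
void constant_case() {
  int grid[4][5];     // 4 * 5 = 20 'int' elements; the base element type is
                      // int and the count folds to a constant (countFromCLAs)
}

void vla_case(unsigned n, unsigned m) {
  int tile[n][m + 1]; // each dimension's size expression is emitted once and
                      // cached (VLASizeMap); the element count is the runtime
                      // product n * (m + 1)
}
```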
+ mlir::Value buildArrayLength(const clang::ArrayType *arrayType, + QualType &baseType, Address &addr); + + void buildDeleteCall(const FunctionDecl *DeleteFD, mlir::Value Ptr, + QualType DeleteTy, mlir::Value NumElements = nullptr, + CharUnits CookieSize = CharUnits()); + + mlir::Value buildDynamicCast(Address ThisAddr, const CXXDynamicCastExpr *DCE); + + mlir::Value createLoad(const clang::VarDecl *VD, const char *Name); + + mlir::Value buildScalarPrePostIncDec(const UnaryOperator *E, LValue LV, + bool isInc, bool isPre); + mlir::Value buildComplexPrePostIncDec(const UnaryOperator *E, LValue LV, + bool isInc, bool isPre); + + // Wrapper for function prototype sources. Wraps either a FunctionProtoType or + // an ObjCMethodDecl. + struct PrototypeWrapper { + llvm::PointerUnion + P; + + PrototypeWrapper(const clang::FunctionProtoType *FT) : P(FT) {} + PrototypeWrapper(const clang::ObjCMethodDecl *MD) : P(MD) {} + }; + + bool LValueIsSuitableForInlineAtomic(LValue Src); + + /// An abstract representation of regular/ObjC call/message targets. + class AbstractCallee { + /// The function declaration of the callee. + const clang::Decl *CalleeDecl; + + public: + AbstractCallee() : CalleeDecl(nullptr) {} + AbstractCallee(const clang::FunctionDecl *FD) : CalleeDecl(FD) {} + AbstractCallee(const clang::ObjCMethodDecl *OMD) : CalleeDecl(OMD) {} + bool hasFunctionDecl() const { + return llvm::isa_and_nonnull(CalleeDecl); + } + const clang::Decl *getDecl() const { return CalleeDecl; } + unsigned getNumParams() const { + if (const auto *FD = llvm::dyn_cast(CalleeDecl)) + return FD->getNumParams(); + return llvm::cast(CalleeDecl)->param_size(); + } + const clang::ParmVarDecl *getParamDecl(unsigned I) const { + if (const auto *FD = llvm::dyn_cast(CalleeDecl)) + return FD->getParamDecl(I); + return *(llvm::cast(CalleeDecl)->param_begin() + + I); + } + }; + + RValue convertTempToRValue(Address addr, clang::QualType type, + clang::SourceLocation Loc); + + /// If a ParmVarDecl had the pass_object_size attribute, this + /// will contain a mapping from said ParmVarDecl to its implicit "object_size" + /// parameter. + llvm::SmallDenseMap + SizeArguments; + + // Build a "reference" to a va_list; this is either the address or the value + // of the expression, depending on how va_list is defined. + Address buildVAListRef(const Expr *E); + + /// Emits a CIR variable-argument operation, either + /// \c cir.va.start or \c cir.va.end. + /// + /// \param ArgValue A reference to the \c va_list as emitted by either + /// \c buildVAListRef or \c buildMSVAListRef. + /// + /// \param IsStart If \c true, emits \c cir.va.start, otherwise \c cir.va.end. + void buildVAStartEnd(mlir::Value ArgValue, bool IsStart); + + /// Generate code to get an argument from the passed in pointer + /// and update it accordingly. + /// + /// \param VE The \c VAArgExpr for which to generate code. + /// + /// \param VAListAddr Receives a reference to the \c va_list as emitted by + /// either \c buildVAListRef or \c buildMSVAListRef. + /// + /// \returns SSA value with the argument. + mlir::Value buildVAArg(VAArgExpr *VE, Address &VAListAddr); + + void buildVariablyModifiedType(QualType Ty); + + struct VlaSizePair { + mlir::Value NumElts; + QualType Type; + + VlaSizePair(mlir::Value NE, QualType T) : NumElts(NE), Type(T) {} + }; + + /// Returns an MLIR value that corresponds to the size, + /// in non-variably-sized elements, of a variable length array type, + /// plus that largest non-variably-sized element type. 
Assumes that + /// the type has already been emitted with buildVariablyModifiedType. + VlaSizePair getVLASize(const VariableArrayType *vla); + VlaSizePair getVLASize(QualType vla); + + mlir::Value emitBuiltinObjectSize(const Expr *E, unsigned Type, + mlir::cir::IntType ResType, + mlir::Value EmittedE, bool IsDynamic); + mlir::Value evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, + mlir::cir::IntType ResType, + mlir::Value EmittedE, + bool IsDynamic); + + /// Given an expression that represents a value lvalue, this method emits + /// the address of the lvalue, then loads the result as an rvalue, + /// returning the rvalue. + RValue buildLoadOfLValue(LValue LV, SourceLocation Loc); + mlir::Value buildLoadOfScalar(Address addr, bool isVolatile, + clang::QualType ty, clang::SourceLocation loc, + LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo, + bool isNontemporal = false); + mlir::Value buildLoadOfScalar(Address addr, bool isVolatile, + clang::QualType ty, mlir::Location loc, + LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo, + bool isNontemporal = false); + + int64_t getAccessedFieldNo(unsigned idx, const mlir::ArrayAttr elts); + + RValue buildLoadOfExtVectorElementLValue(LValue LV); + + void buildStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst); + + RValue buildLoadOfBitfieldLValue(LValue LV, SourceLocation Loc); + + /// Load a scalar value from an address, taking care to appropriately convert + /// from the memory representation to CIR value representation. + mlir::Value buildLoadOfScalar(Address addr, bool isVolatile, + clang::QualType ty, clang::SourceLocation loc, + AlignmentSource source = AlignmentSource::Type, + bool isNontemporal = false) { + return buildLoadOfScalar(addr, isVolatile, ty, loc, LValueBaseInfo(source), + CGM.getTBAAAccessInfo(ty), isNontemporal); + } + + /// Load a scalar value from an address, taking care to appropriately convert + /// form the memory representation to the CIR value representation. The + /// l-value must be a simple l-value. + mlir::Value buildLoadOfScalar(LValue lvalue, clang::SourceLocation Loc); + mlir::Value buildLoadOfScalar(LValue lvalue, mlir::Location Loc); + + /// Load a complex number from the specified l-value. 
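emitBuiltinObjectSize / evaluateOrEmitBuiltinObjectSize above lower the Clang builtins shown below (both are real builtins); the constants in the comments assume exactly this code:

```cpp
#include <cstddef>

std::size_t demo() {
  char buf[64];
  std::size_t whole = __builtin_object_size(buf, 0);      // folds to 64
  std::size_t rest  = __builtin_object_size(buf + 16, 0); // folds to 48
  // The dynamic flavor may be lowered to a run-time computation instead of
  // requiring a compile-time constant:
  std::size_t dyn = __builtin_dynamic_object_size(buf, 0);
  return whole + rest + dyn;
}
```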
+ mlir::Value buildLoadOfComplex(LValue src, SourceLocation loc); + + Address buildLoadOfReference(LValue refLVal, mlir::Location loc, + LValueBaseInfo *pointeeBaseInfo = nullptr, + TBAAAccessInfo *pointeeTBAAInfo = nullptr); + LValue buildLoadOfReferenceLValue(LValue RefLVal, mlir::Location Loc); + LValue + buildLoadOfReferenceLValue(Address RefAddr, mlir::Location Loc, + QualType RefTy, + AlignmentSource Source = AlignmentSource::Type) { + LValue RefLVal = makeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source)); + return buildLoadOfReferenceLValue(RefLVal, Loc); + } + void buildImplicitAssignmentOperatorBody(FunctionArgList &Args); + + void buildAggregateStore(mlir::Value Val, Address Dest, bool DestIsVolatile); + + void buildCallArgs( + CallArgList &Args, PrototypeWrapper Prototype, + llvm::iterator_range ArgRange, + AbstractCallee AC = AbstractCallee(), unsigned ParamsToSkip = 0, + EvaluationOrder Order = EvaluationOrder::Default); + + void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl); + void checkTargetFeatures(SourceLocation Loc, const FunctionDecl *TargetDecl); + + LValue buildStmtExprLValue(const StmtExpr *E); + + LValue buildPointerToDataMemberBinaryExpr(const BinaryOperator *E); + + /// TODO: Add TBAAAccessInfo + Address buildCXXMemberDataPointerAddress( + const Expr *E, Address base, mlir::Value memberPtr, + const MemberPointerType *memberPtrType, LValueBaseInfo *baseInfo); + + /// Generate a call of the given function, expecting the given + /// result type, and using the given argument list which specifies both the + /// LLVM arguments and the types they were derived from. + RValue buildCall(const CIRGenFunctionInfo &CallInfo, + const CIRGenCallee &Callee, ReturnValueSlot ReturnValue, + const CallArgList &Args, + mlir::cir::CIRCallOpInterface *callOrTryCall, + bool IsMustTail, mlir::Location loc, + std::optional E = std::nullopt); + RValue buildCall(const CIRGenFunctionInfo &CallInfo, + const CIRGenCallee &Callee, ReturnValueSlot ReturnValue, + const CallArgList &Args, + mlir::cir::CIRCallOpInterface *callOrTryCall = nullptr, + bool IsMustTail = false) { + assert(currSrcLoc && "source location must have been set"); + return buildCall(CallInfo, Callee, ReturnValue, Args, callOrTryCall, + IsMustTail, *currSrcLoc, std::nullopt); + } + RValue buildCall(clang::QualType FnType, const CIRGenCallee &Callee, + const clang::CallExpr *E, ReturnValueSlot returnValue, + mlir::Value Chain = nullptr); + + RValue buildCallExpr(const clang::CallExpr *E, + ReturnValueSlot ReturnValue = ReturnValueSlot()); + + mlir::Value buildRuntimeCall(mlir::Location loc, mlir::cir::FuncOp callee, + ArrayRef args = {}); + + void buildInvariantStart(CharUnits Size); + + /// Create a check for a function parameter that may potentially be + /// declared as non-null. + void buildNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, + AbstractCallee AC, unsigned ParmNum); + + void buildCallArg(CallArgList &args, const clang::Expr *E, + clang::QualType ArgType); + + LValue buildCallExprLValue(const CallExpr *E); + + /// Similarly to buildAnyExpr(), however, the result will always be accessible + /// even if no aggregate location is provided. + RValue buildAnyExprToTemp(const clang::Expr *E); + + CIRGenCallee buildCallee(const clang::Expr *E); + + void finishFunction(SourceLocation EndLoc); + + /// Emit code to compute the specified expression which can have any type. The + /// result is returned as an RValue struct. 
If this is an aggregate + /// expression, the aggloc/agglocvolatile arguments indicate where the result + /// should be returned. + RValue buildAnyExpr(const clang::Expr *E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + + mlir::LogicalResult buildFunctionBody(const clang::Stmt *Body); + mlir::LogicalResult buildCoroutineBody(const CoroutineBodyStmt &S); + mlir::LogicalResult buildCoreturnStmt(const CoreturnStmt &S); + + mlir::cir::CallOp buildCoroIDBuiltinCall(mlir::Location loc, + mlir::Value nullPtr); + mlir::cir::CallOp buildCoroAllocBuiltinCall(mlir::Location loc); + mlir::cir::CallOp buildCoroBeginBuiltinCall(mlir::Location loc, + mlir::Value coroframeAddr); + mlir::cir::CallOp buildCoroEndBuiltinCall(mlir::Location loc, + mlir::Value nullPtr); + + RValue buildCoawaitExpr(const CoawaitExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + RValue buildCoyieldExpr(const CoyieldExpr &E, + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); + RValue buildCoroutineIntrinsic(const CallExpr *E, unsigned int IID); + RValue buildCoroutineFrame(); + + enum class MSVCIntrin; + + mlir::Value buildARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch); + mlir::Value buildARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch); + + mlir::Value buildAlignmentAssumption(mlir::Value ptrValue, QualType ty, + SourceLocation loc, + SourceLocation assumptionLoc, + mlir::IntegerAttr alignment, + mlir::Value offsetValue = nullptr); + + mlir::Value buildAlignmentAssumption(mlir::Value ptrValue, const Expr *expr, + SourceLocation assumptionLoc, + mlir::IntegerAttr alignment, + mlir::Value offsetValue = nullptr); + + /// Build a debug stoppoint if we are emitting debug info. + void buildStopPoint(const Stmt *S); + + // Build CIR for a statement. useCurrentScope should be true if no + // new scopes need be created when finding a compound statement. + mlir::LogicalResult buildStmt(const clang::Stmt *S, bool useCurrentScope, + ArrayRef Attrs = std::nullopt); + + mlir::LogicalResult buildSimpleStmt(const clang::Stmt *S, + bool useCurrentScope); + + mlir::LogicalResult buildForStmt(const clang::ForStmt &S); + mlir::LogicalResult buildWhileStmt(const clang::WhileStmt &S); + mlir::LogicalResult buildDoStmt(const clang::DoStmt &S); + mlir::LogicalResult + buildCXXForRangeStmt(const CXXForRangeStmt &S, + ArrayRef Attrs = std::nullopt); + mlir::LogicalResult buildSwitchStmt(const clang::SwitchStmt &S); + + mlir::LogicalResult buildCXXTryStmtUnderScope(const clang::CXXTryStmt &S); + mlir::LogicalResult buildCXXTryStmt(const clang::CXXTryStmt &S); + void enterCXXTryStmt(const CXXTryStmt &S, mlir::cir::TryOp catchOp, + bool IsFnTryBlock = false); + void exitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false); + + Address buildCompoundStmt(const clang::CompoundStmt &S, bool getLast = false, + AggValueSlot slot = AggValueSlot::ignored()); + + Address + buildCompoundStmtWithoutScope(const clang::CompoundStmt &S, + bool getLast = false, + AggValueSlot slot = AggValueSlot::ignored()); + GlobalDecl CurSEHParent; + bool currentFunctionUsesSEHTry() const { return !!CurSEHParent; } + + /// Returns true inside SEH __try blocks. + bool isSEHTryScope() const { return MissingFeatures::isSEHTryScope(); } + + mlir::Operation *CurrentFuncletPad = nullptr; + + /// Returns true while emitting a cleanuppad. 
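For orientation, buildCXXForRangeStmt above consumes the already-desugared form of a range-based for loop; the comments below sketch the standard desugaring, not anything CIR-specific:

```cpp
#include <vector>

int sum(const std::vector<int> &v) {
  int total = 0;
  for (int x : v)   // roughly: { auto &&__range = v;
    total += x;     //            auto __begin = __range.begin();
                    //            auto __end   = __range.end();
                    //            for (; __begin != __end; ++__begin)
                    //              { int x = *__begin; total += x; } }
  return total;
}
```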
+ bool isCleanupPadScope() const { + assert(!CurrentFuncletPad && "NYI"); + return false; + } + + /// Return a landing pad that just calls terminate. + mlir::Operation *getTerminateLandingPad(); + + /// Emit code to compute the specified expression, + /// ignoring the result. + void buildIgnoredExpr(const clang::Expr *E); + + LValue buildArraySubscriptExpr(const clang::ArraySubscriptExpr *E, + bool Accessed = false); + + mlir::LogicalResult buildDeclStmt(const clang::DeclStmt &S); + + /// Determine whether a return value slot may overlap some other object. + AggValueSlot::Overlap_t getOverlapForReturnValue() { + // FIXME: Assuming no overlap here breaks guaranteed copy elision for base + // class subobjects. These cases may need to be revisited depending on the + // resolution of the relevant core issue. + return AggValueSlot::DoesNotOverlap; + } + + /// Determine whether a base class initialization may overlap some other + /// object. + AggValueSlot::Overlap_t getOverlapForBaseInit(const CXXRecordDecl *RD, + const CXXRecordDecl *BaseRD, + bool IsVirtual); + + /// Get an appropriate 'undef' rvalue for the given type. + /// TODO: What's the equivalent for MLIR? Currently we're only using this for + /// void types so it just returns RValue::get(nullptr) but it'll need + /// addressed later. + RValue GetUndefRValue(clang::QualType Ty); + + mlir::Value buildFromMemory(mlir::Value Value, clang::QualType Ty); + + mlir::Type convertType(clang::QualType T); + + mlir::LogicalResult buildAsmStmt(const clang::AsmStmt &S); + + std::pair + buildAsmInputLValue(const TargetInfo::ConstraintInfo &Info, LValue InputValue, + QualType InputType, std::string &ConstraintStr, + SourceLocation Loc); + + std::pair + buildAsmInput(const TargetInfo::ConstraintInfo &Info, const Expr *InputExpr, + std::string &ConstraintStr); + + mlir::LogicalResult buildIfStmt(const clang::IfStmt &S); + + mlir::LogicalResult buildReturnStmt(const clang::ReturnStmt &S); + + mlir::LogicalResult buildGotoStmt(const clang::GotoStmt &S); + + mlir::LogicalResult buildLabel(const clang::LabelDecl *D); + mlir::LogicalResult buildLabelStmt(const clang::LabelStmt &S); + + mlir::LogicalResult buildAttributedStmt(const AttributedStmt &S); + + mlir::LogicalResult buildBreakStmt(const clang::BreakStmt &S); + mlir::LogicalResult buildContinueStmt(const clang::ContinueStmt &S); + + // OpenMP gen functions: + mlir::LogicalResult buildOMPParallelDirective(const OMPParallelDirective &S); + mlir::LogicalResult buildOMPTaskwaitDirective(const OMPTaskwaitDirective &S); + mlir::LogicalResult + buildOMPTaskyieldDirective(const OMPTaskyieldDirective &S); + mlir::LogicalResult buildOMPBarrierDirective(const OMPBarrierDirective &S); + + LValue buildOpaqueValueLValue(const OpaqueValueExpr *e); + + /// Emit code to compute a designator that specifies the location + /// of the expression. + /// FIXME: document this function better. + LValue buildLValue(const clang::Expr *E); + + void buildDecl(const clang::Decl &D); + + /// If the specified expression does not fold to a constant, or if it does but + /// contains a label, return false. If it constant folds return true and set + /// the boolean result in Result. + bool ConstantFoldsToSimpleInteger(const clang::Expr *Cond, bool &ResultBool, + bool AllowLabels = false); + bool ConstantFoldsToSimpleInteger(const clang::Expr *Cond, + llvm::APSInt &ResultInt, + bool AllowLabels = false); + + /// Return true if the statement contains a label in it. 
If + /// this statement is not executed normally, it not containing a label means + /// that we can just remove the code. + bool ContainsLabel(const clang::Stmt *S, bool IgnoreCaseStmts = false); + + /// Emit an if on a boolean condition to the specified blocks. + /// FIXME: Based on the condition, this might try to simplify the codegen of + /// the conditional based on the branch. TrueCount should be the number of + /// times we expect the condition to evaluate to true based on PGO data. We + /// might decide to leave this as a separate pass (see EmitBranchOnBoolExpr + /// for extra ideas). + mlir::LogicalResult buildIfOnBoolExpr(const clang::Expr *cond, + const clang::Stmt *thenS, + const clang::Stmt *elseS); + mlir::cir::IfOp buildIfOnBoolExpr( + const clang::Expr *cond, + llvm::function_ref thenBuilder, + mlir::Location thenLoc, + llvm::function_ref elseBuilder, + std::optional elseLoc = {}); + mlir::Value buildTernaryOnBoolExpr(const clang::Expr *cond, + mlir::Location loc, + const clang::Stmt *thenS, + const clang::Stmt *elseS); + mlir::Value buildOpOnBoolExpr(mlir::Location loc, const clang::Expr *cond); + + class ConstantEmission { + // Cannot use mlir::TypedAttr directly here because of bit availability. + llvm::PointerIntPair ValueAndIsReference; + ConstantEmission(mlir::TypedAttr C, bool isReference) + : ValueAndIsReference(C, isReference) {} + + public: + ConstantEmission() {} + static ConstantEmission forReference(mlir::TypedAttr C) { + return ConstantEmission(C, true); + } + static ConstantEmission forValue(mlir::TypedAttr C) { + return ConstantEmission(C, false); + } + + explicit operator bool() const { + return ValueAndIsReference.getOpaqueValue() != nullptr; + } + + bool isReference() const { return ValueAndIsReference.getInt(); } + LValue getReferenceLValue(CIRGenFunction &CGF, Expr *refExpr) const { + assert(isReference()); + // create(loc, ty, getZeroAttr(ty)); + // CGF.getBuilder().const + // return CGF.MakeNaturalAlignAddrLValue(ValueAndIsReference.getPointer(), + // refExpr->getType()); + llvm_unreachable("NYI"); + } + + mlir::TypedAttr getValue() const { + assert(!isReference()); + return mlir::cast(ValueAndIsReference.getPointer()); + } + }; + + ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr); + ConstantEmission tryEmitAsConstant(const MemberExpr *ME); + + /// Emit the computation of the specified expression of scalar type, + /// ignoring the result. 
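The interplay of ConstantFoldsToSimpleInteger and ContainsLabel above is easiest to see on a source example: the condition folds to false, yet the branch cannot simply be dropped because its body contains a label reached by a goto (report is an illustrative function):

```cpp
#include <cstdio>

void report(bool retry) {
  if (0) {          // ConstantFoldsToSimpleInteger -> false...
  again:            // ...but ContainsLabel finds this label,
    std::puts("retrying");
    return;
  }
  if (retry)
    goto again;     // ...so the "dead" block must still be emitted
}
```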
+ mlir::Value buildScalarExpr(const clang::Expr *E); + mlir::Value buildScalarConstant(const ConstantEmission &Constant, Expr *E); + + mlir::Value buildPromotedComplexExpr(const Expr *E, QualType PromotionType); + mlir::Value buildPromotedScalarExpr(const clang::Expr *E, + QualType PromotionType); + mlir::Value buildPromotedValue(mlir::Value result, QualType PromotionType); + mlir::Value buildUnPromotedValue(mlir::Value result, QualType PromotionType); + + mlir::Type getCIRType(const clang::QualType &type); + + const CaseStmt *foldCaseStmt(const clang::CaseStmt &S, mlir::Type condType, + SmallVector &caseAttrs); + + template + mlir::LogicalResult + buildCaseDefaultCascade(const T *stmt, mlir::Type condType, + SmallVector &caseAttrs); + + mlir::LogicalResult buildCaseStmt(const clang::CaseStmt &S, + mlir::Type condType, + SmallVector &caseAttrs); + + mlir::LogicalResult + buildDefaultStmt(const clang::DefaultStmt &S, mlir::Type condType, + SmallVector &caseAttrs); + + mlir::LogicalResult buildSwitchCase(const clang::SwitchCase &S); + + mlir::LogicalResult buildSwitchBody(const clang::Stmt *S); + + mlir::cir::FuncOp generateCode(clang::GlobalDecl GD, mlir::cir::FuncOp Fn, + const CIRGenFunctionInfo &FnInfo); + + clang::QualType buildFunctionArgList(clang::GlobalDecl GD, + FunctionArgList &Args); + struct AutoVarEmission { + const clang::VarDecl *Variable; + /// The address of the alloca for languages with explicit address space + /// (e.g. OpenCL) or alloca casted to generic pointer for address space + /// agnostic languages (e.g. C++). Invalid if the variable was emitted + /// as a global constant. + Address Addr; + + /// True if the variable is of aggregate type and has a constant + /// initializer. + bool IsConstantAggregate = false; + + /// True if the variable is a __block variable that is captured by an + /// escaping block. + bool IsEscapingByRef = false; + + mlir::Value NRVOFlag{}; + + struct Invalid {}; + AutoVarEmission(Invalid) : Variable(nullptr), Addr(Address::invalid()) {} + + AutoVarEmission(const clang::VarDecl &variable) + : Variable(&variable), Addr(Address::invalid()) {} + + static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); } + + bool wasEmittedAsGlobal() const { return !Addr.isValid(); } + + /// Returns the raw, allocated address, which is not necessarily + /// the address of the object itself. It is casted to default + /// address space for address space agnostic languages. + Address getAllocatedAddress() const { return Addr; } + + /// Returns the address of the object within this declaration. + /// Note that this does not chase the forwarding pointer for + /// __block decls. + Address getObjectAddress(CIRGenFunction &CGF) const { + if (!IsEscapingByRef) + return Addr; + + llvm_unreachable("NYI"); + } + }; + + LValue buildMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); + + /// Emit the alloca and debug information for a + /// local variable. Does not emit initialization or destruction. 
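Roughly, and mirroring classic CodeGen, the NRVOFlag member of AutoVarEmission above exists for source patterns like the one below: when the named return value optimization fires, result is constructed directly in the caller's return slot, and the flag records at run time whether the function actually returned through that variable so its destructor is only run when it did not. A sketch of the intent, not of the emitted CIR:

```cpp
#include <string>

std::string buildGreeting(bool formal) {
  std::string result;                  // NRVO candidate with a non-trivial dtor
  result = formal ? "Good day" : "hi";
  return result;                       // no copy/move when NRVO applies
}
```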
+ AutoVarEmission buildAutoVarAlloca(const clang::VarDecl &D, + mlir::OpBuilder::InsertPoint = {}); + + void buildAutoVarInit(const AutoVarEmission &emission); + void buildAutoVarCleanups(const AutoVarEmission &emission); + void buildAutoVarTypeCleanup(const AutoVarEmission &emission, + clang::QualType::DestructionKind dtorKind); + + void buildStoreOfScalar(mlir::Value value, LValue lvalue); + void buildStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, + clang::QualType ty, LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo, bool isInit = false, + bool isNontemporal = false); + void buildStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, + QualType ty, + AlignmentSource source = AlignmentSource::Type, + bool isInit = false, bool isNontemporal = false) { + buildStoreOfScalar(value, addr, isVolatile, ty, LValueBaseInfo(source), + CGM.getTBAAAccessInfo(ty), isInit, isNontemporal); + } + void buildStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit); + + mlir::Value buildToMemory(mlir::Value Value, clang::QualType Ty); + void buildDeclRefExprDbgValue(const DeclRefExpr *E, const APValue &Init); + + /// Store the specified rvalue into the specified + /// lvalue, where both are guaranteed to the have the same type, and that type + /// is 'Ty'. + void buildStoreThroughLValue(RValue Src, LValue Dst, bool isInit = false); + + void buildStoreThroughBitfieldLValue(RValue Src, LValue Dst, + mlir::Value &Result); + + mlir::cir::BrOp buildBranchThroughCleanup(mlir::Location Loc, JumpDest Dest); + + /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is + /// nonnull, if 1\p LHS is marked _Nonnull. + void buildNullabilityCheck(LValue LHS, mlir::Value RHS, + clang::SourceLocation Loc); + + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to + /// detect undefined behavior when the pointer overflow sanitizer is enabled. + /// \p SignedIndices indicates whether any of the GEP indices are signed. + /// \p IsSubtraction indicates whether the expression used to form the GEP + /// is a subtraction. 
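The comment above describes buildCheckedInBoundsGEP; the kind of source-level pointer arithmetic it protects looks like the following, where -fsanitize=pointer-overflow (a real UBSan check) makes the offset computation checked. The functions are illustrative only:

```cpp
int *advance(int *base, long offset) {
  return base + offset;   // signed index (SignedIndices), addition
}

int *rewind(int *base, unsigned long count) {
  return base - count;    // the IsSubtraction case
}
```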
+ mlir::Value buildCheckedInBoundsGEP(mlir::Type ElemTy, mlir::Value Ptr, + ArrayRef IdxList, + bool SignedIndices, bool IsSubtraction, + SourceLocation Loc); + + void buildScalarInit(const clang::Expr *init, mlir::Location loc, + LValue lvalue, bool capturedByInit = false); + + LValue buildDeclRefLValue(const clang::DeclRefExpr *E); + LValue buildExtVectorElementExpr(const ExtVectorElementExpr *E); + LValue buildBinaryOperatorLValue(const clang::BinaryOperator *E); + LValue buildCompoundAssignmentLValue(const clang::CompoundAssignOperator *E); + LValue buildUnaryOpLValue(const clang::UnaryOperator *E); + LValue buildStringLiteralLValue(const StringLiteral *E); + RValue buildBuiltinExpr(const clang::GlobalDecl GD, unsigned BuiltinID, + const clang::CallExpr *E, + ReturnValueSlot ReturnValue); + RValue buildRotate(const CallExpr *E, bool IsRotateRight); + mlir::Value buildTargetBuiltinExpr(unsigned BuiltinID, + const clang::CallExpr *E, + ReturnValueSlot ReturnValue); + + // Target specific builtin emission + mlir::Value buildScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, + const CallExpr *E); + mlir::Value buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch); + mlir::Value buildAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + mlir::Value buildAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + mlir::Value buildX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); + + /// Given an expression with a pointer type, emit the value and compute our + /// best estimate of the alignment of the pointee. + /// + /// \param BaseInfo - If non-null, this will be initialized with + /// information about the source of the alignment and the may-alias + /// attribute. Note that this function will conservatively fall back on + /// the type when it doesn't recognize the expression and may-alias will + /// be set to false. + /// + /// One reasonable way to use this information is when there's a language + /// guarantee that the pointer must be aligned to some stricter value, and + /// we're simply trying to ensure that sufficiently obvious uses of under- + /// aligned objects don't get miscompiled; for example, a placement new + /// into the address of a local variable. In such a case, it's quite + /// reasonable to just ignore the returned alignment when it isn't from an + /// explicit source. + Address + buildPointerWithAlignment(const clang::Expr *expr, + LValueBaseInfo *baseInfo = nullptr, + TBAAAccessInfo *tbaaInfo = nullptr, + KnownNonNull_t isKnownNonNull = NotKnownNonNull); + + LValue + buildConditionalOperatorLValue(const AbstractConditionalOperator *expr); + + /// Emit an expression as an initializer for an object (variable, field, etc.) + /// at the given location. The expression is not necessarily the normal + /// initializer for the object, and the address is not necessarily + /// its normal location. + /// + /// \param init the initializing expression + /// \param D the object to act as if we're initializing + /// \param lvalue the lvalue to initialize + /// \param capturedByInit true if \p D is a __block variable whose address is + /// potentially changed by the initializer + void buildExprAsInit(const clang::Expr *init, const clang::ValueDecl *D, + LValue lvalue, bool capturedByInit = false); + + /// Emit code and set up symbol table for a variable declaration with auto, + /// register, or no storage class specifier. 
These turn into simple stack + /// objects, globals depending on target. + void buildAutoVarDecl(const clang::VarDecl &D); + + /// This method handles emission of any variable declaration + /// inside a function, including static vars etc. + void buildVarDecl(const clang::VarDecl &D); + + mlir::cir::GlobalOp + addInitializerToStaticVarDecl(const VarDecl &D, mlir::cir::GlobalOp GV, + mlir::cir::GetGlobalOp GVAddr); + + void buildStaticVarDecl(const VarDecl &D, + mlir::cir::GlobalLinkageKind Linkage); + + /// Perform the usual unary conversions on the specified + /// expression and compare the result against zero, returning an Int1Ty value. + mlir::Value evaluateExprAsBool(const clang::Expr *E); + + void buildCtorPrologue(const clang::CXXConstructorDecl *CD, + clang::CXXCtorType Type, FunctionArgList &Args); + void buildConstructorBody(FunctionArgList &Args); + void buildDestructorBody(FunctionArgList &Args); + void buildCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type, + bool ForVirtualBase, bool Delegating, + Address This, QualType ThisTy); + RValue buildCXXDestructorCall(GlobalDecl Dtor, const CIRGenCallee &Callee, + mlir::Value This, QualType ThisTy, + mlir::Value ImplicitParam, + QualType ImplicitParamTy, const CallExpr *E); + + /// Enter the cleanups necessary to complete the given phase of destruction + /// for a destructor. The end result should call destructors on members and + /// base classes in reverse order of their construction. + void EnterDtorCleanups(const CXXDestructorDecl *Dtor, CXXDtorType Type); + + /// Determines whether an EH cleanup is required to destroy a type + /// with the given destruction kind. + /// TODO(cir): could be shared with Clang LLVM codegen + bool needsEHCleanup(QualType::DestructionKind kind) { + switch (kind) { + case QualType::DK_none: + return false; + case QualType::DK_cxx_destructor: + case QualType::DK_objc_weak_lifetime: + case QualType::DK_nontrivial_c_struct: + return getLangOpts().Exceptions; + case QualType::DK_objc_strong_lifetime: + return getLangOpts().Exceptions && + CGM.getCodeGenOpts().ObjCAutoRefCountExceptions; + } + llvm_unreachable("bad destruction kind"); + } + + CleanupKind getCleanupKind(QualType::DestructionKind kind) { + return (needsEHCleanup(kind) ? NormalAndEHCleanup : NormalCleanup); + } + + void pushEHDestroy(QualType::DestructionKind dtorKind, Address addr, + QualType type); + + void pushStackRestore(CleanupKind kind, Address SPMem); + + static bool + IsConstructorDelegationValid(const clang::CXXConstructorDecl *Ctor); + + struct VPtr { + clang::BaseSubobject Base; + const clang::CXXRecordDecl *NearestVBase; + clang::CharUnits OffsetFromNearestVBase; + const clang::CXXRecordDecl *VTableClass; + }; + + using VisitedVirtualBasesSetTy = + llvm::SmallPtrSet; + + using VPtrsVector = llvm::SmallVector; + VPtrsVector getVTablePointers(const clang::CXXRecordDecl *VTableClass); + void getVTablePointers(clang::BaseSubobject Base, + const clang::CXXRecordDecl *NearestVBase, + clang::CharUnits OffsetFromNearestVBase, + bool BaseIsNonVirtualPrimaryBase, + const clang::CXXRecordDecl *VTableClass, + VisitedVirtualBasesSetTy &VBases, VPtrsVector &vptrs); + /// Return the Value of the vtable pointer member pointed to by This. + mlir::Value getVTablePtr(mlir::Location Loc, Address This, + mlir::Type VTableTy, + const CXXRecordDecl *VTableClass); + + /// Returns whether we should perform a type checked load when loading a + /// virtual function for virtual calls to members of RD. 
This is generally + /// true when both vcall CFI and whole-program-vtables are enabled. + bool shouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD); + + /// If whole-program virtual table optimization is enabled, emit an assumption + /// that VTable is a member of RD's type identifier. Or, if vptr CFI is + /// enabled, emit a check that VTable is a member of RD's type identifier. + void buildTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + mlir::Value VTable, SourceLocation Loc); + + /// Return the VTT parameter that should be passed to a base + /// constructor/destructor with virtual bases. + /// FIXME: VTTs are Itanium ABI-specific, so the definition should move + /// to CIRGenItaniumCXXABI.cpp together with all the references to VTT. + mlir::Value GetVTTParameter(GlobalDecl GD, bool ForVirtualBase, + bool Delegating); + + /// Source location information about the default argument or member + /// initializer expression we're evaluating, if any. + clang::CurrentSourceLocExprScope CurSourceLocExprScope; + using SourceLocExprScopeGuard = + clang::CurrentSourceLocExprScope::SourceLocExprScopeGuard; + + /// A scoep within which we are constructing the fields of an object which + /// might use a CXXDefaultInitExpr. This stashes away a 'this' value to use if + /// we need to evaluate the CXXDefaultInitExpr within the evaluation. + class FieldConstructionScope { + public: + FieldConstructionScope(CIRGenFunction &CGF, Address This) + : CGF(CGF), OldCXXDefaultInitExprThis(CGF.CXXDefaultInitExprThis) { + CGF.CXXDefaultInitExprThis = This; + } + ~FieldConstructionScope() { + CGF.CXXDefaultInitExprThis = OldCXXDefaultInitExprThis; + } + + private: + CIRGenFunction &CGF; + Address OldCXXDefaultInitExprThis; + }; + + /// The scope of a CXXDefaultInitExpr. Within this scope, the value of 'this' + /// is overridden to be the object under construction. + class CXXDefaultInitExprScope { + public: + CXXDefaultInitExprScope(CIRGenFunction &CGF, + const clang::CXXDefaultInitExpr *E) + : CGF{CGF}, OldCXXThisValue(CGF.CXXThisValue), + OldCXXThisAlignment(CGF.CXXThisAlignment), + SourceLocScope(E, CGF.CurSourceLocExprScope) { + CGF.CXXThisValue = CGF.CXXDefaultInitExprThis.getPointer(); + CGF.CXXThisAlignment = CGF.CXXDefaultInitExprThis.getAlignment(); + } + ~CXXDefaultInitExprScope() { + CGF.CXXThisValue = OldCXXThisValue; + CGF.CXXThisAlignment = OldCXXThisAlignment; + } + + public: + CIRGenFunction &CGF; + mlir::Value OldCXXThisValue; + clang::CharUnits OldCXXThisAlignment; + SourceLocExprScopeGuard SourceLocScope; + }; + + struct CXXDefaultArgExprScope : SourceLocExprScopeGuard { + CXXDefaultArgExprScope(CIRGenFunction &CGF, const CXXDefaultArgExpr *E) + : SourceLocExprScopeGuard(E, CGF.CurSourceLocExprScope) {} + }; + + LValue MakeNaturalAlignPointeeAddrLValue(mlir::Value V, clang::QualType T); + LValue MakeNaturalAlignAddrLValue(mlir::Value val, QualType ty); + + /// Construct an address with the natural alignment of T. If a pointer to T + /// is expected to be signed, the pointer passed to this function must have + /// been signed, and the returned Address will have the pointer authentication + /// information needed to authenticate the signed pointer. 
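  // Illustrative sketch (illustration only, not taken from the change above):
  // the kind of source construct FieldConstructionScope and
  // CXXDefaultInitExprScope exist for. The default member initializer for `y`
  // is a CXXDefaultInitExpr; while B()'s field initialization is emitted,
  // 'this' must refer to the B object currently under construction.
  struct A {
    int v = 1;
  };
  struct B {
    A a;
    int y = a.v + 1; // CXXDefaultInitExpr: evaluated against the new B's 'this'
    B() {}           // implicitly uses the default initializer for 'y'
  };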
+ Address makeNaturalAddressForPointer( + mlir::Value ptr, QualType t, CharUnits alignment = CharUnits::Zero(), + bool forPointeeType = false, LValueBaseInfo *baseInfo = nullptr, + TBAAAccessInfo *tbaaInfo = nullptr, + KnownNonNull_t isKnownNonNull = NotKnownNonNull) { + if (alignment.isZero()) + alignment = + CGM.getNaturalTypeAlignment(t, baseInfo, tbaaInfo, forPointeeType); + return Address(ptr, convertTypeForMem(t), alignment, isKnownNonNull); + } + + /// Load the value for 'this'. This function is only valid while generating + /// code for an C++ member function. + /// FIXME(cir): this should return a mlir::Value! + mlir::Value LoadCXXThis() { + assert(CXXThisValue && "no 'this' value for this function"); + return CXXThisValue; + } + Address LoadCXXThisAddress(); + + /// Convert the given pointer to a complete class to the given direct base. + Address getAddressOfDirectBaseInCompleteClass(mlir::Location loc, + Address Value, + const CXXRecordDecl *Derived, + const CXXRecordDecl *Base, + bool BaseIsVirtual); + + Address getAddressOfBaseClass(Address Value, const CXXRecordDecl *Derived, + CastExpr::path_const_iterator PathBegin, + CastExpr::path_const_iterator PathEnd, + bool NullCheckValue, SourceLocation Loc); + + /// Emit code for the start of a function. + /// \param Loc The location to be associated with the function. + /// \param StartLoc The location of the function body. + void StartFunction(clang::GlobalDecl GD, clang::QualType RetTy, + mlir::cir::FuncOp Fn, const CIRGenFunctionInfo &FnInfo, + const FunctionArgList &Args, clang::SourceLocation Loc, + clang::SourceLocation StartLoc); + + /// Emit a conversion from the specified type to the specified destination + /// type, both of which are CIR scalar types. + mlir::Value buildScalarConversion(mlir::Value Src, clang::QualType SrcTy, + clang::QualType DstTy, + clang::SourceLocation Loc); + + /// Emit a conversion from the specified complex type to the specified + /// destination type, where the destination type is an LLVM scalar type. + mlir::Value buildComplexToScalarConversion(mlir::Value Src, QualType SrcTy, + QualType DstTy, + SourceLocation Loc); + + LValue makeAddrLValue(Address addr, clang::QualType ty, + LValueBaseInfo baseInfo) { + return LValue::makeAddr(addr, ty, getContext(), baseInfo, + CGM.getTBAAAccessInfo(ty)); + } + + LValue makeAddrLValue(Address addr, clang::QualType ty, + AlignmentSource source = AlignmentSource::Type) { + return LValue::makeAddr(addr, ty, getContext(), LValueBaseInfo(source), + CGM.getTBAAAccessInfo(ty)); + } + + void initializeVTablePointers(mlir::Location loc, + const clang::CXXRecordDecl *RD); + void initializeVTablePointer(mlir::Location loc, const VPtr &Vptr); + + AggValueSlot::Overlap_t getOverlapForFieldInit(const FieldDecl *FD); + LValue buildLValueForField(LValue Base, const clang::FieldDecl *Field); + LValue buildLValueForBitField(LValue base, const FieldDecl *field); + + /// Like buildLValueForField, excpet that if the Field is a reference, this + /// will return the address of the reference and not the address of the value + /// stored in the reference. + LValue buildLValueForFieldInitialization(LValue Base, + const clang::FieldDecl *Field, + llvm::StringRef FieldName); + + void buildInitializerForField(clang::FieldDecl *Field, LValue LHS, + clang::Expr *Init); + + /// Determine whether the given initializer is trivial in the sense + /// that it requires no code to be generated. 
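  // Illustrative sketch (illustration only; the exact set of cases accepted by
  // isTrivialInitializer() is an assumption based on its doc comment above):
  // initializers that require no emitted code versus ones that do.
  struct Point {
    int x, y;
  };
  static void trivialInitSketch() {
    Point a;          // no initializer: nothing beyond the stack allocation
    Point b = {1, 2}; // non-trivial: member stores must be emitted
    (void)a;
    (void)b;
  }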
+ bool isTrivialInitializer(const clang::Expr *Init); + + // TODO: this can also be abstrated into common AST helpers + bool hasBooleanRepresentation(clang::QualType Ty); + + void buildCXXThrowExpr(const CXXThrowExpr *E); + + RValue buildAtomicExpr(AtomicExpr *E); + void buildAtomicStore(RValue rvalue, LValue lvalue, bool isInit); + void buildAtomicStore(RValue rvalue, LValue lvalue, mlir::cir::MemOrder MO, + bool IsVolatile, bool isInit); + void buildAtomicInit(Expr *init, LValue dest); + + /// Return the address of a local variable. + Address GetAddrOfLocalVar(const clang::VarDecl *VD) { + auto it = LocalDeclMap.find(VD); + assert(it != LocalDeclMap.end() && + "Invalid argument to GetAddrOfLocalVar(), no decl!"); + return it->second; + } + + Address getAddrOfBitFieldStorage(LValue base, const clang::FieldDecl *field, + mlir::Type fieldType, unsigned index); + + /// Given an opaque value expression, return its LValue mapping if it exists, + /// otherwise create one. + LValue getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e); + + /// Given an opaque value expression, return its RValue mapping if it exists, + /// otherwise create one. + RValue getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e); + + /// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts. + static bool isWrappedCXXThis(const clang::Expr *E); + + void buildDelegateCXXConstructorCall(const clang::CXXConstructorDecl *Ctor, + clang::CXXCtorType CtorType, + const FunctionArgList &Args, + clang::SourceLocation Loc); + + // It's important not to confuse this and the previous function. Delegating + // constructors are the C++11 feature. The constructor delegate optimization + // is used to reduce duplication in the base and complete constructors where + // they are substantially the same. + void buildDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor, + const FunctionArgList &Args); + + /// We are performing a delegate call; that is, the current function is + /// delegating to another one. Produce a r-value suitable for passing the + /// given parameter. + void buildDelegateCallArg(CallArgList &args, const clang::VarDecl *param, + clang::SourceLocation loc); + + /// Return true if the current function should not be instrumented with + /// sanitizers. + bool ShouldSkipSanitizerInstrumentation(); + bool ShouldXRayInstrumentFunction() const; + + /// Return true if the current function should be instrumented with + /// __cyg_profile_func_* calls + bool ShouldInstrumentFunction(); + + /// TODO(cir): add TBAAAccessInfo + Address buildArrayToPointerDecay(const Expr *Array, + LValueBaseInfo *BaseInfo = nullptr); + + /// Emits the code necessary to evaluate an arbitrary expression into the + /// given memory location. + void buildAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, + bool IsInitializer); + void buildAnyExprToExn(const Expr *E, Address Addr); + + LValue buildCheckedLValue(const Expr *E, TypeCheckKind TCK); + LValue buildMemberExpr(const MemberExpr *E); + LValue buildCompoundLiteralLValue(const CompoundLiteralExpr *E); + + /// Specifies which type of sanitizer check to apply when handling a + /// particular builtin. + enum BuiltinCheckKind { + BCK_CTZPassedZero, + BCK_CLZPassedZero, + }; + + /// Emits an argument for a call to a builtin. If the builtin sanitizer is + /// enabled, a runtime check specified by \p Kind is also emitted. + mlir::Value buildCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind); + + /// returns true if aggregate type has a volatile member. 
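  // Illustrative sketch (illustration only): an aggregate with a volatile
  // member, the case hasVolatileMember() and buildAggregateAssign() below care
  // about -- the copy must be treated as a volatile access rather than a plain
  // memcpy.
  struct Status {
    volatile int flag; // makes the enclosing aggregate volatile-containing
    int data;
  };
  static void copySketch(Status &dst, const Status &src) { dst = src; }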
+ /// TODO(cir): this could be a common AST helper between LLVM / CIR. + bool hasVolatileMember(QualType T) { + if (const RecordType *RT = T->getAs()) { + const RecordDecl *RD = cast(RT->getDecl()); + return RD->hasVolatileMember(); + } + return false; + } + + /// Emit an aggregate assignment. + void buildAggregateAssign(LValue Dest, LValue Src, QualType EltTy) { + bool IsVolatile = hasVolatileMember(EltTy); + buildAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile); + } + + LValue buildAggExprToLValue(const Expr *E); + + /// Emit an aggregate copy. + /// + /// \param isVolatile \c true iff either the source or the destination is + /// volatile. + /// \param MayOverlap Whether the tail padding of the destination might be + /// occupied by some other object. More efficient code can often be + /// generated if not. + void buildAggregateCopy(LValue Dest, LValue Src, QualType EltTy, + AggValueSlot::Overlap_t MayOverlap, + bool isVolatile = false); + + /// Emit a reached-unreachable diagnostic if \p Loc is valid and runtime + /// checking is enabled. Otherwise, just emit an unreachable instruction. + void buildUnreachable(SourceLocation Loc); + + /// + /// Cleanups + /// -------- + + /// Header for data within LifetimeExtendedCleanupStack. + struct LifetimeExtendedCleanupHeader { + /// The size of the following cleanup object. + unsigned Size; + /// The kind of cleanup to push: a value from the CleanupKind enumeration. + unsigned Kind : 31; + /// Whether this is a conditional cleanup. + unsigned IsConditional : 1; + + size_t getSize() const { return Size; } + CleanupKind getKind() const { return (CleanupKind)Kind; } + bool isConditional() const { return IsConditional; } + }; + + /// Emits try/catch information for the current EH stack. + mlir::cir::CallOp callWithExceptionCtx = nullptr; + mlir::Operation *buildLandingPad(mlir::cir::TryOp tryOp); + mlir::Block *getEHResumeBlock(bool isCleanup, mlir::cir::TryOp tryOp); + mlir::Block *getEHDispatchBlock(EHScopeStack::stable_iterator scope, + mlir::cir::TryOp tryOp); + /// Unified block containing a call to cir.resume + mlir::Block *ehResumeBlock = nullptr; + llvm::DenseMap cleanupsToPatch; + + /// The cleanup depth enclosing all the cleanups associated with the + /// parameters. + EHScopeStack::stable_iterator PrologueCleanupDepth; + + mlir::Operation *getInvokeDestImpl(mlir::cir::TryOp tryOp); + mlir::Operation *getInvokeDest(mlir::cir::TryOp tryOp) { + if (!EHStack.requiresLandingPad()) + return nullptr; + // Return the respective cir.try, this can be used to compute + // any other relevant information. + return getInvokeDestImpl(tryOp); + } + bool isInvokeDest(); + + /// Takes the old cleanup stack size and emits the cleanup blocks + /// that have been added. + void + PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, + std::initializer_list ValuesToReload = {}); + + /// Takes the old cleanup stack size and emits the cleanup blocks + /// that have been added, then adds all lifetime-extended cleanups from + /// the given position to the stack. + void + PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, + size_t OldLifetimeExtendedStackSize, + std::initializer_list ValuesToReload = {}); + + /// Will pop the cleanup entry on the stack and process all branch fixups. + void PopCleanupBlock(bool FallThroughIsBranchThrough = false); + + /// Deactivates the given cleanup block. The block cannot be reactivated. Pops + /// it if it's the top of the stack. 
+  ///
+  /// \param DominatingIP - An instruction which is known to
+  ///   dominate the current IP (if set) and which lies along
+  ///   all paths of execution between the current IP and the
+  ///   point at which the cleanup comes into scope.
+  void DeactivateCleanupBlock(EHScopeStack::stable_iterator Cleanup,
+                              mlir::Operation *DominatingIP);
+
+  typedef void Destroyer(CIRGenFunction &CGF, Address addr, QualType ty);
+
+  static Destroyer destroyCXXObject;
+
+  void pushDestroy(QualType::DestructionKind dtorKind, Address addr,
+                   QualType type);
+
+  void pushDestroy(CleanupKind kind, Address addr, QualType type,
+                   Destroyer *destroyer, bool useEHCleanupForArray);
+
+  Destroyer *getDestroyer(QualType::DestructionKind kind);
+
+  void emitDestroy(Address addr, QualType type, Destroyer *destroyer,
+                   bool useEHCleanupForArray);
+
+  /// An object to manage conditionally-evaluated expressions.
+  class ConditionalEvaluation {
+    mlir::OpBuilder::InsertPoint insertPt;
+
+  public:
+    ConditionalEvaluation(CIRGenFunction &CGF)
+        : insertPt(CGF.builder.saveInsertionPoint()) {}
+    ConditionalEvaluation(mlir::OpBuilder::InsertPoint ip) : insertPt(ip) {}
+
+    void begin(CIRGenFunction &CGF) {
+      assert(CGF.OutermostConditional != this);
+      if (!CGF.OutermostConditional)
+        CGF.OutermostConditional = this;
+    }
+
+    void end(CIRGenFunction &CGF) {
+      assert(CGF.OutermostConditional != nullptr);
+      if (CGF.OutermostConditional == this)
+        CGF.OutermostConditional = nullptr;
+    }
+
+    /// Returns the insertion point which will be executed prior to each
+    /// evaluation of the conditional code. In LLVM OG, this method
+    /// is called getStartingBlock.
+    mlir::OpBuilder::InsertPoint getInsertPoint() const { return insertPt; }
+  };
+
+  struct ConditionalInfo {
+    std::optional<LValue> LHS{}, RHS{};
+    mlir::Value Result{};
+  };
+
+  template <typename FuncTy>
+  ConditionalInfo buildConditionalBlocks(const AbstractConditionalOperator *E,
+                                         const FuncTy &BranchGenFunc);
+
+  // Return true if we're currently emitting one branch or the other of a
+  // conditional expression.
+  bool isInConditionalBranch() const { return OutermostConditional != nullptr; }
+
+  void setBeforeOutermostConditional(mlir::Value value, Address addr) {
+    assert(isInConditionalBranch());
+    {
+      mlir::OpBuilder::InsertionGuard guard(builder);
+      builder.restoreInsertionPoint(OutermostConditional->getInsertPoint());
+      builder.createStore(
+          value.getLoc(), value, addr,
+          /*volatile*/ false,
+          mlir::IntegerAttr::get(
+              mlir::IntegerType::get(value.getContext(), 64),
+              (uint64_t)addr.getAlignment().getAsAlign().value()));
+    }
+  }
+
+  void pushIrregularPartialArrayCleanup(mlir::Value arrayBegin,
+                                        Address arrayEndPointer,
+                                        QualType elementType,
+                                        CharUnits elementAlign,
+                                        Destroyer *destroyer);
+  void pushRegularPartialArrayCleanup(mlir::Value arrayBegin,
+                                      mlir::Value arrayEnd,
+                                      QualType elementType,
+                                      CharUnits elementAlign,
+                                      Destroyer *destroyer);
+  void pushDestroyAndDeferDeactivation(QualType::DestructionKind dtorKind,
+                                       Address addr, QualType type);
+  void pushDestroyAndDeferDeactivation(CleanupKind cleanupKind, Address addr,
+                                       QualType type, Destroyer *destroyer,
+                                       bool useEHCleanupForArray);
+  void buildArrayDestroy(mlir::Value begin, mlir::Value end,
+                         QualType elementType, CharUnits elementAlign,
+                         Destroyer *destroyer, bool checkZeroLength,
+                         bool useEHCleanup);
+
+  // Points to the outermost active conditional control. This is used so that
+  // we know if a temporary should be destroyed conditionally.
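  // Illustrative sketch (illustration only): why a cleanup sometimes has to be
  // conditional. The Guard temporary below is created only when the true arm
  // of the conditional operator is evaluated, so its destructor cleanup must
  // be guarded; ConditionalEvaluation/OutermostConditional track that region.
  struct Guard {
    ~Guard();
  };
  static bool useSketch(const Guard &);
  static bool maybeSketch(bool cond) {
    return cond ? useSketch(Guard{}) : false; // Guard{} exists only on one arm
  }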
+ ConditionalEvaluation *OutermostConditional = nullptr; + + template + typename DominatingValue::saved_type saveValueInCond(T value) { + return DominatingValue::save(*this, value); + } + + /// Push a cleanup to be run at the end of the current full-expression. Safe + /// against the possibility that we're currently inside a + /// conditionally-evaluated expression. + template + void pushFullExprCleanup(CleanupKind kind, As... A) { + // If we're not in a conditional branch, or if none of the + // arguments requires saving, then use the unconditional cleanup. + if (!isInConditionalBranch()) + return EHStack.pushCleanup(kind, A...); + + // Stash values in a tuple so we can guarantee the order of saves. + typedef std::tuple::saved_type...> SavedTuple; + SavedTuple Saved{saveValueInCond(A)...}; + + typedef EHScopeStack::ConditionalCleanup CleanupType; + EHStack.pushCleanupTuple(kind, Saved); + initFullExprCleanup(); + } + + /// Set up the last cleanup that was pushed as a conditional + /// full-expression cleanup. + void initFullExprCleanup() { + initFullExprCleanupWithFlag(createCleanupActiveFlag()); + } + + void initFullExprCleanupWithFlag(Address ActiveFlag); + Address createCleanupActiveFlag(); + + /// Enters a new scope for capturing cleanups, all of which + /// will be executed once the scope is exited. + class RunCleanupsScope { + EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth; + size_t LifetimeExtendedCleanupStackSize; + bool OldDidCallStackSave; + + protected: + bool PerformCleanup; + + private: + RunCleanupsScope(const RunCleanupsScope &) = delete; + void operator=(const RunCleanupsScope &) = delete; + + protected: + CIRGenFunction &CGF; + + public: + /// Enter a new cleanup scope. + explicit RunCleanupsScope(CIRGenFunction &CGF) + : PerformCleanup(true), CGF(CGF) { + CleanupStackDepth = CGF.EHStack.stable_begin(); + LifetimeExtendedCleanupStackSize = + CGF.LifetimeExtendedCleanupStack.size(); + OldDidCallStackSave = CGF.DidCallStackSave; + CGF.DidCallStackSave = false; + OldCleanupScopeDepth = CGF.CurrentCleanupScopeDepth; + CGF.CurrentCleanupScopeDepth = CleanupStackDepth; + } + + /// Exit this cleanup scope, emitting any accumulated cleanups. + ~RunCleanupsScope() { + if (PerformCleanup) + ForceCleanup(); + } + + /// Determine whether this scope requires any cleanups. + bool requiresCleanups() const { + return CGF.EHStack.stable_begin() != CleanupStackDepth; + } + + /// Force the emission of cleanups now, instead of waiting + /// until this object is destroyed. + /// \param ValuesToReload - A list of values that need to be available at + /// the insertion point after cleanup emission. If cleanup emission created + /// a shared cleanup block, these value pointers will be rewritten. + /// Otherwise, they not will be modified. + void + ForceCleanup(std::initializer_list ValuesToReload = {}) { + assert(PerformCleanup && "Already forced cleanup"); + CGF.DidCallStackSave = OldDidCallStackSave; + CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize, + ValuesToReload); + PerformCleanup = false; + CGF.CurrentCleanupScopeDepth = OldCleanupScopeDepth; + } + }; + + // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. + EHScopeStack::stable_iterator CurrentCleanupScopeDepth = + EHScopeStack::stable_end(); + + /// ------- + /// Lexical Scope: to be read as in the meaning in CIR, a scope is always + /// related with initialization and destruction of objects. 
+  /// -------
+
+public:
+  // Represents a cir.scope, cir.if, and then/else regions. I.e. lexical
+  // scopes that require cleanups.
+  struct LexicalScope : public RunCleanupsScope {
+  private:
+    // Block containing cleanup code for things initialized in this
+    // lexical context (scope).
+    mlir::Block *CleanupBlock = nullptr;
+
+    // Points to scope entry block. This is useful, for instance, for
+    // helping to insert allocas before finalizing any recursive codegen
+    // from switches.
+    mlir::Block *EntryBlock;
+
+    // On a coroutine body, the OnFallthrough sub stmt holds the handler
+    // (CoreturnStmt) for control flow falling off the body. Keep track
+    // of emitted co_return in this scope and allow OnFallthrough to be
+    // skipped.
+    bool HasCoreturn = false;
+
+    LexicalScope *ParentScope = nullptr;
+
+    // Holds actual value for ScopeKind::Try
+    mlir::cir::TryOp tryOp = nullptr;
+
+    // FIXME: perhaps we can use some info encoded in operations.
+    enum Kind {
+      Regular,   // cir.if, cir.scope, if_regions
+      Ternary,   // cir.ternary
+      Switch,    // cir.switch
+      Try,       // cir.try
+      GlobalInit // cir.global initialization code
+    } ScopeKind = Regular;
+
+    // Track scope return value.
+    mlir::Value retVal = nullptr;
+
+  public:
+    unsigned Depth = 0;
+    bool HasReturn = false;
+
+    LexicalScope(CIRGenFunction &CGF, mlir::Location loc, mlir::Block *eb)
+        : RunCleanupsScope(CGF), EntryBlock(eb), ParentScope(CGF.currLexScope),
+          BeginLoc(loc), EndLoc(loc) {
+
+      CGF.currLexScope = this;
+      if (ParentScope)
+        Depth++;
+
+      // Has multiple locations: overwrite with separate start and end locs.
+      if (const auto fusedLoc = mlir::dyn_cast<mlir::FusedLoc>(loc)) {
+        assert(fusedLoc.getLocations().size() == 2 && "too many locations");
+        BeginLoc = fusedLoc.getLocations()[0];
+        EndLoc = fusedLoc.getLocations()[1];
+      }
+
+      assert(EntryBlock && "expected valid block");
+    }
+
+    void setRetVal(mlir::Value v) { retVal = v; }
+
+    void cleanup();
+    void restore() { CGF.currLexScope = ParentScope; }
+
+    ~LexicalScope() {
+      // EmitLexicalBlockEnd
+      assert(!MissingFeatures::generateDebugInfo());
+      // If we should perform a cleanup, force them now. Note that
+      // this ends the cleanup scope before rescoping any labels.
+      cleanup();
+      restore();
+    }
+
+    /// Force the emission of cleanups now, instead of waiting
+    /// until this object is destroyed.
+    void ForceCleanup() {
+      RunCleanupsScope::ForceCleanup();
+      // TODO(cir): something akin to rescopeLabels if it makes sense to CIR.
+    }
+
+    // ---
+    // Coroutine tracking
+    // ---
+    bool hasCoreturn() const { return HasCoreturn; }
+    void setCoreturn() { HasCoreturn = true; }
+
+    // ---
+    // Kind
+    // ---
+    bool isGlobalInit() { return ScopeKind == Kind::GlobalInit; }
+    bool isRegular() { return ScopeKind == Kind::Regular; }
+    bool isSwitch() { return ScopeKind == Kind::Switch; }
+    bool isTernary() { return ScopeKind == Kind::Ternary; }
+    bool isTry() { return ScopeKind == Kind::Try; }
+    mlir::cir::TryOp getTry() {
+      assert(isTry());
+      return tryOp;
+    }
+    mlir::cir::TryOp getClosestTryParent();
+
+    void setAsGlobalInit() { ScopeKind = Kind::GlobalInit; }
+    void setAsSwitch() { ScopeKind = Kind::Switch; }
+    void setAsTernary() { ScopeKind = Kind::Ternary; }
+    void setAsTry(mlir::cir::TryOp op) {
+      ScopeKind = Kind::Try;
+      tryOp = op;
+    }
+
+    // ---
+    // Goto handling
+    // ---
+
+    // Lazily create the cleanup block or return what's available.
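    // Illustrative usage sketch (illustration only; the emission context is
    // hypothetical and getBuilder() returning an mlir::OpBuilder-derived
    // builder is assumed). LexicalScope is RAII: cleanups pushed while it is
    // alive are emitted when it is destroyed, via cleanup() and restore().
    static void emitScopedBodySketch(CIRGenFunction &CGF, mlir::Location loc) {
      mlir::Block *entry = CGF.getBuilder().getInsertionBlock();
      CIRGenFunction::LexicalScope lexScope(CGF, loc, entry);
      lexScope.setAsTernary(); // tag the kind when emitting a cir.ternary
      // ... emit the region body; destructor cleanups get pushed meanwhile ...
    } // ~LexicalScope runs cleanup() and restores the parent scope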
+ mlir::Block *getOrCreateCleanupBlock(mlir::OpBuilder &builder) { + if (CleanupBlock) + return getCleanupBlock(builder); + return createCleanupBlock(builder); + } + + mlir::Block *getCleanupBlock(mlir::OpBuilder &builder) { + return CleanupBlock; + } + mlir::Block *createCleanupBlock(mlir::OpBuilder &builder) { + { + // Create the cleanup block but dont hook it up around just yet. + mlir::OpBuilder::InsertionGuard guard(builder); + CleanupBlock = builder.createBlock(builder.getBlock()->getParent()); + } + assert(builder.getInsertionBlock() && "Should be valid"); + return CleanupBlock; + } + + // --- + // Return handling + // --- + + private: + // On switches we need one return block per region, since cases don't + // have their own scopes but are distinct regions nonetheless. + llvm::SmallVector RetBlocks; + llvm::SmallVector> RetLocs; + llvm::SmallVector> SwitchRegions; + + // There's usually only one ret block per scope, but this needs to be + // get or create because of potential unreachable return statements, note + // that for those, all source location maps to the first one found. + mlir::Block *createRetBlock(CIRGenFunction &CGF, mlir::Location loc) { + assert((isSwitch() || RetBlocks.size() == 0) && + "only switches can hold more than one ret block"); + + // Create the cleanup block but dont hook it up around just yet. + mlir::OpBuilder::InsertionGuard guard(CGF.builder); + auto *b = CGF.builder.createBlock(CGF.builder.getBlock()->getParent()); + RetBlocks.push_back(b); + RetLocs.push_back(loc); + return b; + } + + mlir::cir::ReturnOp buildReturn(mlir::Location loc); + void buildImplicitReturn(); + + public: + llvm::ArrayRef getRetBlocks() { return RetBlocks; } + llvm::ArrayRef> getRetLocs() { + return RetLocs; + } + llvm::MutableArrayRef> getSwitchRegions() { + assert(isSwitch() && "expected switch scope"); + return SwitchRegions; + } + + mlir::Region *createSwitchRegion() { + assert(isSwitch() && "expected switch scope"); + SwitchRegions.push_back(std::make_unique()); + return SwitchRegions.back().get(); + } + + mlir::Block *getOrCreateRetBlock(CIRGenFunction &CGF, mlir::Location loc) { + unsigned int regionIdx = 0; + if (isSwitch()) + regionIdx = SwitchRegions.size() - 1; + if (regionIdx >= RetBlocks.size()) + return createRetBlock(CGF, loc); + return &*RetBlocks.back(); + } + + // Scope entry block tracking + mlir::Block *getEntryBlock() { return EntryBlock; } + + mlir::Location BeginLoc, EndLoc; + }; + + LexicalScope *currLexScope = nullptr; + + /// CIR build helpers + /// ----------------- + + /// This creates an alloca and inserts it into the entry block if \p ArraySize + /// is nullptr, + /// + /// TODO(cir): ... otherwise inserts it at the current insertion point of + /// the builder. + /// The caller is responsible for setting an appropriate alignment on + /// the alloca. + /// + /// \p ArraySize is the number of array elements to be allocated if it + /// is not nullptr. + /// + /// LangAS::Default is the address space of pointers to local variables and + /// temporaries, as exposed in the source language. In certain + /// configurations, this is not the same as the alloca address space, and a + /// cast is needed to lift the pointer from the alloca AS into + /// LangAS::Default. This can happen when the target uses a restricted + /// address space for the stack but the source language requires + /// LangAS::Default to be a generic address space. 
+  /// The latter condition is common for most programming languages; OpenCL is
+  /// an exception in that LangAS::Default is the private address space, which
+  /// naturally maps to the stack.
+  ///
+  /// Because the address of a temporary is often exposed to the program in
+  /// various ways, this function will perform the cast. The original alloca
+  /// instruction is returned through \p Alloca if it is not nullptr.
+  ///
+  /// The cast is not performed in CreateTempAllocaWithoutCast. This is
+  /// more efficient if the caller knows that the address will not be exposed.
+  mlir::cir::AllocaOp CreateTempAlloca(mlir::Type Ty, mlir::Location Loc,
+                                       const Twine &Name = "tmp",
+                                       mlir::Value ArraySize = nullptr,
+                                       bool insertIntoFnEntryBlock = false);
+  mlir::cir::AllocaOp
+  CreateTempAllocaInFnEntryBlock(mlir::Type Ty, mlir::Location Loc,
+                                 const Twine &Name = "tmp",
+                                 mlir::Value ArraySize = nullptr);
+  mlir::cir::AllocaOp CreateTempAlloca(mlir::Type Ty, mlir::Location Loc,
+                                       const Twine &Name = "tmp",
+                                       mlir::OpBuilder::InsertPoint ip = {},
+                                       mlir::Value ArraySize = nullptr);
+  Address CreateTempAlloca(mlir::Type Ty, CharUnits align, mlir::Location Loc,
+                           const Twine &Name = "tmp",
+                           mlir::Value ArraySize = nullptr,
+                           Address *Alloca = nullptr,
+                           mlir::OpBuilder::InsertPoint ip = {});
+  Address CreateTempAllocaWithoutCast(mlir::Type Ty, CharUnits align,
+                                      mlir::Location Loc,
+                                      const Twine &Name = "tmp",
+                                      mlir::Value ArraySize = nullptr,
+                                      mlir::OpBuilder::InsertPoint ip = {});
+
+  /// Create a temporary memory object of the given type, with
+  /// appropriate alignment, and cast it to the default address space. Returns
+  /// the original alloca instruction by \p Alloca if it is not nullptr.
+  Address CreateMemTemp(QualType T, mlir::Location Loc,
+                        const Twine &Name = "tmp", Address *Alloca = nullptr,
+                        mlir::OpBuilder::InsertPoint ip = {});
+  Address CreateMemTemp(QualType T, CharUnits Align, mlir::Location Loc,
+                        const Twine &Name = "tmp", Address *Alloca = nullptr,
+                        mlir::OpBuilder::InsertPoint ip = {});
+
+  /// Create a temporary memory object of the given type, with
+  /// appropriate alignment without casting it to the default address space.
+  Address CreateMemTempWithoutCast(QualType T, mlir::Location Loc,
+                                   const Twine &Name = "tmp");
+  Address CreateMemTempWithoutCast(QualType T, CharUnits Align,
+                                   mlir::Location Loc,
+                                   const Twine &Name = "tmp");
+
+  /// Create a temporary memory object for the given
+  /// aggregate type.
+  AggValueSlot CreateAggTemp(QualType T, mlir::Location Loc,
+                             const Twine &Name = "tmp",
+                             Address *Alloca = nullptr) {
+    return AggValueSlot::forAddr(
+        CreateMemTemp(T, Loc, Name, Alloca), T.getQualifiers(),
+        AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers,
+        AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap);
+  }
+
+private:
+  QualType getVarArgType(const Expr *Arg);
+};
+
+/// Helper class with most of the code for saving a value for a
+/// conditional expression cleanup.
+struct DominatingCIRValue {
+  typedef llvm::PointerIntPair<mlir::Value, 1, bool> saved_type;
+
+  /// Answer whether the given value needs extra work to be saved.
+  static bool needsSaving(mlir::Value value) {
+    if (!value)
+      return false;
+
+    // If it's a block argument, we don't need to save.
+    mlir::Operation *definingOp = value.getDefiningOp();
+    if (!definingOp)
+      return false;
+
+    // If the value is defined in the function or a global init entry block, we
+    // don't need to save.
+ mlir::Block *currBlock = definingOp->getBlock(); + if (!currBlock->isEntryBlock() || !definingOp->getParentOp()) + return false; + + if (auto fnOp = definingOp->getParentOfType()) { + if (&fnOp.getBody().front() == currBlock) + return true; + return false; + } + + if (auto globalOp = definingOp->getParentOfType()) { + assert(globalOp.getNumRegions() == 2 && "other regions NYI"); + if (&globalOp.getCtorRegion().front() == currBlock) + return true; + if (&globalOp.getDtorRegion().front() == currBlock) + return true; + return false; + } + + return false; + } + + static saved_type save(CIRGenFunction &CGF, mlir::Value value); + static mlir::Value restore(CIRGenFunction &CGF, saved_type value); +}; + +inline DominatingCIRValue::saved_type +DominatingCIRValue::save(CIRGenFunction &CGF, mlir::Value value) { + if (!needsSaving(value)) + return saved_type(value, false); + + // Otherwise, we need an alloca. + auto align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getPrefTypeAlign(value.getType())); + mlir::Location loc = value.getLoc(); + Address alloca = + CGF.CreateTempAlloca(value.getType(), align, loc, "cond-cleanup.save"); + CGF.getBuilder().createStore(loc, value, alloca); + + return saved_type(alloca.emitRawPointer(), true); +} + +inline mlir::Value DominatingCIRValue::restore(CIRGenFunction &CGF, + saved_type value) { + llvm_unreachable("NYI"); +} + +/// A specialization of DominatingValue for RValue. +template <> struct DominatingValue { + typedef RValue type; + class saved_type { + enum Kind { + ScalarLiteral, + ScalarAddress, + AggregateLiteral, + AggregateAddress, + ComplexAddress + }; + union { + struct { + DominatingCIRValue::saved_type first, second; + } Vals; + DominatingValue
<Address>::saved_type AggregateAddr;
+  };
+  LLVM_PREFERRED_TYPE(Kind)
+  unsigned K : 3;
+
+  saved_type(DominatingCIRValue::saved_type Val1, unsigned K)
+      : Vals{Val1, DominatingCIRValue::saved_type()}, K(K) {}
+
+  saved_type(DominatingCIRValue::saved_type Val1,
+             DominatingCIRValue::saved_type Val2)
+      : Vals{Val1, Val2}, K(ComplexAddress) {}
+
+  saved_type(DominatingValue<Address>
::saved_type AggregateAddr, unsigned K) + : AggregateAddr(AggregateAddr), K(K) {} + + public: + static bool needsSaving(RValue value); + static saved_type save(CIRGenFunction &CGF, RValue value); + RValue restore(CIRGenFunction &CGF); + }; + + static bool needsSaving(type value) { return saved_type::needsSaving(value); } + static saved_type save(CIRGenFunction &CGF, type value) { + return saved_type::save(CGF, value); + } + static type restore(CIRGenFunction &CGF, saved_type value) { + return value.restore(CGF); + } +}; + +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_CIRGENFUNCTION_H diff --git a/clang/lib/CIR/CodeGen/CIRGenFunctionInfo.h b/clang/lib/CIR/CodeGen/CIRGenFunctionInfo.h new file mode 100644 index 000000000000..a07f62fe28d7 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenFunctionInfo.h @@ -0,0 +1,286 @@ +//==-- CIRGenFunctionInfo.h - Representation of fn argument/return types ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines CIRGenFunctionInfo and associated types used in representing the +// CIR source types and ABI-coerced types for function arguments and +// return values. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CIR_CIRGENFUNCTIONINFO_H +#define LLVM_CLANG_CIR_CIRGENFUNCTIONINFO_H + +#include "clang/AST/CanonicalType.h" +#include "clang/CIR/ABIArgInfo.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/TrailingObjects.h" + +namespace cir { + +struct CIRGenFunctionInfoArgInfo { + clang::CanQualType type; + ABIArgInfo info; +}; + +/// A class for recording the number of arguments that a function signature +/// requires. +class RequiredArgs { + /// The number of required arguments, or ~0 if the signature does not permit + /// optional arguments. + unsigned NumRequired; + +public: + enum All_t { All }; + + RequiredArgs(All_t _) : NumRequired(~0U) {} + explicit RequiredArgs(unsigned n) : NumRequired(n) { assert(n != ~0U); } + + unsigned getOpaqueData() const { return NumRequired; } + + bool allowsOptionalArgs() const { return NumRequired != ~0U; } + + /// Compute the arguments required by the given formal prototype, given that + /// there may be some additional, non-formal arguments in play. + /// + /// If FD is not null, this will consider pass_object_size params in FD. 
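  // Illustrative sketch (illustration only): what RequiredArgs encodes for the
  // two interesting prototype shapes.
  //   void fixed(int, char *);      // non-variadic -> RequiredArgs::All
  //   void open(int, char *, ...);  // variadic     -> RequiredArgs(2)
  static void requiredArgsSketch() {
    RequiredArgs allOfThem = RequiredArgs::All; // allowsOptionalArgs() == false
    RequiredArgs firstTwo(2);                   // getNumRequiredArgs() == 2
    (void)allOfThem;
    (void)firstTwo;
  }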
+  static RequiredArgs
+  forPrototypePlus(const clang::FunctionProtoType *prototype,
+                   unsigned additional) {
+    if (!prototype->isVariadic())
+      return All;
+
+    if (prototype->hasExtParameterInfos())
+      additional += llvm::count_if(
+          prototype->getExtParameterInfos(),
+          [](const clang::FunctionProtoType::ExtParameterInfo &ExtInfo) {
+            return ExtInfo.hasPassObjectSize();
+          });
+
+    return RequiredArgs(prototype->getNumParams() + additional);
+  }
+
+  static RequiredArgs
+  forPrototypePlus(clang::CanQual<clang::FunctionProtoType> prototype,
+                   unsigned additional) {
+    return forPrototypePlus(prototype.getTypePtr(), additional);
+  }
+
+  unsigned getNumRequiredArgs() const {
+    assert(allowsOptionalArgs());
+    return NumRequired;
+  }
+};
+
+class CIRGenFunctionInfo final
+    : public llvm::FoldingSetNode,
+      private llvm::TrailingObjects<
+          CIRGenFunctionInfo, CIRGenFunctionInfoArgInfo,
+          clang::FunctionProtoType::ExtParameterInfo> {
+
+  typedef CIRGenFunctionInfoArgInfo ArgInfo;
+  typedef clang::FunctionProtoType::ExtParameterInfo ExtParameterInfo;
+
+  /// The cir::CallingConv to use for this function (as specified by the user).
+  mlir::cir::CallingConv CallingConvention : 8;
+
+  /// The cir::CallingConv to actually use for this function, which may depend
+  /// on the ABI.
+  mlir::cir::CallingConv EffectiveCallingConvention : 8;
+
+  /// The clang::CallingConv that this was originally created with.
+  unsigned ASTCallingConvention : 6;
+
+  /// Whether this is an instance method.
+  unsigned InstanceMethod : 1;
+
+  /// Whether this is a chain call.
+  unsigned ChainCall : 1;
+
+  /// Whether this function is a CMSE nonsecure call
+  unsigned CmseNSCall : 1;
+
+  /// Whether this function is noreturn.
+  unsigned NoReturn : 1;
+
+  /// Whether this function is returns-retained.
+  unsigned ReturnsRetained : 1;
+
+  /// Whether this function saved caller registers.
+  unsigned NoCallerSavedRegs : 1;
+
+  /// How many arguments to pass inreg.
+  unsigned HasRegParm : 1;
+  unsigned RegParm : 3;
+
+  /// Whether this function has nocf_check attribute.
+  unsigned NoCfCheck : 1;
+
+  RequiredArgs Required;
+
+  /// The struct representing all arguments passed in memory. Only used when
+  /// passing non-trivial types with inalloca. Not part of the profile.
+  /// TODO: think about modeling this properly, this is just a dumb
+  /// substitution for now since we aren't supporting anything other than
+  /// arguments in registers atm
+  mlir::cir::StructType *ArgStruct;
+  unsigned ArgStructAlign : 31;
+  unsigned HasExtParameterInfos : 1;
+
+  unsigned NumArgs;
+
+  ArgInfo *getArgsBuffer() { return getTrailingObjects<ArgInfo>(); }
+
+  const ArgInfo *getArgsBuffer() const { return getTrailingObjects<ArgInfo>(); }
+
+  ExtParameterInfo *getExtParameterInfosBuffer() {
+    return getTrailingObjects<ExtParameterInfo>();
+  }
+
+  const ExtParameterInfo *getExtParameterInfosBuffer() const {
+    return getTrailingObjects<ExtParameterInfo>();
+  }
+
+  CIRGenFunctionInfo() : Required(RequiredArgs::All) {}
+
+public:
+  static CIRGenFunctionInfo *
+  create(mlir::cir::CallingConv cirCC, bool instanceMethod, bool chainCall,
+         const clang::FunctionType::ExtInfo &extInfo,
+         llvm::ArrayRef<ExtParameterInfo> paramInfos,
+         clang::CanQualType resultType,
+         llvm::ArrayRef<clang::CanQualType> argTypes, RequiredArgs required);
+  void operator delete(void *p) { ::operator delete(p); }
+
+  // Friending class TrailingObjects is apparently not good enough for MSVC, so
+  // these have to be public.
+  friend class TrailingObjects;
+  size_t numTrailingObjects(OverloadToken<ArgInfo>) const {
+    return NumArgs + 1;
+  }
+  size_t numTrailingObjects(OverloadToken<ExtParameterInfo>) const {
+    return (HasExtParameterInfos ?
NumArgs : 0); + } + + using const_arg_iterator = const ArgInfo *; + using arg_iterator = ArgInfo *; + + static void Profile(llvm::FoldingSetNodeID &ID, bool InstanceMethod, + bool ChainCall, const clang::FunctionType::ExtInfo &info, + llvm::ArrayRef paramInfos, + RequiredArgs required, clang::CanQualType resultType, + llvm::ArrayRef argTypes) { + ID.AddInteger(info.getCC()); + ID.AddBoolean(InstanceMethod); + ID.AddBoolean(info.getNoReturn()); + ID.AddBoolean(info.getProducesResult()); + ID.AddBoolean(info.getNoCallerSavedRegs()); + ID.AddBoolean(info.getHasRegParm()); + ID.AddBoolean(info.getRegParm()); + ID.AddBoolean(info.getNoCfCheck()); + ID.AddBoolean(info.getCmseNSCall()); + ID.AddBoolean(required.getOpaqueData()); + ID.AddBoolean(!paramInfos.empty()); + if (!paramInfos.empty()) { + for (auto paramInfo : paramInfos) + ID.AddInteger(paramInfo.getOpaqueValue()); + } + resultType.Profile(ID); + for (auto i : argTypes) + i.Profile(ID); + } + + /// getASTCallingConvention() - Return the AST-specified calling convention + clang::CallingConv getASTCallingConvention() const { + return clang::CallingConv(ASTCallingConvention); + } + + void Profile(llvm::FoldingSetNodeID &ID) { + ID.AddInteger(getASTCallingConvention()); + ID.AddBoolean(InstanceMethod); + ID.AddBoolean(ChainCall); + ID.AddBoolean(NoReturn); + ID.AddBoolean(ReturnsRetained); + ID.AddBoolean(NoCallerSavedRegs); + ID.AddBoolean(HasRegParm); + ID.AddBoolean(RegParm); + ID.AddBoolean(NoCfCheck); + ID.AddBoolean(CmseNSCall); + ID.AddInteger(Required.getOpaqueData()); + ID.AddBoolean(HasExtParameterInfos); + if (HasExtParameterInfos) { + for (auto paramInfo : getExtParameterInfos()) + ID.AddInteger(paramInfo.getOpaqueValue()); + } + getReturnType().Profile(ID); + for (const auto &I : arguments()) + I.type.Profile(ID); + } + + llvm::MutableArrayRef arguments() { + return llvm::MutableArrayRef(arg_begin(), NumArgs); + } + llvm::ArrayRef arguments() const { + return llvm::ArrayRef(arg_begin(), NumArgs); + } + + const_arg_iterator arg_begin() const { return getArgsBuffer() + 1; } + const_arg_iterator arg_end() const { return getArgsBuffer() + 1 + NumArgs; } + arg_iterator arg_begin() { return getArgsBuffer() + 1; } + arg_iterator arg_end() { return getArgsBuffer() + 1 + NumArgs; } + + unsigned arg_size() const { return NumArgs; } + + llvm::ArrayRef getExtParameterInfos() const { + if (!HasExtParameterInfos) + return {}; + return llvm::ArrayRef(getExtParameterInfosBuffer(), NumArgs); + } + ExtParameterInfo getExtParameterInfo(unsigned argIndex) const { + assert(argIndex <= NumArgs); + if (!HasExtParameterInfos) + return ExtParameterInfo(); + return getExtParameterInfos()[argIndex]; + } + + /// getCallingConvention - Return the user specified calling convention, which + /// has been translated into a CIR CC. + mlir::cir::CallingConv getCallingConvention() const { + return CallingConvention; + } + + /// getEffectiveCallingConvention - Return the actual calling convention to + /// use, which may depend on the ABI. 
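  // Illustrative sketch (illustration only): the Profile() methods above exist
  // so that instances can be uniqued in an llvm::FoldingSet. The owning set
  // (here called FunctionInfos) and the surrounding lookup code are
  // assumptions about the caller, shown as a comment-only sketch:
  //
  //   llvm::FoldingSetNodeID ID;
  //   CIRGenFunctionInfo::Profile(ID, instanceMethod, chainCall, extInfo,
  //                               paramInfos, required, resultType, argTypes);
  //   void *insertPos = nullptr;
  //   if (CIRGenFunctionInfo *FI =
  //           FunctionInfos.FindNodeOrInsertPos(ID, insertPos))
  //     return *FI; // reuse a previously-arranged identical signature
  //   // ... otherwise create() a new CIRGenFunctionInfo and call
  //   //     FunctionInfos.InsertNode(FI, insertPos);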
+ mlir::cir::CallingConv getEffectiveCallingConvention() const { + return EffectiveCallingConvention; + } + + clang::CanQualType getReturnType() const { return getArgsBuffer()[0].type; } + + ABIArgInfo &getReturnInfo() { return getArgsBuffer()[0].info; } + const ABIArgInfo &getReturnInfo() const { return getArgsBuffer()[0].info; } + + bool isChainCall() const { return ChainCall; } + + bool isVariadic() const { return Required.allowsOptionalArgs(); } + RequiredArgs getRequiredArgs() const { return Required; } + unsigned getNumRequiredArgs() const { + return isVariadic() ? getRequiredArgs().getNumRequiredArgs() : arg_size(); + } + + mlir::cir::StructType *getArgStruct() const { return ArgStruct; } + + /// Return true if this function uses inalloca arguments. + bool usesInAlloca() const { return ArgStruct; } +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp new file mode 100644 index 000000000000..0c1279beea19 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp @@ -0,0 +1,2542 @@ +//===----- CIRGenItaniumCXXABI.cpp - Emit CIR from ASTs for a Module ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides C++ code generation targeting the Itanium C++ ABI. The class +// in this file generates structures that follow the Itanium C++ ABI, which is +// documented at: +// https://itanium-cxx-abi.github.io/cxx-abi/abi.html +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html +// +// It also supports the closely-related ARM ABI, documented at: +// https://developer.arm.com/documentation/ihi0041/g/ +// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenCleanup.h" +#include "CIRGenFunctionInfo.h" +#include "ConstantInitBuilder.h" + +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/Mangle.h" +#include "clang/AST/VTableBuilder.h" +#include "clang/Basic/Linkage.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace cir; +using namespace clang; + +namespace { +class CIRGenItaniumCXXABI : public cir::CIRGenCXXABI { + /// All the vtables which have been defined. 
+ llvm::DenseMap VTables; + +protected: + bool UseARMMethodPtrABI; + bool UseARMGuardVarABI; + bool Use32BitVTableOffsetABI; + + ItaniumMangleContext &getMangleContext() { + return cast(cir::CIRGenCXXABI::getMangleContext()); + } + + bool isVTableHidden(const CXXRecordDecl *RD) const { + const auto &VtableLayout = + CGM.getItaniumVTableContext().getVTableLayout(RD); + + for (const auto &VtableComponent : VtableLayout.vtable_components()) { + if (VtableComponent.isRTTIKind()) { + const CXXRecordDecl *RTTIDecl = VtableComponent.getRTTIDecl(); + if (RTTIDecl->getVisibility() == Visibility::HiddenVisibility) + return true; + } else if (VtableComponent.isUsedFunctionPointerKind()) { + const CXXMethodDecl *Method = VtableComponent.getFunctionDecl(); + if (Method->getVisibility() == Visibility::HiddenVisibility && + !Method->isDefined()) + return true; + } + } + return false; + } + + bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const { + const auto &VtableLayout = + CGM.getItaniumVTableContext().getVTableLayout(RD); + + for (const auto &VtableComponent : VtableLayout.vtable_components()) { + // Skip empty slot. + if (!VtableComponent.isUsedFunctionPointerKind()) + continue; + + const CXXMethodDecl *Method = VtableComponent.getFunctionDecl(); + if (!Method->getCanonicalDecl()->isInlined()) + continue; + + StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl()); + auto *op = CGM.getGlobalValue(Name); + if (auto globalOp = dyn_cast_or_null(op)) + llvm_unreachable("NYI"); + + if (auto funcOp = dyn_cast_or_null(op)) { + // This checks if virtual inline function has already been emitted. + // Note that it is possible that this inline function would be emitted + // after trying to emit vtable speculatively. Because of this we do + // an extra pass after emitting all deferred vtables to find and emit + // these vtables opportunistically. + if (!funcOp || funcOp.isDeclaration()) + return true; + } + } + return false; + } + +public: + CIRGenItaniumCXXABI(CIRGenModule &CGM, bool UseARMMethodPtrABI = false, + bool UseARMGuardVarABI = false) + : CIRGenCXXABI(CGM), UseARMMethodPtrABI{UseARMMethodPtrABI}, + UseARMGuardVarABI{UseARMGuardVarABI}, Use32BitVTableOffsetABI{false} { + assert(!UseARMMethodPtrABI && "NYI"); + assert(!UseARMGuardVarABI && "NYI"); + } + AddedStructorArgs getImplicitConstructorArgs(CIRGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, + bool ForVirtualBase, + bool Delegating) override; + + bool NeedsVTTParameter(GlobalDecl GD) override; + + RecordArgABI getRecordArgABI(const clang::CXXRecordDecl *RD) const override { + // If C++ prohibits us from making a copy, pass by address. + if (!RD->canPassInRegisters()) + return RecordArgABI::Indirect; + else + return RecordArgABI::Default; + } + + bool classifyReturnType(CIRGenFunctionInfo &FI) const override; + + AddedStructorArgCounts + buildStructorSignature(GlobalDecl GD, + llvm::SmallVectorImpl &ArgTys) override; + + bool isThisCompleteObject(GlobalDecl GD) const override { + // The Itanium ABI has separate complete-object vs. base-object variants of + // both constructors and destructors. 
+ if (isa(GD.getDecl())) { + llvm_unreachable("NYI"); + } + if (isa(GD.getDecl())) { + switch (GD.getCtorType()) { + case Ctor_Complete: + return true; + + case Ctor_Base: + return false; + + case Ctor_CopyingClosure: + case Ctor_DefaultClosure: + llvm_unreachable("closure ctors in Itanium ABI?"); + + case Ctor_Comdat: + llvm_unreachable("emitting ctor comdat as function?"); + } + llvm_unreachable("bad dtor kind"); + } + + // No other kinds. + return false; + } + + void buildInstanceFunctionProlog(CIRGenFunction &CGF) override; + + void addImplicitStructorParams(CIRGenFunction &CGF, QualType &ResTy, + FunctionArgList &Params) override; + + mlir::Value getCXXDestructorImplicitParam(CIRGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, + bool ForVirtualBase, + bool Delegating) override; + void buildCXXConstructors(const clang::CXXConstructorDecl *D) override; + void buildCXXDestructors(const clang::CXXDestructorDecl *D) override; + void buildCXXStructor(clang::GlobalDecl GD) override; + void buildDestructorCall(CIRGenFunction &CGF, const CXXDestructorDecl *DD, + CXXDtorType Type, bool ForVirtualBase, + bool Delegating, Address This, + QualType ThisTy) override; + void registerGlobalDtor(CIRGenFunction &CGF, const VarDecl *D, + mlir::cir::FuncOp dtor, + mlir::Attribute Addr) override; + virtual void buildRethrow(CIRGenFunction &CGF, bool isNoReturn) override; + virtual void buildThrow(CIRGenFunction &CGF, const CXXThrowExpr *E) override; + CatchTypeInfo + getAddrOfCXXCatchHandlerType(mlir::Location loc, QualType Ty, + QualType CatchHandlerType) override { + auto rtti = + dyn_cast(getAddrOfRTTIDescriptor(loc, Ty)); + assert(rtti && "expected GlobalViewAttr"); + return CatchTypeInfo{rtti, 0}; + } + + void emitBeginCatch(CIRGenFunction &CGF, const CXXCatchStmt *C) override; + + bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override; + mlir::cir::GlobalOp getAddrOfVTable(const CXXRecordDecl *RD, + CharUnits VPtrOffset) override; + CIRGenCallee getVirtualFunctionPointer(CIRGenFunction &CGF, GlobalDecl GD, + Address This, mlir::Type Ty, + SourceLocation Loc) override; + mlir::Value getVTableAddressPoint(BaseSubobject Base, + const CXXRecordDecl *VTableClass) override; + bool isVirtualOffsetNeededForVTableField(CIRGenFunction &CGF, + CIRGenFunction::VPtr Vptr) override; + bool canSpeculativelyEmitVTableAsBaseClass(const CXXRecordDecl *RD) const; + mlir::Value getVTableAddressPointInStructor( + CIRGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base, + const CXXRecordDecl *NearestVBase) override; + void emitVTableDefinitions(CIRGenVTables &CGVT, + const CXXRecordDecl *RD) override; + void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override; + mlir::Attribute getAddrOfRTTIDescriptor(mlir::Location loc, + QualType Ty) override; + bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, + CXXDtorType DT) const override { + // Itanium does not emit any destructor variant as an inline thunk. + // Delegating may occur as an optimization, but all variants are either + // emitted with external linkage or as linkonce if they are inline and used. + return false; + } + + StringRef getPureVirtualCallName() override { return "__cxa_pure_virtual"; } + StringRef getDeletedVirtualCallName() override { + return "__cxa_deleted_virtual"; + } + + /// TODO(cir): seems like could be shared between LLVM IR and CIR codegen. 
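  // Illustrative sketch (illustration only): the two constructor variants the
  // Itanium ABI defines and that the logic above keys on. For declarations
  // like these at namespace scope, Clang emits both a complete-object
  // constructor (C1, mangled _ZN7DerivedC1Ev), which also constructs the
  // virtual Base subobject, and a base-object constructor (C2,
  // _ZN7DerivedC2Ev), which skips virtual bases.
  struct Base {
    Base();
  };
  struct Derived : virtual Base {
    Derived();
  };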
+ bool mayNeedDestruction(const VarDecl *VD) const { + if (VD->needsDestruction(getContext())) + return true; + + // If the variable has an incomplete class type (or array thereof), it + // might need destruction. + const Type *T = VD->getType()->getBaseElementTypeUnsafe(); + if (T->getAs() && T->isIncompleteType()) + return true; + + return false; + } + + /// Determine whether we will definitely emit this variable with a constant + /// initializer, either because the language semantics demand it or because + /// we know that the initializer is a constant. + /// For weak definitions, any initializer available in the current translation + /// is not necessarily reflective of the initializer used; such initializers + /// are ignored unless if InspectInitForWeakDef is true. + /// TODO(cir): seems like could be shared between LLVM IR and CIR codegen. + bool + isEmittedWithConstantInitializer(const VarDecl *VD, + bool InspectInitForWeakDef = false) const { + VD = VD->getMostRecentDecl(); + if (VD->hasAttr()) + return true; + + // All later checks examine the initializer specified on the variable. If + // the variable is weak, such examination would not be correct. + if (!InspectInitForWeakDef && + (VD->isWeak() || VD->hasAttr())) + return false; + + const VarDecl *InitDecl = VD->getInitializingDeclaration(); + if (!InitDecl) + return false; + + // If there's no initializer to run, this is constant initialization. + if (!InitDecl->hasInit()) + return true; + + // If we have the only definition, we don't need a thread wrapper if we + // will emit the value as a constant. + if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD))) + return !mayNeedDestruction(VD) && InitDecl->evaluateValue(); + + // Otherwise, we need a thread wrapper unless we know that every + // translation unit will emit the value as a constant. We rely on the + // variable being constant-initialized in every translation unit if it's + // constant-initialized in any translation unit, which isn't actually + // guaranteed by the standard but is necessary for sanity. + return InitDecl->hasConstantInitialization(); + } + + // TODO(cir): seems like could be shared between LLVM IR and CIR codegen. + bool usesThreadWrapperFunction(const VarDecl *VD) const override { + return !isEmittedWithConstantInitializer(VD) || mayNeedDestruction(VD); + } + + bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override { + return true; + } + + size_t getSrcArgforCopyCtor(const CXXConstructorDecl *, + FunctionArgList &Args) const override { + assert(!Args.empty() && "expected the arglist to not be empty!"); + return Args.size() - 1; + } + + void buildBadCastCall(CIRGenFunction &CGF, mlir::Location loc) override; + + // The traditional clang CodeGen emits calls to `__dynamic_cast` directly into + // LLVM in the `emitDynamicCastCall` function. In CIR, `dynamic_cast` + // expressions are lowered to `cir.dyn_cast` ops instead of calls to runtime + // functions. So during CIRGen we don't need the `emitDynamicCastCall` + // function that clang CodeGen has. 
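  // Illustrative sketch (illustration only): the source form handled by
  // buildDynamicCast below. During CIRGen this becomes a cir.dyn_cast
  // operation rather than an immediate call to __dynamic_cast.
  struct SketchBase {
    virtual ~SketchBase();
  };
  struct SketchDerived : SketchBase {};
  static SketchDerived *downcastSketch(SketchBase *b) {
    return dynamic_cast<SketchDerived *>(b);
  }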
+ + mlir::Value buildDynamicCast(CIRGenFunction &CGF, mlir::Location Loc, + QualType SrcRecordTy, QualType DestRecordTy, + mlir::cir::PointerType DestCIRTy, bool isRefCast, + Address Src) override; + + mlir::cir::MethodAttr + buildVirtualMethodAttr(mlir::cir::MethodType MethodTy, + const CXXMethodDecl *MD) override; + + /**************************** RTTI Uniqueness ******************************/ +protected: + /// Returns true if the ABI requires RTTI type_info objects to be unique + /// across a program. + virtual bool shouldRTTIBeUnique() const { return true; } + +public: + /// What sort of unique-RTTI behavior should we use? + enum RTTIUniquenessKind { + /// We are guaranteeing, or need to guarantee, that the RTTI string + /// is unique. + RUK_Unique, + + /// We are not guaranteeing uniqueness for the RTTI string, so we + /// can demote to hidden visibility but must use string comparisons. + RUK_NonUniqueHidden, + + /// We are not guaranteeing uniqueness for the RTTI string, so we + /// have to use string comparisons, but we also have to emit it with + /// non-hidden visibility. + RUK_NonUniqueVisible + }; + + /// Return the required visibility status for the given type and linkage in + /// the current ABI. + RTTIUniquenessKind + classifyRTTIUniqueness(QualType CanTy, + mlir::cir::GlobalLinkageKind Linkage) const; + friend class CIRGenItaniumRTTIBuilder; +}; +} // namespace + +CIRGenCXXABI::AddedStructorArgs CIRGenItaniumCXXABI::getImplicitConstructorArgs( + CIRGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, + bool ForVirtualBase, bool Delegating) { + assert(!NeedsVTTParameter(GlobalDecl(D, Type)) && "VTT NYI"); + + return {}; +} + +/// Return whether the given global decl needs a VTT parameter, which it does if +/// it's a base constructor or destructor with virtual bases. +bool CIRGenItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) { + auto *MD = cast(GD.getDecl()); + + // We don't have any virtual bases, just return early. + if (!MD->getParent()->getNumVBases()) + return false; + + // Check if we have a base constructor. + if (isa(MD) && GD.getCtorType() == Ctor_Base) + return true; + + // Check if we have a base destructor. + if (isa(MD) && GD.getDtorType() == Dtor_Base) + llvm_unreachable("NYI"); + + return false; +} + +CIRGenCXXABI *cir::CreateCIRGenItaniumCXXABI(CIRGenModule &CGM) { + switch (CGM.getASTContext().getCXXABIKind()) { + case TargetCXXABI::GenericItanium: + assert(CGM.getASTContext().getTargetInfo().getTriple().getArch() != + llvm::Triple::le32 && + "le32 NYI"); + LLVM_FALLTHROUGH; + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::AppleARM64: + // TODO: this isn't quite right, clang uses AppleARM64CXXABI which inherits + // from ARMCXXABI. We'll have to follow suit. + assert(!MissingFeatures::appleArm64CXXABI()); + return new CIRGenItaniumCXXABI(CGM); + + default: + llvm_unreachable("bad or NYI ABI kind"); + } +} + +bool CIRGenItaniumCXXABI::classifyReturnType(CIRGenFunctionInfo &FI) const { + auto *RD = FI.getReturnType()->getAsCXXRecordDecl(); + assert(!RD && "RecordDecl return types NYI"); + return false; +} + +CIRGenCXXABI::AddedStructorArgCounts +CIRGenItaniumCXXABI::buildStructorSignature( + GlobalDecl GD, llvm::SmallVectorImpl &ArgTys) { + auto &Context = getContext(); + + // All parameters are already in place except VTT, which goes after 'this'. + // These are clang types, so we don't need to worry about sret yet. + + // Check if we need to add a VTT parameter (which has type void **). + if ((isa(GD.getDecl()) ? 
GD.getCtorType() == Ctor_Base
+           : GD.getDtorType() == Dtor_Base) &&
+      cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) {
+    llvm_unreachable("NYI");
+    (void)Context;
+  }
+
+  return AddedStructorArgCounts{};
+}
+
+// Find out how to cirgen the complete destructor and constructor
+namespace {
+enum class StructorCIRGen { Emit, RAUW, Alias, COMDAT };
+}
+
+static StructorCIRGen getCIRGenToUse(CIRGenModule &CGM,
+                                     const CXXMethodDecl *MD) {
+  if (!CGM.getCodeGenOpts().CXXCtorDtorAliases)
+    return StructorCIRGen::Emit;
+
+  // The complete and base structors are not equivalent if there are any
+  // virtual bases, so emit separate functions.
+  if (MD->getParent()->getNumVBases())
+    return StructorCIRGen::Emit;
+
+  GlobalDecl AliasDecl;
+  if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) {
+    AliasDecl = GlobalDecl(DD, Dtor_Complete);
+  } else {
+    const auto *CD = cast<CXXConstructorDecl>(MD);
+    AliasDecl = GlobalDecl(CD, Ctor_Complete);
+  }
+  auto Linkage = CGM.getFunctionLinkage(AliasDecl);
+  (void)Linkage;
+
+  if (mlir::cir::isDiscardableIfUnused(Linkage))
+    return StructorCIRGen::RAUW;
+
+  // FIXME: Should we allow available_externally aliases?
+  if (!mlir::cir::isValidLinkage(Linkage))
+    return StructorCIRGen::RAUW;
+
+  if (mlir::cir::isWeakForLinker(Linkage)) {
+    // Only ELF and wasm support COMDATs with arbitrary names (C5/D5).
+    if (CGM.getTarget().getTriple().isOSBinFormatELF() ||
+        CGM.getTarget().getTriple().isOSBinFormatWasm())
+      return StructorCIRGen::COMDAT;
+    return StructorCIRGen::Emit;
+  }
+
+  return StructorCIRGen::Alias;
+}
+
+static void emitConstructorDestructorAlias(CIRGenModule &CGM,
+                                           GlobalDecl AliasDecl,
+                                           GlobalDecl TargetDecl) {
+  auto Linkage = CGM.getFunctionLinkage(AliasDecl);
+
+  // Does this function alias already exist?
+  StringRef MangledName = CGM.getMangledName(AliasDecl);
+  auto globalValue = dyn_cast_or_null<mlir::cir::CIRGlobalValueInterface>(
+      CGM.getGlobalValue(MangledName));
+  if (globalValue && !globalValue.isDeclaration()) {
+    return;
+  }
+
+  auto Entry =
+      dyn_cast_or_null<mlir::cir::FuncOp>(CGM.getGlobalValue(MangledName));
+
+  // Retrieve aliasee info.
+  auto Aliasee =
+      dyn_cast_or_null<mlir::cir::FuncOp>(CGM.GetAddrOfGlobal(TargetDecl));
+  assert(Aliasee && "expected cir.func");
+
+  // Populate actual alias.
+  CGM.buildAliasForGlobal(MangledName, Entry, AliasDecl, Aliasee, Linkage);
+}
+
+void CIRGenItaniumCXXABI::buildCXXStructor(GlobalDecl GD) {
+  auto *MD = cast<CXXMethodDecl>(GD.getDecl());
+  auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+  const CXXDestructorDecl *DD = CD ? nullptr : cast<CXXDestructorDecl>(MD);
+
+  StructorCIRGen CIRGenType = getCIRGenToUse(CGM, MD);
+
+  if (CD ? GD.getCtorType() == Ctor_Complete
+         : GD.getDtorType() == Dtor_Complete) {
+    GlobalDecl BaseDecl;
+    if (CD)
+      BaseDecl = GD.getWithCtorType(Ctor_Base);
+    else
+      BaseDecl = GD.getWithDtorType(Dtor_Base);
+
+    if (CIRGenType == StructorCIRGen::Alias ||
+        CIRGenType == StructorCIRGen::COMDAT) {
+      emitConstructorDestructorAlias(CGM, GD, BaseDecl);
+      return;
+    }
+
+    if (CIRGenType == StructorCIRGen::RAUW) {
+      StringRef MangledName = CGM.getMangledName(GD);
+      auto *Aliasee = CGM.GetAddrOfGlobal(BaseDecl);
+      CGM.addReplacement(MangledName, Aliasee);
+      return;
+    }
+  }
+
+  // The base destructor is equivalent to the base destructor of its base class
+  // if there is exactly one non-virtual base class with a non-trivial
+  // destructor, there are no fields with a non-trivial destructor, and the
+  // body of the destructor is trivial.
+ if (DD && GD.getDtorType() == Dtor_Base && + CIRGenType != StructorCIRGen::COMDAT && + !CGM.tryEmitBaseDestructorAsAlias(DD)) + return; + + // FIXME: The deleting destructor is equivalent to the selected operator + // delete if: + // * either the delete is a destroying operator delete or the destructor + // would be trivial if it weren't virtual. + // * the conversion from the 'this' parameter to the first parameter of the + // destructor is equivalent to a bitcast, + // * the destructor does not have an implicit "this" return, and + // * the operator delete has the same calling convention and CIR function + // type as the destructor. + // In such cases we should try to emit the deleting dtor as an alias to the + // selected 'operator delete'. + + auto Fn = CGM.codegenCXXStructor(GD); + + if (CIRGenType == StructorCIRGen::COMDAT) { + llvm_unreachable("NYI"); + } else { + CGM.maybeSetTrivialComdat(*MD, Fn); + } +} + +void CIRGenItaniumCXXABI::addImplicitStructorParams(CIRGenFunction &CGF, + QualType &ResTY, + FunctionArgList &Params) { + const auto *MD = cast(CGF.CurGD.getDecl()); + assert(isa(MD) || isa(MD)); + + // Check if we need a VTT parameter as well. + if (NeedsVTTParameter(CGF.CurGD)) { + llvm_unreachable("NYI"); + } +} + +mlir::Value CIRGenCXXABI::loadIncomingCXXThis(CIRGenFunction &CGF) { + return CGF.createLoad(getThisDecl(CGF), "this"); +} + +void CIRGenCXXABI::setCXXABIThisValue(CIRGenFunction &CGF, + mlir::Value ThisPtr) { + /// Initialize the 'this' slot. + assert(getThisDecl(CGF) && "no 'this' variable for function"); + CGF.CXXABIThisValue = ThisPtr; +} + +void CIRGenItaniumCXXABI::buildInstanceFunctionProlog(CIRGenFunction &CGF) { + // Naked functions have no prolog. + if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr()) + llvm_unreachable("NYI"); + + /// Initialize the 'this' slot. In the Itanium C++ ABI, no prologue + /// adjustments are required, because they are all handled by thunks. + setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF)); + + /// Initialize the 'vtt' slot if needed. + if (getStructorImplicitParamDecl(CGF)) { + llvm_unreachable("NYI"); + } + + /// If this is a function that the ABI specifies returns 'this', initialize + /// the return slot to this' at the start of the function. + /// + /// Unlike the setting of return types, this is done within the ABI + /// implementation instead of by clients of CIRGenCXXBI because: + /// 1) getThisValue is currently protected + /// 2) in theory, an ABI could implement 'this' returns some other way; + /// HasThisReturn only specifies a contract, not the implementation + if (HasThisReturn(CGF.CurGD)) + llvm_unreachable("NYI"); +} + +void CIRGenItaniumCXXABI::buildCXXConstructors(const CXXConstructorDecl *D) { + // Just make sure we're in sync with TargetCXXABI. + assert(CGM.getTarget().getCXXABI().hasConstructorVariants()); + + // The constructor used for constructing this as a base class; + // ignores virtual bases. + CGM.buildGlobal(GlobalDecl(D, Ctor_Base)); + + // The constructor used for constructing this as a complete class; + // constructs the virtual bases, then calls the base constructor. + if (!D->getParent()->isAbstract()) { + // We don't need to emit the complete ctro if the class is abstract. + CGM.buildGlobal(GlobalDecl(D, Ctor_Complete)); + } +} + +void CIRGenItaniumCXXABI::buildCXXDestructors(const CXXDestructorDecl *D) { + // The destructor used for destructing this as a base class; ignores + // virtual bases. 
+ CGM.buildGlobal(GlobalDecl(D, Dtor_Base)); + + // The destructor used for destructing this as a most-derived class; + // call the base destructor and then destructs any virtual bases. + CGM.buildGlobal(GlobalDecl(D, Dtor_Complete)); + + // The destructor in a virtual table is always a 'deleting' + // destructor, which calls the complete destructor and then uses the + // appropriate operator delete. + if (D->isVirtual()) + CGM.buildGlobal(GlobalDecl(D, Dtor_Deleting)); +} + +namespace { +/// From traditional LLVM, useful info for LLVM lowering support: +/// A cleanup to call __cxa_end_catch. In many cases, the caught +/// exception type lets us state definitively that the thrown exception +/// type does not have a destructor. In particular: +/// - Catch-alls tell us nothing, so we have to conservatively +/// assume that the thrown exception might have a destructor. +/// - Catches by reference behave according to their base types. +/// - Catches of non-record types will only trigger for exceptions +/// of non-record types, which never have destructors. +/// - Catches of record types can trigger for arbitrary subclasses +/// of the caught type, so we have to assume the actual thrown +/// exception type might have a throwing destructor, even if the +/// caught type's destructor is trivial or nothrow. +struct CallEndCatch final : EHScopeStack::Cleanup { + CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {} + bool MightThrow; + + void Emit(CIRGenFunction &CGF, Flags flags) override { + if (!MightThrow) { + // Traditional LLVM codegen would emit a call to __cxa_end_catch + // here. For CIR, just let it pass since the cleanup is going + // to be emitted on a later pass when lowering the catch region. + // CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM)); + CGF.getBuilder().create(*CGF.currSrcLoc); + return; + } + + // Traditional LLVM codegen would emit a call to __cxa_end_catch + // here. For CIR, just let it pass since the cleanup is going + // to be emitted on a later pass when lowering the catch region. + // CGF.EmitRuntimeCallOrTryCall(getEndCatchFn(CGF.CGM)); + CGF.getBuilder().create(*CGF.currSrcLoc); + } +}; +} // namespace + +/// From traditional LLVM codegen, useful info for LLVM lowering support: +/// Emits a call to __cxa_begin_catch and enters a cleanup to call +/// __cxa_end_catch. If -fassume-nothrow-exception-dtor is specified, we assume +/// that the exception object's dtor is nothrow, therefore the __cxa_end_catch +/// call can be marked as nounwind even if EndMightThrow is true. +/// +/// \param EndMightThrow - true if __cxa_end_catch might throw +static mlir::Value CallBeginCatch(CIRGenFunction &CGF, mlir::Type ParamTy, + bool EndMightThrow) { + auto catchParam = CGF.getBuilder().create( + CGF.getBuilder().getUnknownLoc(), ParamTy, nullptr, nullptr); + + CGF.EHStack.pushCleanup( + NormalAndEHCleanup, + EndMightThrow && !CGF.CGM.getLangOpts().AssumeNothrowExceptionDtor); + + return catchParam.getParam(); +} + +/// A "special initializer" callback for initializing a catch +/// parameter during catch initialization. +static void InitCatchParam(CIRGenFunction &CGF, const VarDecl &CatchParam, + Address ParamAddr, SourceLocation Loc) { + CanQualType CatchType = + CGF.CGM.getASTContext().getCanonicalType(CatchParam.getType()); + auto CIRCatchTy = CGF.convertTypeForMem(CatchType); + + // If we're catching by reference, we can just cast the object + // pointer to the appropriate pointer. 
+ if (isa(CatchType)) { + llvm_unreachable("NYI"); + return; + } + + // Scalars and complexes. + TypeEvaluationKind TEK = CGF.getEvaluationKind(CatchType); + if (TEK != TEK_Aggregate) { + // Notes for LLVM lowering: + // If the catch type is a pointer type, __cxa_begin_catch returns + // the pointer by value. + if (CatchType->hasPointerRepresentation()) { + auto catchParam = CallBeginCatch(CGF, CIRCatchTy, false); + + switch (CatchType.getQualifiers().getObjCLifetime()) { + case Qualifiers::OCL_Strong: + llvm_unreachable("NYI"); + // arc retain non block: + assert(!MissingFeatures::ARC()); + [[fallthrough]]; + + case Qualifiers::OCL_None: + case Qualifiers::OCL_ExplicitNone: + case Qualifiers::OCL_Autoreleasing: + CGF.getBuilder().createStore(CGF.getBuilder().getUnknownLoc(), + catchParam, ParamAddr); + return; + + case Qualifiers::OCL_Weak: + llvm_unreachable("NYI"); + // arc init weak: + assert(!MissingFeatures::ARC()); + return; + } + llvm_unreachable("bad ownership qualifier!"); + } + + // Otherwise, it returns a pointer into the exception object. + auto catchParam = + CallBeginCatch(CGF, CGF.getBuilder().getPointerTo(CIRCatchTy), false); + LValue srcLV = CGF.MakeNaturalAlignAddrLValue(catchParam, CatchType); + LValue destLV = CGF.makeAddrLValue(ParamAddr, CatchType); + switch (TEK) { + case TEK_Complex: + llvm_unreachable("NYI"); + return; + case TEK_Scalar: { + auto exnLoad = CGF.buildLoadOfScalar(srcLV, catchParam.getLoc()); + CGF.buildStoreOfScalar(exnLoad, destLV, /*init*/ true); + return; + } + case TEK_Aggregate: + llvm_unreachable("evaluation kind filtered out!"); + } + llvm_unreachable("bad evaluation kind"); + } + + // Check for a copy expression. If we don't have a copy expression, + // that means a trivial copy is okay. + const Expr *copyExpr = CatchParam.getInit(); + if (!copyExpr) { + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); +} + +/// Begins a catch statement by initializing the catch variable and +/// calling __cxa_begin_catch. +void CIRGenItaniumCXXABI::emitBeginCatch(CIRGenFunction &CGF, + const CXXCatchStmt *S) { + // Notes for LLVM lowering: + // We have to be very careful with the ordering of cleanups here: + // C++ [except.throw]p4: + // The destruction [of the exception temporary] occurs + // immediately after the destruction of the object declared in + // the exception-declaration in the handler. + // + // So the precise ordering is: + // 1. Construct catch variable. + // 2. __cxa_begin_catch + // 3. Enter __cxa_end_catch cleanup + // 4. Enter dtor cleanup + // + // We do this by using a slightly abnormal initialization process. 
+ // Delegation sequence: + // - ExitCXXTryStmt opens a RunCleanupsScope + // - EmitAutoVarAlloca creates the variable and debug info + // - InitCatchParam initializes the variable from the exception + // - CallBeginCatch calls __cxa_begin_catch + // - CallBeginCatch enters the __cxa_end_catch cleanup + // - EmitAutoVarCleanups enters the variable destructor cleanup + // - EmitCXXTryStmt emits the code for the catch body + // - EmitCXXTryStmt close the RunCleanupsScope + + VarDecl *CatchParam = S->getExceptionDecl(); + if (!CatchParam) { + CallBeginCatch(CGF, CGF.getBuilder().getVoidPtrTy(), true); + return; + } + + auto getCatchParamAllocaIP = [&]() { + auto currIns = CGF.getBuilder().saveInsertionPoint(); + auto currParent = currIns.getBlock()->getParentOp(); + mlir::Operation *scopeLikeOp = + currParent->getParentOfType(); + if (!scopeLikeOp) + scopeLikeOp = currParent->getParentOfType(); + assert(scopeLikeOp && "unknown outermost scope-like parent"); + assert(scopeLikeOp->getNumRegions() == 1 && "expected single region"); + + auto *insertBlock = &scopeLikeOp->getRegion(0).getBlocks().back(); + return CGF.getBuilder().getBestAllocaInsertPoint(insertBlock); + }; + + // Emit the local. Make sure the alloca's superseed the current scope, since + // these are going to be consumed by `cir.catch`, which is not within the + // current scope. + auto var = CGF.buildAutoVarAlloca(*CatchParam, getCatchParamAllocaIP()); + InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getBeginLoc()); + // FIXME(cir): double check cleanups here are happening in the right blocks. + CGF.buildAutoVarCleanups(var); +} + +mlir::cir::GlobalOp +CIRGenItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, + CharUnits VPtrOffset) { + assert(VPtrOffset.isZero() && "Itanium ABI only supports zero vptr offsets"); + mlir::cir::GlobalOp &vtable = VTables[RD]; + if (vtable) + return vtable; + + // Queue up this vtable for possible deferred emission. + CGM.addDeferredVTable(RD); + + SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + getMangleContext().mangleCXXVTable(RD, Out); + + const VTableLayout &VTLayout = + CGM.getItaniumVTableContext().getVTableLayout(RD); + auto VTableType = CGM.getVTables().getVTableType(VTLayout); + + // Use pointer alignment for the vtable. Otherwise we would align them based + // on the size of the initializer which doesn't make sense as only single + // values are read. + unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout() + ? 32 + : CGM.getTarget().getPointerAlign(LangAS::Default); + + vtable = CGM.createOrReplaceCXXRuntimeVariable( + CGM.getLoc(RD->getSourceRange()), Name, VTableType, + mlir::cir::GlobalLinkageKind::ExternalLinkage, + getContext().toCharUnitsFromBits(PAlign)); + // LLVM codegen handles unnamedAddr + assert(!MissingFeatures::unnamedAddr()); + + // In MS C++ if you have a class with virtual functions in which you are using + // selective member import/export, then all virtual functions must be exported + // unless they are inline, otherwise a link error will result. To match this + // behavior, for such classes, we dllimport the vtable if it is defined + // externally and all the non-inline virtual methods are marked dllimport, and + // we dllexport the vtable if it is defined in this TU and all the non-inline + // virtual methods are marked dllexport. 
+ if (CGM.getTarget().hasPS4DLLImportExport()) + llvm_unreachable("NYI"); + + CGM.setGVProperties(vtable, RD); + return vtable; +} + +CIRGenCallee CIRGenItaniumCXXABI::getVirtualFunctionPointer( + CIRGenFunction &CGF, GlobalDecl GD, Address This, mlir::Type Ty, + SourceLocation Loc) { + auto loc = CGF.getLoc(Loc); + auto TyPtr = CGF.getBuilder().getPointerTo(Ty); + auto *MethodDecl = cast(GD.getDecl()); + auto VTable = CGF.getVTablePtr( + loc, This, CGF.getBuilder().getPointerTo(TyPtr), MethodDecl->getParent()); + + uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); + mlir::Value VFunc{}; + if (CGF.shouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { + llvm_unreachable("NYI"); + } else { + CGF.buildTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc); + + mlir::Value VFuncLoad; + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + llvm_unreachable("NYI"); + } else { + VTable = CGF.getBuilder().createBitcast( + loc, VTable, CGF.getBuilder().getPointerTo(TyPtr)); + auto VTableSlotPtr = + CGF.getBuilder().create( + loc, CGF.getBuilder().getPointerTo(TyPtr), + ::mlir::FlatSymbolRefAttr{}, VTable, + /*vtable_index=*/0, VTableIndex); + VFuncLoad = CGF.getBuilder().createAlignedLoad(loc, TyPtr, VTableSlotPtr, + CGF.getPointerAlign()); + } + + // Add !invariant.load md to virtual function load to indicate that + // function didn't change inside vtable. + // It's safe to add it without -fstrict-vtable-pointers, but it would not + // help in devirtualization because it will only matter if we will have 2 + // the same virtual function loads from the same vtable load, which won't + // happen without enabled devirtualization with -fstrict-vtable-pointers. + if (CGM.getCodeGenOpts().OptimizationLevel > 0 && + CGM.getCodeGenOpts().StrictVTablePointers) { + llvm_unreachable("NYI"); + } + VFunc = VFuncLoad; + } + + CIRGenCallee Callee(GD, VFunc.getDefiningOp()); + return Callee; +} + +mlir::Value +CIRGenItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base, + const CXXRecordDecl *VTableClass) { + auto vtable = getAddrOfVTable(VTableClass, CharUnits()); + + // Find the appropriate vtable within the vtable group, and the address point + // within that vtable. + VTableLayout::AddressPointLocation AddressPoint = + CGM.getItaniumVTableContext() + .getVTableLayout(VTableClass) + .getAddressPoint(Base); + + auto &builder = CGM.getBuilder(); + auto vtablePtrTy = builder.getVirtualFnPtrType(/*isVarArg=*/false); + + return builder.create( + CGM.getLoc(VTableClass->getSourceRange()), vtablePtrTy, + mlir::FlatSymbolRefAttr::get(vtable.getSymNameAttr()), mlir::Value{}, + AddressPoint.VTableIndex, AddressPoint.AddressPointIndex); +} + +mlir::Value CIRGenItaniumCXXABI::getVTableAddressPointInStructor( + CIRGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base, + const CXXRecordDecl *NearestVBase) { + + if ((Base.getBase()->getNumVBases() || NearestVBase != nullptr) && + NeedsVTTParameter(CGF.CurGD)) { + llvm_unreachable("NYI"); + } + return getVTableAddressPoint(Base, VTableClass); +} + +bool CIRGenItaniumCXXABI::isVirtualOffsetNeededForVTableField( + CIRGenFunction &CGF, CIRGenFunction::VPtr Vptr) { + if (Vptr.NearestVBase == nullptr) + return false; + return NeedsVTTParameter(CGF.CurGD); +} + +bool CIRGenItaniumCXXABI::canSpeculativelyEmitVTableAsBaseClass( + const CXXRecordDecl *RD) const { + // We don't emit available_externally vtables if we are in -fapple-kext mode + // because kext mode does not permit devirtualization. 
+ if (CGM.getLangOpts().AppleKext) + return false; + + // If the vtable is hidden then it is not safe to emit an available_externally + // copy of vtable. + if (isVTableHidden(RD)) + return false; + + if (CGM.getCodeGenOpts().ForceEmitVTables) + return true; + + // If we don't have any not emitted inline virtual function then we are safe + // to emit an available_externally copy of vtable. + // FIXME we can still emit a copy of the vtable if we + // can emit definition of the inline functions. + if (hasAnyUnusedVirtualInlineFunction(RD)) + return false; + + // For a class with virtual bases, we must also be able to speculatively + // emit the VTT, because CodeGen doesn't have separate notions of "can emit + // the vtable" and "can emit the VTT". For a base subobject, this means we + // need to be able to emit non-virtual base vtables. + if (RD->getNumVBases()) { + for (const auto &B : RD->bases()) { + auto *BRD = B.getType()->getAsCXXRecordDecl(); + assert(BRD && "no class for base specifier"); + if (B.isVirtual() || !BRD->isDynamicClass()) + continue; + if (!canSpeculativelyEmitVTableAsBaseClass(BRD)) + return false; + } + } + + return true; +} + +bool CIRGenItaniumCXXABI::canSpeculativelyEmitVTable( + const CXXRecordDecl *RD) const { + if (!canSpeculativelyEmitVTableAsBaseClass(RD)) + return false; + + // For a complete-object vtable (or more specifically, for the VTT), we need + // to be able to speculatively emit the vtables of all dynamic virtual bases. + for (const auto &B : RD->vbases()) { + auto *BRD = B.getType()->getAsCXXRecordDecl(); + assert(BRD && "no class for base specifier"); + if (!BRD->isDynamicClass()) + continue; + if (!canSpeculativelyEmitVTableAsBaseClass(BRD)) + return false; + } + + return true; +} + +namespace { +class CIRGenItaniumRTTIBuilder { + CIRGenModule &CGM; // Per-module state. + const CIRGenItaniumCXXABI &CXXABI; // Per-module state. + + /// The fields of the RTTI descriptor currently being built. + SmallVector Fields; + + // Returns the mangled type name of the given type. + mlir::cir::GlobalOp GetAddrOfTypeName(mlir::Location loc, QualType Ty, + mlir::cir::GlobalLinkageKind Linkage); + + // /// Returns the constant for the RTTI + // /// descriptor of the given type. + mlir::Attribute GetAddrOfExternalRTTIDescriptor(mlir::Location loc, + QualType Ty); + + /// Build the vtable pointer for the given type. + void BuildVTablePointer(mlir::Location loc, const Type *Ty); + + /// Build an abi::__si_class_type_info, used for single inheritance, according + /// to the Itanium C++ ABI, 2.9.5p6b. + void BuildSIClassTypeInfo(mlir::Location loc, const CXXRecordDecl *RD); + + /// Build an abi::__vmi_class_type_info, used for + /// classes with bases that do not satisfy the abi::__si_class_type_info + /// constraints, according ti the Itanium C++ ABI, 2.9.5p5c. + void BuildVMIClassTypeInfo(mlir::Location loc, const CXXRecordDecl *RD); + + // /// Build an abi::__pointer_type_info struct, used + // /// for pointer types. + // void BuildPointerTypeInfo(QualType PointeeTy); + + // /// Build the appropriate kind of + // /// type_info for an object type. + // void BuildObjCObjectTypeInfo(const ObjCObjectType *Ty); + + // /// Build an + // abi::__pointer_to_member_type_info + // /// struct, used for member pointer types. + // void BuildPointerToMemberTypeInfo(const MemberPointerType *Ty); + +public: + CIRGenItaniumRTTIBuilder(const CIRGenItaniumCXXABI &ABI, CIRGenModule &_CGM) + : CGM(_CGM), CXXABI(ABI) {} + + // Pointer type info flags. 
+ enum { + /// PTI_Const - Type has const qualifier. + PTI_Const = 0x1, + + /// PTI_Volatile - Type has volatile qualifier. + PTI_Volatile = 0x2, + + /// PTI_Restrict - Type has restrict qualifier. + PTI_Restrict = 0x4, + + /// PTI_Incomplete - Type is incomplete. + PTI_Incomplete = 0x8, + + /// PTI_ContainingClassIncomplete - Containing class is incomplete. + /// (in pointer to member). + PTI_ContainingClassIncomplete = 0x10, + + /// PTI_TransactionSafe - Pointee is transaction_safe function (C++ TM TS). + // PTI_TransactionSafe = 0x20, + + /// PTI_Noexcept - Pointee is noexcept function (C++1z). + PTI_Noexcept = 0x40, + }; + + // VMI type info flags. + enum { + /// VMI_NonDiamondRepeat - Class has non-diamond repeated inheritance. + VMI_NonDiamondRepeat = 0x1, + + /// VMI_DiamondShaped - Class is diamond shaped. + VMI_DiamondShaped = 0x2 + }; + + // Base class type info flags. + enum { + /// BCTI_Virtual - Base class is virtual. + BCTI_Virtual = 0x1, + + /// BCTI_Public - Base class is public. + BCTI_Public = 0x2 + }; + + /// Build the RTTI type info struct for the given type, or + /// link to an existing RTTI descriptor if one already exists. + mlir::Attribute BuildTypeInfo(mlir::Location loc, QualType Ty); + + /// Build the RTTI type info struct for the given type. + mlir::Attribute BuildTypeInfo(mlir::Location loc, QualType Ty, + mlir::cir::GlobalLinkageKind Linkage, + mlir::SymbolTable::Visibility Visibility); +}; +} // namespace + +/// Given a builtin type, returns whether the type +/// info for that type is defined in the standard library. +/// TODO(cir): this can unified with LLVM codegen +static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { + // Itanium C++ ABI 2.9.2: + // Basic type information (e.g. for "int", "bool", etc.) will be kept in + // the run-time support library. Specifically, the run-time support + // library should contain type_info objects for the types X, X* and + // X const*, for every X in: void, std::nullptr_t, bool, wchar_t, char, + // unsigned char, signed char, short, unsigned short, int, unsigned int, + // long, unsigned long, long long, unsigned long long, float, double, + // long double, char16_t, char32_t, and the IEEE 754r decimal and + // half-precision floating point types. + // + // GCC also emits RTTI for __int128. + // FIXME: We do not emit RTTI information for decimal types here. + + // Types added here must also be added to EmitFundamentalRTTIDescriptors. 
+ switch (Ty->getKind()) { + case BuiltinType::WasmExternRef: + llvm_unreachable("NYI"); + case BuiltinType::Void: + case BuiltinType::NullPtr: + case BuiltinType::Bool: + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: + case BuiltinType::Char_U: + case BuiltinType::Char_S: + case BuiltinType::UChar: + case BuiltinType::SChar: + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::Int: + case BuiltinType::UInt: + case BuiltinType::Long: + case BuiltinType::ULong: + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + case BuiltinType::Half: + case BuiltinType::Float: + case BuiltinType::Double: + case BuiltinType::LongDouble: + case BuiltinType::Float16: + case BuiltinType::Float128: + case BuiltinType::Ibm128: + case BuiltinType::Char8: + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::Int128: + case BuiltinType::UInt128: + return true; + +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLImageTypes.def" +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) case BuiltinType::Id: +#include "clang/Basic/OpenCLExtensionTypes.def" + case BuiltinType::OCLSampler: + case BuiltinType::OCLEvent: + case BuiltinType::OCLClkEvent: + case BuiltinType::OCLQueue: + case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" +#define PPC_VECTOR_TYPE(Name, Id, Size) case BuiltinType::Id: +#include "clang/Basic/PPCTypes.def" +#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/RISCVVTypes.def" +#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AMDGPUTypes.def" + case BuiltinType::ShortAccum: + case BuiltinType::Accum: + case BuiltinType::LongAccum: + case BuiltinType::UShortAccum: + case BuiltinType::UAccum: + case BuiltinType::ULongAccum: + case BuiltinType::ShortFract: + case BuiltinType::Fract: + case BuiltinType::LongFract: + case BuiltinType::UShortFract: + case BuiltinType::UFract: + case BuiltinType::ULongFract: + case BuiltinType::SatShortAccum: + case BuiltinType::SatAccum: + case BuiltinType::SatLongAccum: + case BuiltinType::SatUShortAccum: + case BuiltinType::SatUAccum: + case BuiltinType::SatULongAccum: + case BuiltinType::SatShortFract: + case BuiltinType::SatFract: + case BuiltinType::SatLongFract: + case BuiltinType::SatUShortFract: + case BuiltinType::SatUFract: + case BuiltinType::SatULongFract: + case BuiltinType::BFloat16: + return false; + + case BuiltinType::Dependent: +#define BUILTIN_TYPE(Id, SingletonId) +#define PLACEHOLDER_TYPE(Id, SingletonId) case BuiltinType::Id: +#include "clang/AST/BuiltinTypes.def" + llvm_unreachable("asking for RRTI for a placeholder type!"); + + case BuiltinType::ObjCId: + case BuiltinType::ObjCClass: + case BuiltinType::ObjCSel: + llvm_unreachable("FIXME: Objective-C types are unsupported!"); + } + + llvm_unreachable("Invalid BuiltinType Kind!"); +} + +static bool TypeInfoIsInStandardLibrary(const PointerType *PointerTy) { + QualType PointeeTy = PointerTy->getPointeeType(); + const BuiltinType *BuiltinTy = dyn_cast(PointeeTy); + if (!BuiltinTy) + return false; + + // Check the qualifiers. + Qualifiers Quals = PointeeTy.getQualifiers(); + Quals.removeConst(); + + if (!Quals.empty()) + return false; + + return TypeInfoIsInStandardLibrary(BuiltinTy); +} + +/// Returns whether the type +/// information for the given type exists in the standard library. 
+/// TODO(cir): this can be unified with LLVM codegen
+static bool IsStandardLibraryRTTIDescriptor(QualType Ty) {
+  // Type info for builtin types is defined in the standard library.
+  if (const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(Ty))
+    return TypeInfoIsInStandardLibrary(BuiltinTy);
+
+  // Type info for some pointer types to builtin types is defined in the
+  // standard library.
+  if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty))
+    return TypeInfoIsInStandardLibrary(PointerTy);
+
+  return false;
+}
+
+/// Returns whether the type information for
+/// the given type exists somewhere else, and that we should not emit the type
+/// information in this translation unit. Assumes that it is not a
+/// standard-library type.
+/// TODO(cir): this can be unified with LLVM codegen
+static bool ShouldUseExternalRTTIDescriptor(CIRGenModule &CGM, QualType Ty) {
+  ASTContext &Context = CGM.getASTContext();
+
+  // If RTTI is disabled, assume it might be disabled in the
+  // translation unit that defines any potential key function, too.
+  if (!Context.getLangOpts().RTTI)
+    return false;
+
+  if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
+    const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
+    if (!RD->hasDefinition())
+      return false;
+
+    if (!RD->isDynamicClass())
+      return false;
+
+    // FIXME: this may need to be reconsidered if the key function
+    // changes.
+    // N.B. We must always emit the RTTI data ourselves if there exists a key
+    // function.
+    bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
+
+    // Don't import the RTTI but emit it locally.
+    if (CGM.getTriple().isWindowsGNUEnvironment())
+      return false;
+
+    if (CGM.getVTables().isVTableExternal(RD)) {
+      if (CGM.getTarget().hasPS4DLLImportExport())
+        return true;
+
+      return IsDLLImport && !CGM.getTriple().isWindowsItaniumEnvironment()
+                 ? false
+                 : true;
+    }
+    if (IsDLLImport)
+      return true;
+  }
+
+  return false;
+}
+
+/// Returns whether the given record type is incomplete.
+/// TODO(cir): this can be unified with LLVM codegen
+static bool IsIncompleteClassType(const RecordType *RecordTy) {
+  return !RecordTy->getDecl()->isCompleteDefinition();
+}
+
+/// Returns whether the given type contains an
+/// incomplete class type. This is true if
+///
+///   * The given type is an incomplete class type.
+///   * The given type is a pointer type whose pointee type contains an
+///     incomplete class type.
+///   * The given type is a member pointer type whose class is an incomplete
+///     class type.
+///   * The given type is a member pointer type whose pointee type contains an
+///     incomplete class type.
+///
+/// In other words, it is an indirect or direct pointer to an incomplete class
+/// type.
+/// TODO(cir): this can be unified with LLVM codegen
+static bool ContainsIncompleteClassType(QualType Ty) {
+  if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
+    if (IsIncompleteClassType(RecordTy))
+      return true;
+  }
+
+  if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty))
+    return ContainsIncompleteClassType(PointerTy->getPointeeType());
+
+  if (const MemberPointerType *MemberPointerTy =
+          dyn_cast<MemberPointerType>(Ty)) {
+    // Check if the class type is incomplete.
+    const RecordType *ClassType =
+        cast<RecordType>(MemberPointerTy->getClass());
+    if (IsIncompleteClassType(ClassType))
+      return true;
+
+    return ContainsIncompleteClassType(MemberPointerTy->getPointeeType());
+  }
+
+  return false;
+}
+
+// Return whether the given record decl has a "single,
+// public, non-virtual base at offset zero (i.e. the derived class is dynamic
+// iff the base is)", according to the Itanium C++ ABI, 2.9.5p6b.
+// TODO(cir): this can be unified with LLVM codegen
+static bool CanUseSingleInheritance(const CXXRecordDecl *RD) {
+  // Check the number of bases.
+  if (RD->getNumBases() != 1)
+    return false;
+
+  // Get the base.
+  CXXRecordDecl::base_class_const_iterator Base = RD->bases_begin();
+
+  // Check that the base is not virtual.
+  if (Base->isVirtual())
+    return false;
+
+  // Check that the base is public.
+  if (Base->getAccessSpecifier() != AS_public)
+    return false;
+
+  // Check that the class is dynamic iff the base is.
+  auto *BaseDecl =
+      cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl());
+  if (!BaseDecl->isEmpty() &&
+      BaseDecl->isDynamicClass() != RD->isDynamicClass())
+    return false;
+
+  return true;
+}
+
+/// Return the linkage that the type info and type info name constants
+/// should have for the given type.
+static mlir::cir::GlobalLinkageKind getTypeInfoLinkage(CIRGenModule &CGM,
+                                                       QualType Ty) {
+  // Itanium C++ ABI 2.9.5p7:
+  //   In addition, it and all of the intermediate abi::__pointer_type_info
+  //   structs in the chain down to the abi::__class_type_info for the
+  //   incomplete class type must be prevented from resolving to the
+  //   corresponding type_info structs for the complete class type, possibly
+  //   by making them local static objects. Finally, a dummy class RTTI is
+  //   generated for the incomplete type that will not resolve to the final
+  //   complete class RTTI (because the latter need not exist), possibly by
+  //   making it a local static object.
+  if (ContainsIncompleteClassType(Ty))
+    return mlir::cir::GlobalLinkageKind::InternalLinkage;
+
+  switch (Ty->getLinkage()) {
+  case Linkage::None:
+  case Linkage::Internal:
+  case Linkage::UniqueExternal:
+    return mlir::cir::GlobalLinkageKind::InternalLinkage;
+
+  case Linkage::VisibleNone:
+  case Linkage::Module:
+  case Linkage::External:
+    // RTTI is not enabled, which means that this type info struct is going
+    // to be used for exception handling. Give it linkonce_odr linkage.
+    if (!CGM.getLangOpts().RTTI)
+      return mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage;
+
+    if (const RecordType *Record = dyn_cast<RecordType>(Ty)) {
+      const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl());
+      if (RD->hasAttr<WeakAttr>())
+        return mlir::cir::GlobalLinkageKind::WeakODRLinkage;
+      if (CGM.getTriple().isWindowsItaniumEnvironment())
+        if (RD->hasAttr<DLLImportAttr>() &&
+            ShouldUseExternalRTTIDescriptor(CGM, Ty))
+          return mlir::cir::GlobalLinkageKind::ExternalLinkage;
+      // MinGW always uses LinkOnceODRLinkage for type info.
+      if (RD->isDynamicClass() && !CGM.getASTContext()
+                                       .getTargetInfo()
+                                       .getTriple()
+                                       .isWindowsGNUEnvironment())
+        return CGM.getVTableLinkage(RD);
+    }
+
+    return mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage;
+  case Linkage::Invalid:
+    llvm_unreachable("Invalid linkage!");
+  }
+
+  llvm_unreachable("Invalid linkage!");
+}
+
+mlir::Attribute CIRGenItaniumRTTIBuilder::BuildTypeInfo(mlir::Location loc,
+                                                        QualType Ty) {
+  // We want to operate on the canonical type.
+  Ty = Ty.getCanonicalType();
+
+  // Check if we've already emitted an RTTI descriptor for this type.
+ SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); + + auto OldGV = dyn_cast_or_null( + mlir::SymbolTable::lookupSymbolIn(CGM.getModule(), Name)); + + if (OldGV && !OldGV.isDeclaration()) { + assert(!OldGV.hasAvailableExternallyLinkage() && + "available_externally typeinfos not yet implemented"); + return CGM.getBuilder().getGlobalViewAttr(CGM.getBuilder().getUInt8PtrTy(), + OldGV); + } + + // Check if there is already an external RTTI descriptor for this type. + if (IsStandardLibraryRTTIDescriptor(Ty) || + ShouldUseExternalRTTIDescriptor(CGM, Ty)) + return GetAddrOfExternalRTTIDescriptor(loc, Ty); + + // Emit the standard library with external linkage. + auto Linkage = getTypeInfoLinkage(CGM, Ty); + + // Give the type_info object and name the formal visibility of the + // type itself. + assert(!MissingFeatures::hiddenVisibility()); + assert(!MissingFeatures::protectedVisibility()); + mlir::SymbolTable::Visibility symVisibility; + if (mlir::cir::isLocalLinkage(Linkage)) + // If the linkage is local, only default visibility makes sense. + symVisibility = mlir::SymbolTable::Visibility::Public; + else if (CXXABI.classifyRTTIUniqueness(Ty, Linkage) == + CIRGenItaniumCXXABI::RUK_NonUniqueHidden) + llvm_unreachable("NYI"); + else + symVisibility = CIRGenModule::getCIRVisibility(Ty->getVisibility()); + + assert(!MissingFeatures::setDLLStorageClass()); + return BuildTypeInfo(loc, Ty, Linkage, symVisibility); +} + +void CIRGenItaniumRTTIBuilder::BuildVTablePointer(mlir::Location loc, + const Type *Ty) { + auto &builder = CGM.getBuilder(); + + // abi::__class_type_info. + static const char *const ClassTypeInfo = + "_ZTVN10__cxxabiv117__class_type_infoE"; + // abi::__si_class_type_info. + static const char *const SIClassTypeInfo = + "_ZTVN10__cxxabiv120__si_class_type_infoE"; + // abi::__vmi_class_type_info. + static const char *const VMIClassTypeInfo = + "_ZTVN10__cxxabiv121__vmi_class_type_infoE"; + + const char *VTableName = nullptr; + + switch (Ty->getTypeClass()) { + case Type::ArrayParameter: + llvm_unreachable("NYI"); +#define TYPE(Class, Base) +#define ABSTRACT_TYPE(Class, Base) +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: +#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: +#define DEPENDENT_TYPE(Class, Base) case Type::Class: +#include "clang/AST/TypeNodes.inc" + llvm_unreachable("Non-canonical and dependent types shouldn't get here"); + + case Type::LValueReference: + case Type::RValueReference: + llvm_unreachable("References shouldn't get here"); + + case Type::Auto: + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); + + case Type::Pipe: + llvm_unreachable("Pipe types shouldn't get here"); + + case Type::Builtin: + case Type::BitInt: + // GCC treats vector and complex types as fundamental types. + case Type::Vector: + case Type::ExtVector: + case Type::ConstantMatrix: + case Type::Complex: + case Type::Atomic: + // FIXME: GCC treats block pointers as fundamental types?! + case Type::BlockPointer: + // abi::__fundamental_type_info. + VTableName = "_ZTVN10__cxxabiv123__fundamental_type_infoE"; + break; + + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + // abi::__array_type_info. + VTableName = "_ZTVN10__cxxabiv117__array_type_infoE"; + break; + + case Type::FunctionNoProto: + case Type::FunctionProto: + // abi::__function_type_info. 
+ VTableName = "_ZTVN10__cxxabiv120__function_type_infoE"; + break; + + case Type::Enum: + // abi::__enum_type_info. + VTableName = "_ZTVN10__cxxabiv116__enum_type_infoE"; + break; + + case Type::Record: { + const CXXRecordDecl *RD = + cast(cast(Ty)->getDecl()); + + if (!RD->hasDefinition() || !RD->getNumBases()) { + VTableName = ClassTypeInfo; + } else if (CanUseSingleInheritance(RD)) { + VTableName = SIClassTypeInfo; + } else { + VTableName = VMIClassTypeInfo; + } + + break; + } + + case Type::ObjCObject: + // Ignore protocol qualifiers. + Ty = cast(Ty)->getBaseType().getTypePtr(); + + // Handle id and Class. + if (isa(Ty)) { + VTableName = ClassTypeInfo; + break; + } + + assert(isa(Ty)); + [[fallthrough]]; + + case Type::ObjCInterface: + if (cast(Ty)->getDecl()->getSuperClass()) { + VTableName = SIClassTypeInfo; + } else { + VTableName = ClassTypeInfo; + } + break; + + case Type::ObjCObjectPointer: + case Type::Pointer: + // abi::__pointer_type_info. + VTableName = "_ZTVN10__cxxabiv119__pointer_type_infoE"; + break; + + case Type::MemberPointer: + // abi::__pointer_to_member_type_info. + VTableName = "_ZTVN10__cxxabiv129__pointer_to_member_type_infoE"; + break; + } + + mlir::cir::GlobalOp VTable{}; + + // Check if the alias exists. If it doesn't, then get or create the global. + if (CGM.getItaniumVTableContext().isRelativeLayout()) + llvm_unreachable("NYI"); + if (!VTable) { + VTable = CGM.getOrInsertGlobal(loc, VTableName, + CGM.getBuilder().getUInt8PtrTy()); + } + + if (MissingFeatures::setDSOLocal()) + llvm_unreachable("NYI"); + + // The vtable address point is 2. + mlir::Attribute field{}; + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + llvm_unreachable("NYI"); + } else { + SmallVector offsets{ + CGM.getBuilder().getI32IntegerAttr(2)}; + auto indices = mlir::ArrayAttr::get(builder.getContext(), offsets); + field = CGM.getBuilder().getGlobalViewAttr(CGM.getBuilder().getUInt8PtrTy(), + VTable, indices); + } + + assert(field && "expected attribute"); + Fields.push_back(field); +} + +mlir::cir::GlobalOp CIRGenItaniumRTTIBuilder::GetAddrOfTypeName( + mlir::Location loc, QualType Ty, mlir::cir::GlobalLinkageKind Linkage) { + auto &builder = CGM.getBuilder(); + SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(Ty, Out); + + // We know that the mangled name of the type starts at index 4 of the + // mangled name of the typename, so we can just index into it in order to + // get the mangled name of the type. + auto Init = builder.getString( + Name.substr(4), CGM.getTypes().ConvertType(CGM.getASTContext().CharTy)); + auto Align = + CGM.getASTContext().getTypeAlignInChars(CGM.getASTContext().CharTy); + + // builder.getString can return a #cir.zero if the string given to it only + // contains null bytes. However, type names cannot be full of null bytes. + // So cast Init to a ConstArrayAttr should be safe. + auto InitStr = cast(Init); + + auto GV = CGM.createOrReplaceCXXRuntimeVariable(loc, Name, InitStr.getType(), + Linkage, Align); + CIRGenModule::setInitializer(GV, Init); + return GV; +} + +/// Build an abi::__si_class_type_info, used for single inheritance, according +/// to the Itanium C++ ABI, 2.95p6b. 
+void CIRGenItaniumRTTIBuilder::BuildSIClassTypeInfo(mlir::Location loc, + const CXXRecordDecl *RD) { + // Itanium C++ ABI 2.9.5p6b: + // It adds to abi::__class_type_info a single member pointing to the + // type_info structure for the base type, + auto BaseTypeInfo = CIRGenItaniumRTTIBuilder(CXXABI, CGM) + .BuildTypeInfo(loc, RD->bases_begin()->getType()); + Fields.push_back(BaseTypeInfo); +} + +namespace { +/// Contains virtual and non-virtual bases seen when traversing a class +/// hierarchy. +struct SeenBases { + llvm::SmallPtrSet NonVirtualBases; + llvm::SmallPtrSet VirtualBases; +}; +} // namespace + +/// Compute the value of the flags member in abi::__vmi_class_type_info. +/// +static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base, + SeenBases &Bases) { + + unsigned Flags = 0; + + auto *BaseDecl = + cast(Base->getType()->castAs()->getDecl()); + + if (Base->isVirtual()) { + // Mark the virtual base as seen. + if (!Bases.VirtualBases.insert(BaseDecl).second) { + // If this virtual base has been seen before, then the class is diamond + // shaped. + Flags |= CIRGenItaniumRTTIBuilder::VMI_DiamondShaped; + } else { + if (Bases.NonVirtualBases.count(BaseDecl)) + Flags |= CIRGenItaniumRTTIBuilder::VMI_NonDiamondRepeat; + } + } else { + // Mark the non-virtual base as seen. + if (!Bases.NonVirtualBases.insert(BaseDecl).second) { + // If this non-virtual base has been seen before, then the class has non- + // diamond shaped repeated inheritance. + Flags |= CIRGenItaniumRTTIBuilder::VMI_NonDiamondRepeat; + } else { + if (Bases.VirtualBases.count(BaseDecl)) + Flags |= CIRGenItaniumRTTIBuilder::VMI_NonDiamondRepeat; + } + } + + // Walk all bases. + for (const auto &I : BaseDecl->bases()) + Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases); + + return Flags; +} + +static unsigned ComputeVMIClassTypeInfoFlags(const CXXRecordDecl *RD) { + unsigned Flags = 0; + SeenBases Bases; + + // Walk all bases. + for (const auto &I : RD->bases()) + Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases); + + return Flags; +} + +/// Build an abi::__vmi_class_type_info, used for +/// classes with bases that do not satisfy the abi::__si_class_type_info +/// constraints, according to the Itanium C++ ABI, 2.9.5p5c. +void CIRGenItaniumRTTIBuilder::BuildVMIClassTypeInfo(mlir::Location loc, + const CXXRecordDecl *RD) { + auto UnsignedIntLTy = + CGM.getTypes().ConvertType(CGM.getASTContext().UnsignedIntTy); + // Itanium C++ ABI 2.9.5p6c: + // __flags is a word with flags describing details about the class + // structure, which may be referenced by using the __flags_masks + // enumeration. These flags refer to both direct and indirect bases. + unsigned Flags = ComputeVMIClassTypeInfoFlags(RD); + Fields.push_back(mlir::cir::IntAttr::get(UnsignedIntLTy, Flags)); + + // Itanium C++ ABI 2.9.5p6c: + // __base_count is a word with the number of direct proper base class + // descriptions that follow. + Fields.push_back(mlir::cir::IntAttr::get(UnsignedIntLTy, RD->getNumBases())); + + if (!RD->getNumBases()) + return; + + // Now add the base class descriptions. + + // Itanium C++ ABI 2.9.5p6c: + // __base_info[] is an array of base class descriptions -- one for every + // direct proper base. 
Each description is of the type: + // + // struct abi::__base_class_type_info { + // public: + // const __class_type_info *__base_type; + // long __offset_flags; + // + // enum __offset_flags_masks { + // __virtual_mask = 0x1, + // __public_mask = 0x2, + // __offset_shift = 8 + // }; + // }; + + // If we're in mingw and 'long' isn't wide enough for a pointer, use 'long + // long' instead of 'long' for __offset_flags. libstdc++abi uses long long on + // LLP64 platforms. + // FIXME: Consider updating libc++abi to match, and extend this logic to all + // LLP64 platforms. + QualType OffsetFlagsTy = CGM.getASTContext().LongTy; + const TargetInfo &TI = CGM.getASTContext().getTargetInfo(); + if (TI.getTriple().isOSCygMing() && + TI.getPointerWidth(LangAS::Default) > TI.getLongWidth()) + OffsetFlagsTy = CGM.getASTContext().LongLongTy; + auto OffsetFlagsLTy = CGM.getTypes().ConvertType(OffsetFlagsTy); + + for (const auto &Base : RD->bases()) { + // The __base_type member points to the RTTI for the base type. + Fields.push_back(CIRGenItaniumRTTIBuilder(CXXABI, CGM) + .BuildTypeInfo(loc, Base.getType())); + + auto *BaseDecl = + cast(Base.getType()->castAs()->getDecl()); + + int64_t OffsetFlags = 0; + + // All but the lower 8 bits of __offset_flags are a signed offset. + // For a non-virtual base, this is the offset in the object of the base + // subobject. For a virtual base, this is the offset in the virtual table of + // the virtual base offset for the virtual base referenced (negative). + CharUnits Offset; + if (Base.isVirtual()) + Offset = CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset( + RD, BaseDecl); + else { + const ASTRecordLayout &Layout = + CGM.getASTContext().getASTRecordLayout(RD); + Offset = Layout.getBaseClassOffset(BaseDecl); + } + OffsetFlags = uint64_t(Offset.getQuantity()) << 8; + + // The low-order byte of __offset_flags contains flags, as given by the + // masks from the enumeration __offset_flags_masks. + if (Base.isVirtual()) + OffsetFlags |= BCTI_Virtual; + if (Base.getAccessSpecifier() == AS_public) + OffsetFlags |= BCTI_Public; + + Fields.push_back(mlir::cir::IntAttr::get(OffsetFlagsLTy, OffsetFlags)); + } +} + +mlir::Attribute +CIRGenItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(mlir::Location loc, + QualType Ty) { + // Mangle the RTTI name. + SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); + auto &builder = CGM.getBuilder(); + + // Look for an existing global. + auto GV = dyn_cast_or_null( + mlir::SymbolTable::lookupSymbolIn(CGM.getModule(), Name)); + + if (!GV) { + // Create a new global variable. + // From LLVM codegen => Note for the future: If we would ever like to do + // deferred emission of RTTI, check if emitting vtables opportunistically + // need any adjustment. + GV = CIRGenModule::createGlobalOp(CGM, loc, Name, builder.getUInt8PtrTy(), + /*isConstant=*/true); + const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + CGM.setGVProperties(GV, RD); + + // Import the typeinfo symbol when all non-inline virtual methods are + // imported. + if (CGM.getTarget().hasPS4DLLImportExport()) + llvm_unreachable("NYI"); + } + + return builder.getGlobalViewAttr(builder.getUInt8PtrTy(), GV); +} + +mlir::Attribute CIRGenItaniumRTTIBuilder::BuildTypeInfo( + mlir::Location loc, QualType Ty, mlir::cir::GlobalLinkageKind Linkage, + mlir::SymbolTable::Visibility Visibility) { + auto &builder = CGM.getBuilder(); + assert(!MissingFeatures::setDLLStorageClass()); + + // Add the vtable pointer. 
+ BuildVTablePointer(loc, cast(Ty)); + + // And the name. + auto TypeName = GetAddrOfTypeName(loc, Ty, Linkage); + mlir::Attribute TypeNameField; + + // If we're supposed to demote the visibility, be sure to set a flag + // to use a string comparison for type_info comparisons. + CIRGenItaniumCXXABI::RTTIUniquenessKind RTTIUniqueness = + CXXABI.classifyRTTIUniqueness(Ty, Linkage); + if (RTTIUniqueness != CIRGenItaniumCXXABI::RUK_Unique) { + // The flag is the sign bit, which on ARM64 is defined to be clear + // for global pointers. This is very ARM64-specific. + llvm_unreachable("NYI"); + } else { + TypeNameField = + builder.getGlobalViewAttr(builder.getUInt8PtrTy(), TypeName); + } + Fields.push_back(TypeNameField); + + switch (Ty->getTypeClass()) { + case Type::ArrayParameter: + llvm_unreachable("NYI"); +#define TYPE(Class, Base) +#define ABSTRACT_TYPE(Class, Base) +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: +#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: +#define DEPENDENT_TYPE(Class, Base) case Type::Class: +#include "clang/AST/TypeNodes.inc" + llvm_unreachable("Non-canonical and dependent types shouldn't get here"); + + // GCC treats vector types as fundamental types. + case Type::Builtin: + case Type::Vector: + case Type::ExtVector: + case Type::ConstantMatrix: + case Type::Complex: + case Type::BlockPointer: + // Itanium C++ ABI 2.9.5p4: + // abi::__fundamental_type_info adds no data members to std::type_info. + break; + + case Type::LValueReference: + case Type::RValueReference: + llvm_unreachable("References shouldn't get here"); + + case Type::Auto: + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); + + case Type::Pipe: + break; + + case Type::BitInt: + break; + + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + // Itanium C++ ABI 2.9.5p5: + // abi::__array_type_info adds no data members to std::type_info. + break; + + case Type::FunctionNoProto: + case Type::FunctionProto: + // Itanium C++ ABI 2.9.5p5: + // abi::__function_type_info adds no data members to std::type_info. + break; + + case Type::Enum: + // Itanium C++ ABI 2.9.5p5: + // abi::__enum_type_info adds no data members to std::type_info. + break; + + case Type::Record: { + const CXXRecordDecl *RD = + cast(cast(Ty)->getDecl()); + if (!RD->hasDefinition() || !RD->getNumBases()) { + // We don't need to emit any fields. + break; + } + + if (CanUseSingleInheritance(RD)) { + BuildSIClassTypeInfo(loc, RD); + } else { + BuildVMIClassTypeInfo(loc, RD); + } + + break; + } + + case Type::ObjCObject: + case Type::ObjCInterface: + llvm_unreachable("NYI"); + break; + + case Type::ObjCObjectPointer: + llvm_unreachable("NYI"); + break; + + case Type::Pointer: + llvm_unreachable("NYI"); + break; + + case Type::MemberPointer: + llvm_unreachable("NYI"); + break; + + case Type::Atomic: + // No fields, at least for the moment. + break; + } + + assert(!MissingFeatures::setDLLImportDLLExport()); + auto init = builder.getTypeInfo(builder.getArrayAttr(Fields)); + + SmallString<256> Name; + llvm::raw_svector_ostream Out(Name); + CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out); + + // Create new global and search for an existing global. 
+ auto OldGV = dyn_cast_or_null( + mlir::SymbolTable::lookupSymbolIn(CGM.getModule(), Name)); + mlir::cir::GlobalOp GV = + CIRGenModule::createGlobalOp(CGM, loc, Name, init.getType(), + /*isConstant=*/true); + + // Export the typeinfo in the same circumstances as the vtable is + // exported. + if (CGM.getTarget().hasPS4DLLImportExport()) + llvm_unreachable("NYI"); + + // If there's already an old global variable, replace it with the new one. + if (OldGV) { + // Replace occurrences of the old variable if needed. + GV.setName(OldGV.getName()); + if (!OldGV->use_empty()) { + // TODO: replaceAllUsesWith + llvm_unreachable("NYI"); + } + OldGV->erase(); + } + + if (CGM.supportsCOMDAT() && mlir::cir::isWeakForLinker(GV.getLinkage())) { + assert(!MissingFeatures::setComdat()); + llvm_unreachable("NYI"); + } + + CharUnits Align = CGM.getASTContext().toCharUnitsFromBits( + CGM.getTarget().getPointerAlign(LangAS::Default)); + GV.setAlignmentAttr(CGM.getSize(Align)); + + // The Itanium ABI specifies that type_info objects must be globally + // unique, with one exception: if the type is an incomplete class + // type or a (possibly indirect) pointer to one. That exception + // affects the general case of comparing type_info objects produced + // by the typeid operator, which is why the comparison operators on + // std::type_info generally use the type_info name pointers instead + // of the object addresses. However, the language's built-in uses + // of RTTI generally require class types to be complete, even when + // manipulating pointers to those class types. This allows the + // implementation of dynamic_cast to rely on address equality tests, + // which is much faster. + // + // All of this is to say that it's important that both the type_info + // object and the type_info name be uniqued when weakly emitted. + + // TODO(cir): setup other bits for TypeName + assert(!MissingFeatures::setDLLStorageClass()); + assert(!MissingFeatures::setPartition()); + assert(!MissingFeatures::setDSOLocal()); + mlir::SymbolTable::setSymbolVisibility( + TypeName, CIRGenModule::getMLIRVisibility(TypeName)); + + // TODO(cir): setup other bits for GV + assert(!MissingFeatures::setDLLStorageClass()); + assert(!MissingFeatures::setPartition()); + assert(!MissingFeatures::setDSOLocal()); + CIRGenModule::setInitializer(GV, init); + + return builder.getGlobalViewAttr(builder.getUInt8PtrTy(), GV); + ; +} + +mlir::Attribute CIRGenItaniumCXXABI::getAddrOfRTTIDescriptor(mlir::Location loc, + QualType Ty) { + return CIRGenItaniumRTTIBuilder(*this, CGM).BuildTypeInfo(loc, Ty); +} + +void CIRGenItaniumCXXABI::emitVTableDefinitions(CIRGenVTables &CGVT, + const CXXRecordDecl *RD) { + auto VTable = getAddrOfVTable(RD, CharUnits()); + if (VTable.hasInitializer()) + return; + + ItaniumVTableContext &VTContext = CGM.getItaniumVTableContext(); + const VTableLayout &VTLayout = VTContext.getVTableLayout(RD); + auto Linkage = CGM.getVTableLinkage(RD); + auto RTTI = CGM.getAddrOfRTTIDescriptor( + CGM.getLoc(RD->getBeginLoc()), CGM.getASTContext().getTagDeclType(RD)); + + // Create and set the initializer. + ConstantInitBuilder builder(CGM); + auto components = builder.beginStruct(); + + CGVT.createVTableInitializer(components, VTLayout, RTTI, + mlir::cir::isLocalLinkage(Linkage)); + components.finishAndSetAsInitializer(VTable, /*forVtable=*/true); + + // Set the correct linkage. 
+ VTable.setLinkage(Linkage); + + if (CGM.supportsCOMDAT() && mlir::cir::isWeakForLinker(Linkage)) { + assert(!MissingFeatures::setComdat()); + } + + // Set the right visibility. + CGM.setGVProperties(VTable, RD); + + // If this is the magic class __cxxabiv1::__fundamental_type_info, + // we will emit the typeinfo for the fundamental types. This is the + // same behaviour as GCC. + const DeclContext *DC = RD->getDeclContext(); + if (RD->getIdentifier() && + RD->getIdentifier()->isStr("__fundamental_type_info") && + isa(DC) && cast(DC)->getIdentifier() && + cast(DC)->getIdentifier()->isStr("__cxxabiv1") && + DC->getParent()->isTranslationUnit()) { + llvm_unreachable("NYI"); + // EmitFundamentalRTTIDescriptors(RD); + } + + auto VTableAsGlobalValue = + dyn_cast(*VTable); + assert(VTableAsGlobalValue && "VTable must support CIRGlobalValueInterface"); + bool isDeclarationForLinker = VTableAsGlobalValue.isDeclarationForLinker(); + // Always emit type metadata on non-available_externally definitions, and on + // available_externally definitions if we are performing whole program + // devirtualization. For WPD we need the type metadata on all vtable + // definitions to ensure we associate derived classes with base classes + // defined in headers but with a strong definition only in a shared + // library. + if (!isDeclarationForLinker || CGM.getCodeGenOpts().WholeProgramVTables) { + CGM.buildVTableTypeMetadata(RD, VTable, VTLayout); + // For available_externally definitions, add the vtable to + // @llvm.compiler.used so that it isn't deleted before whole program + // analysis. + if (isDeclarationForLinker) { + llvm_unreachable("NYI"); + assert(CGM.getCodeGenOpts().WholeProgramVTables); + assert(!MissingFeatures::addCompilerUsedGlobal()); + } + } + + if (VTContext.isRelativeLayout()) + llvm_unreachable("NYI"); +} + +void CIRGenItaniumCXXABI::emitVirtualInheritanceTables( + const CXXRecordDecl *RD) { + CIRGenVTables &VTables = CGM.getVTables(); + auto VTT = VTables.getAddrOfVTT(RD); + VTables.buildVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD); +} + +/// What sort of uniqueness rules should we use for the RTTI for the +/// given type? +CIRGenItaniumCXXABI::RTTIUniquenessKind +CIRGenItaniumCXXABI::classifyRTTIUniqueness( + QualType CanTy, mlir::cir::GlobalLinkageKind Linkage) const { + if (shouldRTTIBeUnique()) + return RUK_Unique; + + // It's only necessary for linkonce_odr or weak_odr linkage. + if (Linkage != mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage && + Linkage != mlir::cir::GlobalLinkageKind::WeakODRLinkage) + return RUK_Unique; + + // It's only necessary with default visibility. + if (CanTy->getVisibility() != DefaultVisibility) + return RUK_Unique; + + // If we're not required to publish this symbol, hide it. + if (Linkage == mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage) + return RUK_NonUniqueHidden; + + // If we're required to publish this symbol, as we might be under an + // explicit instantiation, leave it with default visibility but + // enable string-comparisons. 
+ assert(Linkage == mlir::cir::GlobalLinkageKind::WeakODRLinkage); + return RUK_NonUniqueVisible; +} + +void CIRGenItaniumCXXABI::buildDestructorCall( + CIRGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, + bool ForVirtualBase, bool Delegating, Address This, QualType ThisTy) { + GlobalDecl GD(DD, Type); + auto VTT = + getCXXDestructorImplicitParam(CGF, DD, Type, ForVirtualBase, Delegating); + QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); + CIRGenCallee Callee; + if (getContext().getLangOpts().AppleKext && Type != Dtor_Base && + DD->isVirtual()) + llvm_unreachable("NYI"); + else + Callee = CIRGenCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD); + + CGF.buildCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, VTT, VTTTy, + nullptr); +} + +void CIRGenItaniumCXXABI::registerGlobalDtor(CIRGenFunction &CGF, + const VarDecl *D, + mlir::cir::FuncOp dtor, + mlir::Attribute Addr) { + if (D->isNoDestroy(CGM.getASTContext())) + return; + + if (D->getTLSKind()) + llvm_unreachable("NYI"); + + // HLSL doesn't support atexit. + if (CGM.getLangOpts().HLSL) + llvm_unreachable("NYI"); + + // The default behavior is to use atexit. This is handled in lowering + // prepare. Nothing to be done for CIR here. +} + +mlir::Value CIRGenItaniumCXXABI::getCXXDestructorImplicitParam( + CIRGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, + bool ForVirtualBase, bool Delegating) { + GlobalDecl GD(DD, Type); + return CGF.GetVTTParameter(GD, ForVirtualBase, Delegating); +} + +void CIRGenItaniumCXXABI::buildRethrow(CIRGenFunction &CGF, bool isNoReturn) { + // void __cxa_rethrow(); + llvm_unreachable("NYI"); +} + +void CIRGenItaniumCXXABI::buildThrow(CIRGenFunction &CGF, + const CXXThrowExpr *E) { + // This differs a bit from LLVM codegen, CIR has native operations for some + // cxa functions, and defers allocation size computation, always pass the dtor + // symbol, etc. CIRGen also does not use getAllocateExceptionFn / getThrowFn. + + // Now allocate the exception object. + auto &builder = CGF.getBuilder(); + QualType clangThrowType = E->getSubExpr()->getType(); + auto throwTy = builder.getPointerTo(CGF.ConvertType(clangThrowType)); + uint64_t typeSize = + CGF.getContext().getTypeSizeInChars(clangThrowType).getQuantity(); + auto subExprLoc = CGF.getLoc(E->getSubExpr()->getSourceRange()); + // Defer computing allocation size to some later lowering pass. + auto exceptionPtr = + builder + .create( + subExprLoc, throwTy, builder.getI64IntegerAttr(typeSize)) + .getAddr(); + + // Build expression and store its result into exceptionPtr. + CharUnits exnAlign = CGF.getContext().getExnObjectAlignment(); + CGF.buildAnyExprToExn(E->getSubExpr(), Address(exceptionPtr, exnAlign)); + + // Get the RTTI symbol address. + auto typeInfo = mlir::dyn_cast_if_present( + CGM.getAddrOfRTTIDescriptor(subExprLoc, clangThrowType, + /*ForEH=*/true)); + assert(typeInfo && "expected GlobalViewAttr typeinfo"); + assert(!typeInfo.getIndices() && "expected no indirection"); + + // The address of the destructor. + // + // Note: LLVM codegen already optimizes out the dtor if the + // type is a record with trivial dtor (by passing down a + // null dtor). In CIR, we forward this info and allow for + // LoweringPrepare or some other pass to skip passing the + // trivial function. + // + // TODO(cir): alternatively, dtor could be ignored here and + // the type used to gather the relevant dtor during + // LoweringPrepare. 
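+ //
+ // For reference, the Itanium ABI entry point this eventually lowers to is
+ //   void __cxa_throw(void *exception, std::type_info *tinfo,
+ //                    void (*dest)(void *));
+ // so cir.throw carries the same three pieces of information: the exception
+ // address, the RTTI symbol and (optionally) the destructor symbol.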
+ mlir::FlatSymbolRefAttr dtor{}; + if (const RecordType *recordTy = clangThrowType->getAs()) { + CXXRecordDecl *rec = cast(recordTy->getDecl()); + CXXDestructorDecl *dtorD = rec->getDestructor(); + dtor = mlir::FlatSymbolRefAttr::get( + CGM.getAddrOfCXXStructor(GlobalDecl(dtorD, Dtor_Complete)) + .getSymNameAttr()); + } + + // FIXME: When adding support for invoking, we should wrap the throw op + // below into a try, and let CFG flatten pass to generate a cir.try_call. + assert(!CGF.isInvokeDest() && "landing pad like logic NYI"); + + // Now throw the exception. + mlir::Location loc = CGF.getLoc(E->getSourceRange()); + builder.create(loc, exceptionPtr, typeInfo.getSymbol(), + dtor); + builder.create(loc); +} + +static mlir::cir::FuncOp getBadCastFn(CIRGenFunction &CGF) { + // Prototype: void __cxa_bad_cast(); + + // TODO(cir): set the calling convention of the runtime function. + assert(!MissingFeatures::setCallingConv()); + + mlir::cir::FuncType FTy = + CGF.getBuilder().getFuncType({}, CGF.getBuilder().getVoidTy()); + return CGF.CGM.createRuntimeFunction(FTy, "__cxa_bad_cast"); +} + +static void buildCallToBadCast(CIRGenFunction &CGF, mlir::Location loc) { + // TODO(cir): set the calling convention to the runtime function. + assert(!MissingFeatures::setCallingConv()); + + CGF.buildRuntimeCall(loc, getBadCastFn(CGF)); + CGF.getBuilder().create(loc); + CGF.getBuilder().clearInsertionPoint(); +} + +void CIRGenItaniumCXXABI::buildBadCastCall(CIRGenFunction &CGF, + mlir::Location loc) { + buildCallToBadCast(CGF, loc); +} + +static CharUnits computeOffsetHint(ASTContext &Context, + const CXXRecordDecl *Src, + const CXXRecordDecl *Dst) { + CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, + /*DetectVirtual=*/false); + + // If Dst is not derived from Src we can skip the whole computation below and + // return that Src is not a public base of Dst. Record all inheritance paths. + if (!Dst->isDerivedFrom(Src, Paths)) + return CharUnits::fromQuantity(-2ULL); + + unsigned NumPublicPaths = 0; + CharUnits Offset; + + // Now walk all possible inheritance paths. + for (const CXXBasePath &Path : Paths) { + if (Path.Access != AS_public) // Ignore non-public inheritance. + continue; + + ++NumPublicPaths; + + for (const CXXBasePathElement &PathElement : Path) { + // If the path contains a virtual base class we can't give any hint. + // -1: no hint. + if (PathElement.Base->isVirtual()) + return CharUnits::fromQuantity(-1ULL); + + if (NumPublicPaths > 1) // Won't use offsets, skip computation. + continue; + + // Accumulate the base class offsets. + const ASTRecordLayout &L = Context.getASTRecordLayout(PathElement.Class); + Offset += L.getBaseClassOffset( + PathElement.Base->getType()->getAsCXXRecordDecl()); + } + } + + // -2: Src is not a public base of Dst. + if (NumPublicPaths == 0) + return CharUnits::fromQuantity(-2ULL); + + // -3: Src is a multiple public base type but never a virtual base type. + if (NumPublicPaths > 1) + return CharUnits::fromQuantity(-3ULL); + + // Otherwise, the Src type is a unique public nonvirtual base type of Dst. + // Return the offset of Src from the origin of Dst. 
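+ // Example: for 'struct B { virtual ~B(); }; struct D : B { int i; };' the
+ // hint for B -> D is the offset of B within D (zero here). The negative
+ // encodings mirror the src2dst_offset convention of __dynamic_cast:
+ // -1 "no hint" (virtual base on some path), -2 "Src is not a public base of
+ // Dst", -3 "Src appears as a public base more than once (never virtually)".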
+ return Offset; +} + +static mlir::cir::FuncOp getItaniumDynamicCastFn(CIRGenFunction &CGF) { + // Prototype: + // void *__dynamic_cast(const void *sub, + // global_as const abi::__class_type_info *src, + // global_as const abi::__class_type_info *dst, + // std::ptrdiff_t src2dst_offset); + + mlir::Type VoidPtrTy = CGF.VoidPtrTy; + mlir::Type RTTIPtrTy = CGF.getBuilder().getUInt8PtrTy(); + mlir::Type PtrDiffTy = CGF.ConvertType(CGF.getContext().getPointerDiffType()); + + // TODO(cir): mark the function as nowind readonly. + + // TODO(cir): set the calling convention of the runtime function. + assert(!MissingFeatures::setCallingConv()); + + mlir::cir::FuncType FTy = CGF.getBuilder().getFuncType( + {VoidPtrTy, RTTIPtrTy, RTTIPtrTy, PtrDiffTy}, VoidPtrTy); + return CGF.CGM.createRuntimeFunction(FTy, "__dynamic_cast"); +} + +static Address buildDynamicCastToVoid(CIRGenFunction &CGF, mlir::Location Loc, + QualType SrcRecordTy, Address Src) { + auto vtableUsesRelativeLayout = + CGF.CGM.getItaniumVTableContext().isRelativeLayout(); + auto ptr = CGF.getBuilder().createDynCastToVoid(Loc, Src.getPointer(), + vtableUsesRelativeLayout); + return Address{ptr, Src.getAlignment()}; +} + +static mlir::Value +buildExactDynamicCast(CIRGenItaniumCXXABI &ABI, CIRGenFunction &CGF, + mlir::Location Loc, QualType SrcRecordTy, + QualType DestRecordTy, mlir::cir::PointerType DestCIRTy, + bool IsRefCast, Address Src) { + // Find all the inheritance paths from SrcRecordTy to DestRecordTy. + const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl(); + const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl(); + CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, + /*DetectVirtual=*/false); + (void)DestDecl->isDerivedFrom(SrcDecl, Paths); + + // Find an offset within `DestDecl` where a `SrcDecl` instance and its vptr + // might appear. + std::optional Offset; + for (const CXXBasePath &Path : Paths) { + // dynamic_cast only finds public inheritance paths. + if (Path.Access != AS_public) + continue; + + CharUnits PathOffset; + for (const CXXBasePathElement &PathElement : Path) { + // Find the offset along this inheritance step. + const CXXRecordDecl *Base = + PathElement.Base->getType()->getAsCXXRecordDecl(); + if (PathElement.Base->isVirtual()) { + // For a virtual base class, we know that the derived class is exactly + // DestDecl, so we can use the vbase offset from its layout. + const ASTRecordLayout &L = + CGF.getContext().getASTRecordLayout(DestDecl); + PathOffset = L.getVBaseClassOffset(Base); + } else { + const ASTRecordLayout &L = + CGF.getContext().getASTRecordLayout(PathElement.Class); + PathOffset += L.getBaseClassOffset(Base); + } + } + + if (!Offset) + Offset = PathOffset; + else if (Offset != PathOffset) { + // Base appears in at least two different places. Find the most-derived + // object and see if it's a DestDecl. Note that the most-derived object + // must be at least as aligned as this base class subobject, and must + // have a vptr at offset 0. + Src = buildDynamicCastToVoid(CGF, Loc, SrcRecordTy, Src); + SrcDecl = DestDecl; + Offset = CharUnits::Zero(); + break; + } + } + + if (!Offset) { + // If there are no public inheritance paths, the cast always fails. + mlir::Value NullPtrValue = CGF.getBuilder().getNullPtr(DestCIRTy, Loc); + if (IsRefCast) { + auto *CurrentRegion = CGF.getBuilder().getBlock()->getParent(); + buildCallToBadCast(CGF, Loc); + + // The call to bad_cast will terminate the block. Create a new block to + // hold any follow up code. 
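+ // (buildCallToBadCast above ends with an unreachable terminator and clears
+ // the insertion point, so any follow-up IR needs a fresh block to land in.)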
+ CGF.getBuilder().createBlock(CurrentRegion, CurrentRegion->end()); + } + + return NullPtrValue; + } + + // Compare the vptr against the expected vptr for the destination type at + // this offset. Note that we do not know what type Src points to in the case + // where the derived class multiply inherits from the base class so we can't + // use GetVTablePtr, so we load the vptr directly instead. + + mlir::Value ExpectedVPtr = + ABI.getVTableAddressPoint(BaseSubobject(SrcDecl, *Offset), DestDecl); + + // TODO(cir): handle address space here. + assert(!MissingFeatures::addressSpace()); + mlir::Type VPtrTy = ExpectedVPtr.getType(); + mlir::Type VPtrPtrTy = CGF.getBuilder().getPointerTo(VPtrTy); + Address SrcVPtrPtr( + CGF.getBuilder().createBitcast(Src.getPointer(), VPtrPtrTy), + Src.getAlignment()); + mlir::Value SrcVPtr = CGF.getBuilder().createLoad(Loc, SrcVPtrPtr); + + // TODO(cir): decorate SrcVPtr with TBAA info. + assert(!MissingFeatures::tbaa()); + + mlir::Value Success = CGF.getBuilder().createCompare( + Loc, mlir::cir::CmpOpKind::eq, SrcVPtr, ExpectedVPtr); + + auto buildCastResult = [&] { + if (Offset->isZero()) + return CGF.getBuilder().createBitcast(Src.getPointer(), DestCIRTy); + + // TODO(cir): handle address space here. + assert(!MissingFeatures::addressSpace()); + mlir::Type U8PtrTy = + CGF.getBuilder().getPointerTo(CGF.getBuilder().getUInt8Ty()); + + mlir::Value StrideToApply = CGF.getBuilder().getConstInt( + Loc, CGF.getBuilder().getUInt64Ty(), Offset->getQuantity()); + mlir::Value SrcU8Ptr = + CGF.getBuilder().createBitcast(Src.getPointer(), U8PtrTy); + mlir::Value ResultU8Ptr = CGF.getBuilder().create( + Loc, U8PtrTy, SrcU8Ptr, StrideToApply); + return CGF.getBuilder().createBitcast(ResultU8Ptr, DestCIRTy); + }; + + if (IsRefCast) { + mlir::Value Failed = CGF.getBuilder().createNot(Success); + CGF.getBuilder().create( + Loc, Failed, /*withElseRegion=*/false, + [&](mlir::OpBuilder &, mlir::Location) { + buildCallToBadCast(CGF, Loc); + }); + return buildCastResult(); + } + + return CGF.getBuilder() + .create( + Loc, Success, + [&](mlir::OpBuilder &, mlir::Location) { + auto Result = buildCastResult(); + CGF.getBuilder().createYield(Loc, Result); + }, + [&](mlir::OpBuilder &, mlir::Location) { + mlir::Value NullPtrValue = + CGF.getBuilder().getNullPtr(DestCIRTy, Loc); + CGF.getBuilder().createYield(Loc, NullPtrValue); + }) + .getResult(); +} + +static mlir::cir::DynamicCastInfoAttr +buildDynamicCastInfo(CIRGenFunction &CGF, mlir::Location Loc, + QualType SrcRecordTy, QualType DestRecordTy) { + auto srcRtti = mlir::cast( + CGF.CGM.getAddrOfRTTIDescriptor(Loc, SrcRecordTy)); + auto destRtti = mlir::cast( + CGF.CGM.getAddrOfRTTIDescriptor(Loc, DestRecordTy)); + + auto runtimeFuncOp = getItaniumDynamicCastFn(CGF); + auto badCastFuncOp = getBadCastFn(CGF); + auto runtimeFuncRef = mlir::FlatSymbolRefAttr::get(runtimeFuncOp); + auto badCastFuncRef = mlir::FlatSymbolRefAttr::get(badCastFuncOp); + + const CXXRecordDecl *srcDecl = SrcRecordTy->getAsCXXRecordDecl(); + const CXXRecordDecl *destDecl = DestRecordTy->getAsCXXRecordDecl(); + auto offsetHint = computeOffsetHint(CGF.getContext(), srcDecl, destDecl); + + mlir::Type ptrdiffTy = CGF.ConvertType(CGF.getContext().getPointerDiffType()); + auto offsetHintAttr = + mlir::cir::IntAttr::get(ptrdiffTy, offsetHint.getQuantity()); + + return mlir::cir::DynamicCastInfoAttr::get(srcRtti, destRtti, runtimeFuncRef, + badCastFuncRef, offsetHintAttr); +} + +mlir::Value CIRGenItaniumCXXABI::buildDynamicCast( + CIRGenFunction &CGF, mlir::Location 
Loc, QualType SrcRecordTy, + QualType DestRecordTy, mlir::cir::PointerType DestCIRTy, bool isRefCast, + Address Src) { + bool isCastToVoid = DestRecordTy.isNull(); + assert((!isCastToVoid || !isRefCast) && "cannot cast to void reference"); + + if (isCastToVoid) + return buildDynamicCastToVoid(CGF, Loc, SrcRecordTy, Src).getPointer(); + + // If the destination is effectively final, the cast succeeds if and only + // if the dynamic type of the pointer is exactly the destination type. + if (DestRecordTy->getAsCXXRecordDecl()->isEffectivelyFinal() && + CGF.CGM.getCodeGenOpts().OptimizationLevel > 0) + return buildExactDynamicCast(*this, CGF, Loc, SrcRecordTy, DestRecordTy, + DestCIRTy, isRefCast, Src); + + auto castInfo = buildDynamicCastInfo(CGF, Loc, SrcRecordTy, DestRecordTy); + return CGF.getBuilder().createDynCast(Loc, Src.getPointer(), DestCIRTy, + isRefCast, castInfo); +} + +mlir::cir::MethodAttr +CIRGenItaniumCXXABI::buildVirtualMethodAttr(mlir::cir::MethodType MethodTy, + const CXXMethodDecl *MD) { + assert(MD->isVirtual() && "only deal with virtual member functions"); + + uint64_t Index = CGM.getItaniumVTableContext().getMethodVTableIndex(MD); + uint64_t VTableOffset; + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + // Multiply by 4-byte relative offsets. + VTableOffset = Index * 4; + } else { + const ASTContext &Context = getContext(); + CharUnits PointerWidth = Context.toCharUnitsFromBits( + Context.getTargetInfo().getPointerWidth(LangAS::Default)); + VTableOffset = Index * PointerWidth.getQuantity(); + } + + return mlir::cir::MethodAttr::get(MethodTy, VTableOffset); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp new file mode 100644 index 000000000000..b1f052cd3e09 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -0,0 +1,3396 @@ +//===- CIRGenModule.cpp - Per-Module state for CIR generation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the internal per-translation-unit state used for CIR translation. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenModule.h" + +#include "CIRGenCXXABI.h" +#include "CIRGenCstEmitter.h" +#include "CIRGenFunction.h" +#include "CIRGenOpenMPRuntime.h" +#include "CIRGenTBAA.h" +#include "CIRGenTypes.h" +#include "CIRGenValue.h" +#include "TargetInfo.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/IR/Verifier.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/Cuda.h" +#include "clang/CIR/MissingFeatures.h" + +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclGroup.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/EvaluatedExprVisitor.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprObjC.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/ParentMap.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtObjC.h" +#include "clang/AST/Type.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangStandard.h" +#include "clang/Basic/NoSanitizeList.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/CIR/CIRGenerator.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/LowerToLLVM.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Lex/Preprocessor.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace mlir::cir; +using namespace cir; +using namespace clang; + +using llvm::cast; +using llvm::dyn_cast; +using llvm::isa; +using llvm::SmallVector; +using llvm::StringRef; + +static CIRGenCXXABI *createCXXABI(CIRGenModule &CGM) { + switch (CGM.getASTContext().getCXXABIKind()) { + case TargetCXXABI::GenericItanium: + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::AppleARM64: + return CreateCIRGenItaniumCXXABI(CGM); + default: + llvm_unreachable("invalid C++ ABI kind"); + } +} + +CIRGenModule::CIRGenModule(mlir::MLIRContext &context, + clang::ASTContext &astctx, + const clang::CodeGenOptions &CGO, + DiagnosticsEngine &Diags) + : builder(context, *this), astCtx(astctx), langOpts(astctx.getLangOpts()), + codeGenOpts(CGO), + theModule{mlir::ModuleOp::create(builder.getUnknownLoc())}, Diags(Diags), + target(astCtx.getTargetInfo()), ABI(createCXXABI(*this)), genTypes{*this}, + VTables{*this}, openMPRuntime(new CIRGenOpenMPRuntime(*this)) { + + // Initialize CIR signed integer types cache. 
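+ // These are created once per module because integer types show up on
+ // virtually every emission path; e.g. SInt32Ty is the signed 32-bit CIR
+ // integer type used for C 'int' on typical targets.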
+ SInt8Ty = + ::mlir::cir::IntType::get(builder.getContext(), 8, /*isSigned=*/true); + SInt16Ty = + ::mlir::cir::IntType::get(builder.getContext(), 16, /*isSigned=*/true); + SInt32Ty = + ::mlir::cir::IntType::get(builder.getContext(), 32, /*isSigned=*/true); + SInt64Ty = + ::mlir::cir::IntType::get(builder.getContext(), 64, /*isSigned=*/true); + + // Initialize CIR unsigned integer types cache. + UInt8Ty = + ::mlir::cir::IntType::get(builder.getContext(), 8, /*isSigned=*/false); + UInt16Ty = + ::mlir::cir::IntType::get(builder.getContext(), 16, /*isSigned=*/false); + UInt32Ty = + ::mlir::cir::IntType::get(builder.getContext(), 32, /*isSigned=*/false); + UInt64Ty = + ::mlir::cir::IntType::get(builder.getContext(), 64, /*isSigned=*/false); + + VoidTy = ::mlir::cir::VoidType::get(builder.getContext()); + + // Initialize CIR pointer types cache. + VoidPtrTy = ::mlir::cir::PointerType::get(builder.getContext(), VoidTy); + + FP16Ty = ::mlir::cir::FP16Type::get(builder.getContext()); + BFloat16Ty = ::mlir::cir::BF16Type::get(builder.getContext()); + FloatTy = ::mlir::cir::SingleType::get(builder.getContext()); + DoubleTy = ::mlir::cir::DoubleType::get(builder.getContext()); + FP80Ty = ::mlir::cir::FP80Type::get(builder.getContext()); + + // TODO: PointerWidthInBits + PointerAlignInBytes = + astctx + .toCharUnitsFromBits( + astctx.getTargetInfo().getPointerAlign(LangAS::Default)) + .getQuantity(); + // TODO: SizeSizeInBytes + // TODO: IntAlignInBytes + UCharTy = ::mlir::cir::IntType::get(builder.getContext(), + astCtx.getTargetInfo().getCharWidth(), + /*isSigned=*/false); + UIntTy = ::mlir::cir::IntType::get(builder.getContext(), + astCtx.getTargetInfo().getIntWidth(), + /*isSigned=*/false); + UIntPtrTy = ::mlir::cir::IntType::get( + builder.getContext(), astCtx.getTargetInfo().getMaxPointerWidth(), + /*isSigned=*/false); + UInt8PtrTy = builder.getPointerTo(UInt8Ty); + UInt8PtrPtrTy = builder.getPointerTo(UInt8PtrTy); + AllocaInt8PtrTy = UInt8PtrTy; + // TODO: GlobalsInt8PtrTy + // TODO: ConstGlobalsPtrTy + CIRAllocaAddressSpace = getTargetCIRGenInfo().getCIRAllocaAddressSpace(); + + PtrDiffTy = ::mlir::cir::IntType::get( + builder.getContext(), astCtx.getTargetInfo().getMaxPointerWidth(), + /*isSigned=*/true); + + if (langOpts.OpenCL) { + createOpenCLRuntime(); + } + + mlir::cir::sob::SignedOverflowBehavior sob; + switch (langOpts.getSignedOverflowBehavior()) { + case clang::LangOptions::SignedOverflowBehaviorTy::SOB_Defined: + sob = sob::SignedOverflowBehavior::defined; + break; + case clang::LangOptions::SignedOverflowBehaviorTy::SOB_Undefined: + sob = sob::SignedOverflowBehavior::undefined; + break; + case clang::LangOptions::SignedOverflowBehaviorTy::SOB_Trapping: + sob = sob::SignedOverflowBehavior::trapping; + break; + } + + // FIXME(cir): Implement a custom CIR Module Op and attributes to leverage + // MLIR features. + theModule->setAttr("cir.sob", + mlir::cir::SignedOverflowBehaviorAttr::get(&context, sob)); + theModule->setAttr( + "cir.lang", mlir::cir::LangAttr::get(&context, getCIRSourceLanguage())); + theModule->setAttr("cir.triple", builder.getStringAttr(getTriple().str())); + // Set the module name to be the name of the main file. TranslationUnitDecl + // often contains invalid source locations and isn't a reliable source for the + // module location. 
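+ // E.g. compiling /tmp/foo.cpp names the module "/tmp/foo.cpp" and pins its
+ // location to /tmp/foo.cpp:0:0 below.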
+ auto MainFileID = astctx.getSourceManager().getMainFileID(); + const FileEntry &MainFile = + *astctx.getSourceManager().getFileEntryForID(MainFileID); + auto Path = MainFile.tryGetRealPathName(); + if (!Path.empty()) { + theModule.setSymName(Path); + theModule->setLoc(mlir::FileLineColLoc::get(&context, Path, + /*line=*/0, + /*col=*/0)); + } +} + +CIRGenModule::~CIRGenModule() {} + +bool CIRGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor, + bool ExcludeDtor) { + if (!Ty.isConstant(astCtx) && !Ty->isReferenceType()) + return false; + + if (astCtx.getLangOpts().CPlusPlus) { + if (const CXXRecordDecl *Record = + astCtx.getBaseElementType(Ty)->getAsCXXRecordDecl()) + return ExcludeCtor && !Record->hasMutableFields() && + (Record->hasTrivialDestructor() || ExcludeDtor); + } + + return true; +} + +/// FIXME: this could likely be a common helper and not necessarily related +/// with codegen. +/// Return the best known alignment for an unknown pointer to a +/// particular class. +CharUnits CIRGenModule::getClassPointerAlignment(const CXXRecordDecl *RD) { + if (!RD->hasDefinition()) + return CharUnits::One(); // Hopefully won't be used anywhere. + + auto &layout = astCtx.getASTRecordLayout(RD); + + // If the class is final, then we know that the pointer points to an + // object of that type and can use the full alignment. + if (RD->isEffectivelyFinal()) + return layout.getAlignment(); + + // Otherwise, we have to assume it could be a subclass. + return layout.getNonVirtualAlignment(); +} + +/// FIXME: this could likely be a common helper and not necessarily related +/// with codegen. +CharUnits CIRGenModule::getNaturalPointeeTypeAlignment( + QualType ty, LValueBaseInfo *baseInfo, TBAAAccessInfo *tbaaInfo) { + return getNaturalTypeAlignment(ty->getPointeeType(), baseInfo, tbaaInfo, + /* forPointeeType= */ true); +} + +/// FIXME: this could likely be a common helper and not necessarily related +/// with codegen. +/// TODO: Add TBAAAccessInfo +CharUnits CIRGenModule::getNaturalTypeAlignment(QualType T, + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *tbaaInfo, + bool forPointeeType) { + // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But + // that doesn't return the information we need to compute BaseInfo. + + // Honor alignment typedef attributes even on incomplete types. + // We also honor them straight for C++ class types, even as pointees; + // there's an expressivity gap here. + if (auto TT = T->getAs()) { + if (auto Align = TT->getDecl()->getMaxAlignment()) { + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); + return astCtx.toCharUnitsFromBits(Align); + } + } + + bool AlignForArray = T->isArrayType(); + + // Analyze the base element type, so we don't get confused by incomplete + // array types. + T = astCtx.getBaseElementType(T); + + if (T->isIncompleteType()) { + // We could try to replicate the logic from + // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the + // type is incomplete, so it's impossible to test. We could try to reuse + // getTypeAlignIfKnown, but that doesn't return the information we need + // to set BaseInfo. So just ignore the possibility that the alignment is + // greater than one. 
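+ // E.g. given 'struct S; extern S *p;', alignment queries about p's pointee
+ // reach this path and conservatively report one byte.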
+ if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::Type); + return CharUnits::One(); + } + + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::Type); + + CharUnits Alignment; + const CXXRecordDecl *RD; + if (T.getQualifiers().hasUnaligned()) { + Alignment = CharUnits::One(); + } else if (forPointeeType && !AlignForArray && + (RD = T->getAsCXXRecordDecl())) { + // For C++ class pointees, we don't know whether we're pointing at a + // base or a complete object, so we generally need to use the + // non-virtual alignment. + Alignment = getClassPointerAlignment(RD); + } else { + Alignment = astCtx.getTypeAlignInChars(T); + } + + // Cap to the global maximum type alignment unless the alignment + // was somehow explicit on the type. + if (unsigned MaxAlign = astCtx.getLangOpts().MaxTypeAlign) { + if (Alignment.getQuantity() > MaxAlign && !astCtx.isAlignmentRequired(T)) + Alignment = CharUnits::fromQuantity(MaxAlign); + } + return Alignment; +} + +bool CIRGenModule::MustBeEmitted(const ValueDecl *Global) { + // Never defer when EmitAllDecls is specified. + assert(!langOpts.EmitAllDecls && "EmitAllDecls NYI"); + assert(!codeGenOpts.KeepStaticConsts && "KeepStaticConsts NYI"); + + return getASTContext().DeclMustBeEmitted(Global); +} + +bool CIRGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { + // In OpenMP 5.0 variables and function may be marked as + // device_type(host/nohost) and we should not emit them eagerly unless we sure + // that they must be emitted on the host/device. To be sure we need to have + // seen a declare target with an explicit mentioning of the function, we know + // we have if the level of the declare target attribute is -1. Note that we + // check somewhere else if we should emit this at all. + if (langOpts.OpenMP >= 50 && !langOpts.OpenMPSimd) { + std::optional ActiveAttr = + OMPDeclareTargetDeclAttr::getActiveAttr(Global); + if (!ActiveAttr || (*ActiveAttr)->getLevel() != (unsigned)-1) + return false; + } + + const auto *FD = dyn_cast(Global); + if (FD) { + // Implicit template instantiations may change linkage if they are later + // explicitly instantiated, so they should not be emitted eagerly. + // TODO(cir): do we care? + assert(FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation && + "not implemented"); + assert(!FD->isTemplated() && "Templates NYI"); + } + const auto *VD = dyn_cast(Global); + if (VD) + // A definition of an inline constexpr static data member may change + // linkage later if it's redeclared outside the class. + // TODO(cir): do we care? + assert(astCtx.getInlineVariableDefinitionKind(VD) != + ASTContext::InlineVariableDefinitionKind::WeakUnknown && + "not implemented"); + + // If OpenMP is enabled and threadprivates must be generated like TLS, delay + // codegen for global variables, because they may be marked as threadprivate. + if (langOpts.OpenMP && langOpts.OpenMPUseTLS && + getASTContext().getTargetInfo().isTLSSupported() && + isa(Global) && + !Global->getType().isConstantStorage(getASTContext(), false, false) && + !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) + return false; + + assert((FD || VD) && + "Only FunctionDecl and VarDecl should hit this path so far."); + return true; +} + +static bool shouldAssumeDSOLocal(const CIRGenModule &CGM, + CIRGlobalValueInterface GV) { + if (GV.hasLocalLinkage()) + return true; + + if (!GV.hasDefaultVisibility() && !GV.hasExternalWeakLinkage()) { + return true; + } + + // DLLImport explicitly marks the GV as external. 
+ // so it shouldn't be dso_local + // But we don't have the info set now + assert(!MissingFeatures::setDLLImportDLLExport()); + + const llvm::Triple &TT = CGM.getTriple(); + const auto &CGOpts = CGM.getCodeGenOpts(); + if (TT.isWindowsGNUEnvironment()) { + // In MinGW, variables without DLLImport can still be automatically + // imported from a DLL by the linker; don't mark variables that + // potentially could come from another DLL as DSO local. + + // With EmulatedTLS, TLS variables can be autoimported from other DLLs + // (and this actually happens in the public interface of libstdc++), so + // such variables can't be marked as DSO local. (Native TLS variables + // can't be dllimported at all, though.) + llvm_unreachable("MinGW not supported here"); + } + + // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols + // remain unresolved in the link, they can be resolved to zero, which is + // outside the current DSO. + if (TT.isOSBinFormatCOFF() && GV.hasExternalWeakLinkage()) + return false; + + // Every other GV is local on COFF. + // Make an exception for windows OS in the triple: Some firmware builds use + // *-win32-macho triples. This (accidentally?) produced windows relocations + // without GOT tables in older clang versions; Keep this behaviour. + // FIXME: even thread local variables? + if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) + return true; + + // Only handle COFF and ELF for now. + if (!TT.isOSBinFormatELF()) + return false; + + llvm::Reloc::Model RM = CGOpts.RelocationModel; + const auto &LOpts = CGM.getLangOpts(); + if (RM != llvm::Reloc::Static && !LOpts.PIE) { + // On ELF, if -fno-semantic-interposition is specified and the target + // supports local aliases, there will be neither CC1 + // -fsemantic-interposition nor -fhalf-no-semantic-interposition. Set + // dso_local on the function if using a local alias is preferable (can avoid + // PLT indirection). + if (!(isa(GV) && GV.canBenefitFromLocalAlias())) { + return false; + } + return !(CGM.getLangOpts().SemanticInterposition || + CGM.getLangOpts().HalfNoSemanticInterposition); + } + + // A definition cannot be preempted from an executable. + if (!GV.isDeclarationForLinker()) + return true; + + // Most PIC code sequences that assume that a symbol is local cannot produce a + // 0 if it turns out the symbol is undefined. While this is ABI and relocation + // depended, it seems worth it to handle it here. + if (RM == llvm::Reloc::PIC_ && GV.hasExternalWeakLinkage()) + return false; + + // PowerPC64 prefers TOC indirection to avoid copy relocations. + if (TT.isPPC64()) + return false; + + if (CGOpts.DirectAccessExternalData) { + llvm_unreachable("-fdirect-access-external-data not supported"); + } + + // If we can use copy relocations we can assume it is local. + + // Otherwise don't assume it is local. + + return false; +} + +void CIRGenModule::setDSOLocal(CIRGlobalValueInterface GV) const { + GV.setDSOLocal(shouldAssumeDSOLocal(*this, GV)); +} + +void CIRGenModule::buildGlobal(GlobalDecl GD) { + const auto *Global = cast(GD.getDecl()); + + assert(!Global->hasAttr() && "NYI"); + assert(!Global->hasAttr() && "NYI"); + assert(!langOpts.CUDA && "NYI"); + + if (langOpts.OpenMP) { + // If this is OpenMP, check if it is legal to emit this global normally. 
+ if (openMPRuntime && openMPRuntime->emitTargetGlobal(GD)) { + assert(!MissingFeatures::openMPRuntime()); + return; + } + if (auto *DRD = dyn_cast(Global)) { + assert(!MissingFeatures::openMP()); + return; + } + if (auto *DMD = dyn_cast(Global)) { + assert(!MissingFeatures::openMP()); + return; + } + } + + // Ignore declarations, they will be emitted on their first use. + if (const auto *FD = dyn_cast(Global)) { + // Update deferred annotations with the latest declaration if the function + // was already used or defined. + if (FD->hasAttr()) { + StringRef MangledName = getMangledName(GD); + if (getGlobalValue(MangledName)) + deferredAnnotations[MangledName] = FD; + } + // Forward declarations are emitted lazily on first use. + if (!FD->doesThisDeclarationHaveABody()) { + if (!FD->doesDeclarationForceExternallyVisibleDefinition()) + return; + + llvm::StringRef MangledName = getMangledName(GD); + + // Compute the function info and CIR type. + const auto &FI = getTypes().arrangeGlobalDeclaration(GD); + mlir::Type Ty = getTypes().GetFunctionType(FI); + + GetOrCreateCIRFunction(MangledName, Ty, GD, /*ForVTable=*/false, + /*DontDefer=*/false); + return; + } + } else { + const auto *VD = cast(Global); + assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); + if (VD->isThisDeclarationADefinition() != VarDecl::Definition && + !astCtx.isMSStaticDataMemberInlineDefinition(VD)) { + if (langOpts.OpenMP) { + // Emit declaration of the must-be-emitted declare target variable. + if (std::optional Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { + assert(0 && "OMPDeclareTargetDeclAttr NYI"); + } + } + // If this declaration may have caused an inline variable definition to + // change linkage, make sure that it's emitted. + if (astCtx.getInlineVariableDefinitionKind(VD) == + ASTContext::InlineVariableDefinitionKind::Strong) + getAddrOfGlobalVar(VD); + return; + } + } + + // Defer code generation to first use when possible, e.g. if this is an inline + // function. If the global mjust always be emitted, do it eagerly if possible + // to benefit from cache locality. + if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) { + // Emit the definition if it can't be deferred. + buildGlobalDefinition(GD); + return; + } + + // If we're deferring emission of a C++ variable with an initializer, remember + // the order in which it appeared on the file. + if (getLangOpts().CPlusPlus && isa(Global) && + cast(Global)->hasInit()) { + DelayedCXXInitPosition[Global] = CXXGlobalInits.size(); + CXXGlobalInits.push_back(nullptr); + } + + llvm::StringRef MangledName = getMangledName(GD); + if (getGlobalValue(MangledName) != nullptr) { + // The value has already been used and should therefore be emitted. + addDeferredDeclToEmit(GD); + } else if (MustBeEmitted(Global)) { + // The value must be emitted, but cannot be emitted eagerly. + assert(!MayBeEmittedEagerly(Global)); + addDeferredDeclToEmit(GD); + } else { + // Otherwise, remember that we saw a deferred decl with this name. The first + // use of the mangled name will cause it to move into DeferredDeclsToEmit. + DeferredDecls[MangledName] = GD; + } +} + +void CIRGenModule::buildGlobalFunctionDefinition(GlobalDecl GD, + mlir::Operation *Op) { + auto const *D = cast(GD.getDecl()); + + // Compute the function info and CIR type. + const CIRGenFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); + auto Ty = getTypes().GetFunctionType(FI); + + // Get or create the prototype for the function. 
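+ // If this mangled name was already referenced (e.g. the function was called
+ // before its body was seen), GetAddrOfFunction returns that existing
+ // declaration; otherwise it creates a fresh cir.func.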
+ // if (!V || (V.getValueType() != Ty)) + // TODO(cir): Figure out what to do here? llvm uses a GlobalValue for the + // FuncOp in mlir + Op = GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/true, + ForDefinition); + + auto globalVal = dyn_cast_or_null(Op); + if (globalVal && !globalVal.isDeclaration()) { + // Already emitted. + return; + } + auto Fn = cast(Op); + setFunctionLinkage(GD, Fn); + setGVProperties(Op, D); + // TODO(cir): MaubeHandleStaticInExternC + // TODO(cir): maybeSetTrivialComdat + // TODO(cir): setLLVMFunctionFEnvAttributes + + CIRGenFunction CGF{*this, builder}; + CurCGF = &CGF; + { + mlir::OpBuilder::InsertionGuard guard(builder); + CGF.generateCode(GD, Fn, FI); + } + CurCGF = nullptr; + + setNonAliasAttributes(GD, Op); + setCIRFunctionAttributesForDefinition(D, Fn); + + if (const ConstructorAttr *CA = D->getAttr()) + AddGlobalCtor(Fn, CA->getPriority()); + if (const DestructorAttr *DA = D->getAttr()) + AddGlobalDtor(Fn, DA->getPriority(), true); + + if (D->getAttr()) + deferredAnnotations[getMangledName(GD)] = cast(D); +} + +/// Track functions to be called before main() runs. +void CIRGenModule::AddGlobalCtor(mlir::cir::FuncOp Ctor, int Priority) { + // FIXME(cir): handle LexOrder and Associated data upon testcases. + // + // Traditional LLVM codegen directly adds the function to the list of global + // ctors. In CIR we just add a global_ctor attribute to the function. The + // global list is created in LoweringPrepare. + // + // FIXME(from traditional LLVM): Type coercion of void()* types. + Ctor->setAttr(Ctor.getGlobalCtorAttrName(), + mlir::cir::GlobalCtorAttr::get(builder.getContext(), + Ctor.getName(), Priority)); +} + +/// Add a function to the list that will be called when the module is unloaded. +void CIRGenModule::AddGlobalDtor(mlir::cir::FuncOp Dtor, int Priority, + bool IsDtorAttrFunc) { + assert(IsDtorAttrFunc && "NYI"); + if (codeGenOpts.RegisterGlobalDtorsWithAtExit && + (!getASTContext().getTargetInfo().getTriple().isOSAIX() || + IsDtorAttrFunc)) { + llvm_unreachable("NYI"); + } + + // FIXME(from traditional LLVM): Type coercion of void()* types. + Dtor->setAttr(Dtor.getGlobalDtorAttrName(), + mlir::cir::GlobalDtorAttr::get(builder.getContext(), + Dtor.getName(), Priority)); +} + +mlir::Operation *CIRGenModule::getGlobalValue(StringRef Name) { + auto global = mlir::SymbolTable::lookupSymbolIn(theModule, Name); + if (!global) + return {}; + return global; +} + +mlir::Value CIRGenModule::getGlobalValue(const Decl *D) { + assert(CurCGF); + return CurCGF->symbolTable.lookup(D); +} + +mlir::cir::GlobalOp CIRGenModule::createGlobalOp( + CIRGenModule &cgm, mlir::Location loc, StringRef name, mlir::Type t, + bool isConstant, mlir::cir::AddressSpaceAttr addrSpace, + mlir::Operation *insertPoint, mlir::cir::GlobalLinkageKind linkage) { + mlir::cir::GlobalOp g; + auto &builder = cgm.getBuilder(); + { + mlir::OpBuilder::InsertionGuard guard(builder); + + // Some global emissions are triggered while emitting a function, e.g. + // void s() { const char *s = "yolo"; ... } + // + // Be sure to insert global before the current function + auto *curCGF = cgm.getCurrCIRGenFun(); + if (curCGF) + builder.setInsertionPoint(curCGF->CurFn); + + g = builder.create(loc, name, t, isConstant, linkage, + addrSpace); + if (!curCGF) { + if (insertPoint) + cgm.getModule().insert(insertPoint, g); + else + cgm.getModule().push_back(g); + } + + // Default to private until we can judge based on the initializer, + // since MLIR doesn't allow public declarations. 
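+ // (Callers that attach a definition later recompute this; e.g.
+ // buildGlobalVarDefinition derives the MLIR visibility from the final CIR
+ // linkage.)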
+ mlir::SymbolTable::setSymbolVisibility( + g, mlir::SymbolTable::Visibility::Private); + } + return g; +} + +void CIRGenModule::setCommonAttributes(GlobalDecl GD, mlir::Operation *GV) { + const Decl *D = GD.getDecl(); + if (isa_and_nonnull(D)) + setGVProperties(GV, dyn_cast(D)); + else + assert(!MissingFeatures::setDefaultVisibility()); + + if (D && D->hasAttr()) + assert(!MissingFeatures::addUsedOrCompilerUsedGlobal()); + + if (const auto *VD = dyn_cast_if_present(D); + VD && + ((codeGenOpts.KeepPersistentStorageVariables && + (VD->getStorageDuration() == SD_Static || + VD->getStorageDuration() == SD_Thread)) || + (codeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && + VD->getType().isConstQualified()))) + assert(!MissingFeatures::addUsedOrCompilerUsedGlobal()); +} + +void CIRGenModule::setNonAliasAttributes(GlobalDecl GD, mlir::Operation *GO) { + const Decl *D = GD.getDecl(); + setCommonAttributes(GD, GO); + + if (D) { + auto GV = llvm::dyn_cast_or_null(GO); + if (GV) { + if (D->hasAttr()) + assert(!MissingFeatures::addUsedGlobal()); + if (auto *SA = D->getAttr()) + assert(!MissingFeatures::addSectionAttributes()); + if (auto *SA = D->getAttr()) + assert(!MissingFeatures::addSectionAttributes()); + if (auto *SA = D->getAttr()) + assert(!MissingFeatures::addSectionAttributes()); + if (auto *SA = D->getAttr()) + assert(!MissingFeatures::addSectionAttributes()); + } + auto F = llvm::dyn_cast_or_null(GO); + if (F) { + if (D->hasAttr()) + assert(!MissingFeatures::addUsedGlobal()); + if (auto *SA = D->getAttr()) + if (!D->getAttr()) + assert(!MissingFeatures::setSectionForFuncOp()); + + assert(!MissingFeatures::updateCPUAndFeaturesAttributes()); + } + + if (const auto *CSA = D->getAttr()) { + assert(!MissingFeatures::setSectionForFuncOp()); + if (GV) + GV.setSection(CSA->getName()); + if (F) + assert(!MissingFeatures::setSectionForFuncOp()); + } else if (const auto *SA = D->getAttr()) + if (GV) + GV.setSection(SA->getName()); + if (F) + assert(!MissingFeatures::setSectionForFuncOp()); + } + assert(!MissingFeatures::setTargetAttributes()); +} + +void CIRGenModule::replaceGlobal(mlir::cir::GlobalOp Old, + mlir::cir::GlobalOp New) { + assert(Old.getSymName() == New.getSymName() && "symbol names must match"); + + // If the types does not match, update all references to Old to the new type. + auto OldTy = Old.getSymType(); + auto NewTy = New.getSymType(); + mlir::cir::AddressSpaceAttr oldAS = Old.getAddrSpaceAttr(); + mlir::cir::AddressSpaceAttr newAS = New.getAddrSpaceAttr(); + // TODO(cir): If the AS differs, we should also update all references. + if (oldAS != newAS) { + llvm_unreachable("NYI"); + } + if (OldTy != NewTy) { + auto OldSymUses = Old.getSymbolUses(theModule.getOperation()); + if (OldSymUses.has_value()) { + for (auto Use : *OldSymUses) { + auto *UserOp = Use.getUser(); + assert((isa(UserOp) || + isa(UserOp)) && + "GlobalOp symbol user is neither a GetGlobalOp nor a GlobalOp"); + + if (auto GGO = dyn_cast(Use.getUser())) { + auto UseOpResultValue = GGO.getAddr(); + UseOpResultValue.setType( + mlir::cir::PointerType::get(builder.getContext(), NewTy)); + } + } + } + } + + // Remove old global from the module. 
+ Old.erase(); +} + +mlir::cir::TLS_Model CIRGenModule::GetDefaultCIRTLSModel() const { + switch (getCodeGenOpts().getDefaultTLSModel()) { + case CodeGenOptions::GeneralDynamicTLSModel: + return mlir::cir::TLS_Model::GeneralDynamic; + case CodeGenOptions::LocalDynamicTLSModel: + return mlir::cir::TLS_Model::LocalDynamic; + case CodeGenOptions::InitialExecTLSModel: + return mlir::cir::TLS_Model::InitialExec; + case CodeGenOptions::LocalExecTLSModel: + return mlir::cir::TLS_Model::LocalExec; + } + llvm_unreachable("Invalid TLS model!"); +} + +void CIRGenModule::setTLSMode(mlir::Operation *Op, const VarDecl &D) const { + assert(D.getTLSKind() && "setting TLS mode on non-TLS var!"); + + auto TLM = GetDefaultCIRTLSModel(); + + // Override the TLS model if it is explicitly specified. + if (const TLSModelAttr *Attr = D.getAttr()) { + llvm_unreachable("NYI"); + } + + auto global = dyn_cast(Op); + assert(global && "NYI for other operations"); + global.setTlsModel(TLM); +} + +/// If the specified mangled name is not in the module, +/// create and return an mlir GlobalOp with the specified type (TODO(cir): +/// address space). +/// +/// TODO(cir): +/// 1. If there is something in the module with the specified name, return +/// it potentially bitcasted to the right type. +/// +/// 2. If D is non-null, it specifies a decl that correspond to this. This is +/// used to set the attributes on the global when it is first created. +/// +/// 3. If IsForDefinition is true, it is guaranteed that an actual global with +/// type Ty will be returned, not conversion of a variable with the same +/// mangled name but some other type. +mlir::cir::GlobalOp +CIRGenModule::getOrCreateCIRGlobal(StringRef MangledName, mlir::Type Ty, + LangAS langAS, const VarDecl *D, + ForDefinition_t IsForDefinition) { + // Lookup the entry, lazily creating it if necessary. + mlir::cir::GlobalOp Entry; + if (auto *V = getGlobalValue(MangledName)) { + assert(isa(V) && "only supports GlobalOp for now"); + Entry = dyn_cast_or_null(V); + } + + mlir::cir::AddressSpaceAttr cirAS = builder.getAddrSpaceAttr(langAS); + if (Entry) { + auto entryCIRAS = Entry.getAddrSpaceAttr(); + if (WeakRefReferences.erase(Entry)) { + if (D && !D->hasAttr()) { + auto LT = mlir::cir::GlobalLinkageKind::ExternalLinkage; + Entry.setLinkageAttr( + mlir::cir::GlobalLinkageKindAttr::get(builder.getContext(), LT)); + mlir::SymbolTable::setSymbolVisibility(Entry, getMLIRVisibility(Entry)); + } + } + + // Handle dropped DLL attributes. + if (D && !D->hasAttr() && + !D->hasAttr()) + assert(!MissingFeatures::setDLLStorageClass() && "NYI"); + + if (langOpts.OpenMP && !langOpts.OpenMPSimd && D) + getOpenMPRuntime().registerTargetGlobalVariable(D, Entry); + + if (Entry.getSymType() == Ty && entryCIRAS == cirAS) + return Entry; + + // If there are two attempts to define the same mangled name, issue an + // error. + // + // TODO(cir): look at mlir::GlobalValue::isDeclaration for all aspects of + // recognizing the global as a declaration, for now only check if + // initializer is present. + if (IsForDefinition && !Entry.isDeclaration()) { + GlobalDecl OtherGD; + const VarDecl *OtherD; + + // Check that D is not yet in DiagnosedConflictingDefinitions is required + // to make sure that we issue an error only once. 
+ if (D && lookupRepresentativeDecl(MangledName, OtherGD) && + (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) && + (OtherD = dyn_cast(OtherGD.getDecl())) && + OtherD->hasInit() && + DiagnosedConflictingDefinitions.insert(D).second) { + getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) + << MangledName; + getDiags().Report(OtherGD.getDecl()->getLocation(), + diag::note_previous_definition); + } + } + + // TODO(cir): LLVM codegen makes sure the result is of the correct type + // by issuing a address space cast. + if (entryCIRAS != cirAS) + llvm_unreachable("NYI"); + + // (If global is requested for a definition, we always need to create a new + // global, not just return a bitcast.) + if (!IsForDefinition) + return Entry; + } + + auto declCIRAS = builder.getAddrSpaceAttr(getGlobalVarAddressSpace(D)); + // TODO(cir): do we need to strip pointer casts for Entry? + + auto loc = getLoc(D->getSourceRange()); + + // mlir::SymbolTable::Visibility::Public is the default, no need to explicitly + // mark it as such. + auto GV = CIRGenModule::createGlobalOp(*this, loc, MangledName, Ty, + /*isConstant=*/false, + /*addrSpace=*/declCIRAS, + /*insertPoint=*/Entry.getOperation()); + + // If we already created a global with the same mangled name (but different + // type) before, replace it with the new global. + if (Entry) { + replaceGlobal(Entry, GV); + } + + // This is the first use or definition of a mangled name. If there is a + // deferred decl with this name, remember that we need to emit it at the end + // of the file. + auto DDI = DeferredDecls.find(MangledName); + if (DDI != DeferredDecls.end()) { + // Move the potentially referenced deferred decl to the DeferredDeclsToEmit + // list, and remove it from DeferredDecls (since we don't need it anymore). + addDeferredDeclToEmit(DDI->second); + DeferredDecls.erase(DDI); + } + + // Handle things which are present even on external declarations. + if (D) { + if (langOpts.OpenMP && !langOpts.OpenMPSimd && D) + getOpenMPRuntime().registerTargetGlobalVariable(D, Entry); + + // FIXME: This code is overly simple and should be merged with other global + // handling. + GV.setAlignmentAttr(getSize(astCtx.getDeclAlign(D))); + // TODO(cir): + // GV->setConstant(isTypeConstant(D->getType(), false)); + // setLinkageForGV(GV, D); + + if (D->getTLSKind()) { + if (D->getTLSKind() == VarDecl::TLS_Dynamic) + llvm_unreachable("NYI"); + setTLSMode(GV, *D); + } + + setGVProperties(GV, D); + + // If required by the ABI, treat declarations of static data members with + // inline initializers as definitions. + if (astCtx.isMSStaticDataMemberInlineDefinition(D)) { + assert(0 && "not implemented"); + } + + // Emit section information for extern variables. + if (D->hasExternalStorage()) { + if (const SectionAttr *SA = D->getAttr()) + GV.setSectionAttr(builder.getStringAttr(SA->getName())); + } + + GV.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(D)); + + // Handle XCore specific ABI requirements. + if (getTriple().getArch() == llvm::Triple::xcore) + assert(0 && "not implemented"); + + // Check if we a have a const declaration with an initializer, we maybe + // able to emit it as available_externally to expose it's value to the + // optimizer. 
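+ // (The typical case is an in-class initialized static data member, e.g.
+ // 'struct S { static const int N = 42; };' used in this TU without an
+ // out-of-line definition.)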
+ if (getLangOpts().CPlusPlus && GV.isPublic() && + D->getType().isConstQualified() && GV.isDeclaration() && + !D->hasDefinition() && D->hasInit() && !D->hasAttr()) { + assert(0 && "not implemented"); + } + } + + // TODO(cir): if this method is used to handle functions we must have + // something closer to GlobalValue::isDeclaration instead of checking for + // initializer. + if (GV.isDeclaration()) { + // TODO(cir): set target attributes + + // External HIP managed variables needed to be recorded for transformation + // in both device and host compilations. + if (getLangOpts().CUDA) + assert(0 && "not implemented"); + } + + // TODO(cir): address space cast when needed for DAddrSpace. + return GV; +} + +mlir::cir::GlobalOp CIRGenModule::buildGlobal(const VarDecl *D, mlir::Type Ty, + ForDefinition_t IsForDefinition) { + assert(D->hasGlobalStorage() && "Not a global variable"); + QualType ASTTy = D->getType(); + if (!Ty) + Ty = getTypes().convertTypeForMem(ASTTy); + + StringRef MangledName = getMangledName(D); + return getOrCreateCIRGlobal(MangledName, Ty, ASTTy.getAddressSpace(), D, + IsForDefinition); +} + +/// Return the mlir::Value for the address of the given global variable. If Ty +/// is non-null and if the global doesn't exist, then it will be created with +/// the specified type instead of whatever the normal requested type would be. +/// If IsForDefinition is true, it is guaranteed that an actual global with type +/// Ty will be returned, not conversion of a variable with the same mangled name +/// but some other type. +mlir::Value CIRGenModule::getAddrOfGlobalVar(const VarDecl *D, mlir::Type Ty, + ForDefinition_t IsForDefinition) { + assert(D->hasGlobalStorage() && "Not a global variable"); + QualType ASTTy = D->getType(); + if (!Ty) + Ty = getTypes().convertTypeForMem(ASTTy); + + bool tlsAccess = D->getTLSKind() != VarDecl::TLS_None; + auto g = buildGlobal(D, Ty, IsForDefinition); + auto ptrTy = builder.getPointerTo(g.getSymType(), g.getAddrSpaceAttr()); + return builder.create( + getLoc(D->getSourceRange()), ptrTy, g.getSymName(), tlsAccess); +} + +mlir::cir::GlobalViewAttr +CIRGenModule::getAddrOfGlobalVarAttr(const VarDecl *D, mlir::Type Ty, + ForDefinition_t IsForDefinition) { + assert(D->hasGlobalStorage() && "Not a global variable"); + QualType ASTTy = D->getType(); + if (!Ty) + Ty = getTypes().convertTypeForMem(ASTTy); + + auto globalOp = buildGlobal(D, Ty, IsForDefinition); + return builder.getGlobalViewAttr(builder.getPointerTo(Ty), globalOp); +} + +mlir::Operation *CIRGenModule::getWeakRefReference(const ValueDecl *VD) { + const AliasAttr *AA = VD->getAttr(); + assert(AA && "No alias?"); + + // See if there is already something with the target's name in the module. + mlir::Operation *Entry = getGlobalValue(AA->getAliasee()); + if (Entry) { + assert((isa(Entry) || isa(Entry)) && + "weak ref should be against a global variable or function"); + return Entry; + } + + mlir::Type DeclTy = getTypes().convertTypeForMem(VD->getType()); + if (mlir::isa(DeclTy)) { + auto F = GetOrCreateCIRFunction(AA->getAliasee(), DeclTy, + GlobalDecl(cast(VD)), + /*ForVtable=*/false); + F.setLinkage(mlir::cir::GlobalLinkageKind::ExternalWeakLinkage); + WeakRefReferences.insert(F); + return F; + } + + llvm_unreachable("GlobalOp NYI"); +} + +/// TODO(cir): looks like part of this code can be part of a common AST +/// helper betweem CIR and LLVM codegen. 
+template +void CIRGenModule::maybeHandleStaticInExternC(const SomeDecl *D, + mlir::cir::GlobalOp GV) { + if (!getLangOpts().CPlusPlus) + return; + + // Must have 'used' attribute, or else inline assembly can't rely on + // the name existing. + if (!D->template hasAttr()) + return; + + // Must have internal linkage and an ordinary name. + if (!D->getIdentifier() || D->getFormalLinkage() != Linkage::Internal) + return; + + // Must be in an extern "C" context. Entities declared directly within + // a record are not extern "C" even if the record is in such a context. + const SomeDecl *First = D->getFirstDecl(); + if (First->getDeclContext()->isRecord() || !First->isInExternCContext()) + return; + + // TODO(cir): + // OK, this is an internal linkage entity inside an extern "C" linkage + // specification. Make a note of that so we can give it the "expected" + // mangled name if nothing else is using that name. + // + // If we have multiple internal linkage entities with the same name + // in extern "C" regions, none of them gets that name. + assert(0 && "not implemented"); +} + +void CIRGenModule::buildGlobalVarDefinition(const clang::VarDecl *D, + bool IsTentative) { + // TODO(cir): + // OpenCL global variables of sampler type are translated to function calls, + // therefore no need to be translated. + // If this is OpenMP device, check if it is legal to emit this global + // normally. + QualType ASTTy = D->getType(); + if ((getLangOpts().OpenCL && ASTTy->isSamplerT()) || + getLangOpts().OpenMPIsTargetDevice) + llvm_unreachable("not implemented"); + + // TODO(cir): LLVM's codegen uses a llvm::TrackingVH here. Is that + // necessary here for CIR gen? + mlir::Attribute Init; + bool NeedsGlobalCtor = false; + // Whether the definition of the variable is available externally. + // If yes, we shouldn't emit the GloablCtor and GlobalDtor for the variable + // since this is the job for its original source. + bool IsDefinitionAvailableExternally = + astCtx.GetGVALinkageForVariable(D) == GVA_AvailableExternally; + bool NeedsGlobalDtor = + !IsDefinitionAvailableExternally && + D->needsDestruction(astCtx) == QualType::DK_cxx_destructor; + + // It is helpless to emit the definition for an available_externally variable + // which can't be marked as const. + // We don't need to check if it needs global ctor or dtor. See the above + // comment for ideas. + if (IsDefinitionAvailableExternally && + (!D->hasConstantInitialization() || + // TODO: Update this when we have interface to check constexpr + // destructor. + D->needsDestruction(getASTContext()) || + !D->getType().isConstantStorage(getASTContext(), true, true))) + return; + + const VarDecl *InitDecl; + const Expr *InitExpr = D->getAnyInitializer(InitDecl); + + std::optional emitter; + + // CUDA E.2.4.1 "__shared__ variables cannot have an initialization + // as part of their declaration." Sema has already checked for + // error cases, so we just need to set Init to UndefValue. + bool IsCUDASharedVar = + getLangOpts().CUDAIsDevice && D->hasAttr(); + // Shadows of initialized device-side global variables are also left + // undefined. + // Managed Variables should be initialized on both host side and device side. 
+ bool IsCUDAShadowVar = + !getLangOpts().CUDAIsDevice && !D->hasAttr() && + (D->hasAttr() || D->hasAttr() || + D->hasAttr()); + bool IsCUDADeviceShadowVar = + getLangOpts().CUDAIsDevice && !D->hasAttr() && + (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()); + if (getLangOpts().CUDA && + (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar)) + assert(0 && "not implemented"); + else if (D->hasAttr()) + assert(0 && "not implemented"); + else if (!InitExpr) { + // This is a tentative definition; tentative definitions are + // implicitly initialized with { 0 }. + // + // Note that tentative definitions are only emitted at the end of + // a translation unit, so they should never have incomplete + // type. In addition, EmitTentativeDefinition makes sure that we + // never attempt to emit a tentative definition if a real one + // exists. A use may still exists, however, so we still may need + // to do a RAUW. + assert(!ASTTy->isIncompleteType() && "Unexpected incomplete type"); + Init = builder.getZeroInitAttr(getCIRType(D->getType())); + } else { + initializedGlobalDecl = GlobalDecl(D); + emitter.emplace(*this); + auto Initializer = emitter->tryEmitForInitializer(*InitDecl); + if (!Initializer) { + QualType T = InitExpr->getType(); + if (D->getType()->isReferenceType()) + T = D->getType(); + + if (getLangOpts().CPlusPlus) { + if (InitDecl->hasFlexibleArrayInit(astCtx)) + ErrorUnsupported(D, "flexible array initializer"); + Init = builder.getZeroInitAttr(getCIRType(T)); + if (!IsDefinitionAvailableExternally) + NeedsGlobalCtor = true; + } else { + ErrorUnsupported(D, "static initializer"); + } + } else { + Init = Initializer; + // We don't need an initializer, so remove the entry for the delayed + // initializer position (just in case this entry was delayed) if we + // also don't need to register a destructor. + if (getLangOpts().CPlusPlus && !NeedsGlobalDtor) + DelayedCXXInitPosition.erase(D); + } + } + + mlir::Type InitType; + // If the initializer attribute is a SymbolRefAttr it means we are + // initializing the global based on a global constant. + // + // TODO(cir): create another attribute to contain the final type and abstract + // away SymbolRefAttr. + if (auto symAttr = mlir::dyn_cast(Init)) { + auto cstGlobal = mlir::SymbolTable::lookupSymbolIn(theModule, symAttr); + assert(isa(cstGlobal) && + "unaware of other symbol providers"); + auto g = cast(cstGlobal); + auto arrayTy = mlir::dyn_cast(g.getSymType()); + // TODO(cir): pointer to array decay. Should this be modeled explicitly in + // CIR? + if (arrayTy) + InitType = mlir::cir::PointerType::get(builder.getContext(), + arrayTy.getEltType()); + } else { + assert(mlir::isa(Init) && "This should have a type"); + auto TypedInitAttr = mlir::cast(Init); + InitType = TypedInitAttr.getType(); + } + assert(!mlir::isa(InitType) && "Should have a type by now"); + + auto Entry = buildGlobal(D, InitType, ForDefinition_t(!IsTentative)); + // TODO(cir): Strip off pointer casts from Entry if we get them? + + // TODO(cir): use GlobalValue interface + assert(dyn_cast(&Entry) && "FuncOp not supported here"); + auto GV = Entry; + + // We have a definition after a declaration with the wrong type. + // We must make a new GlobalVariable* and update everything that used OldGV + // (a declaration or tentative definition) with the new GlobalVariable* + // (which will be a definition). + // + // This happens if there is a prototype for a global (e.g. 
+ // "extern int x[];") and then a definition of a different type (e.g. + // "int x[10];"). This also happens when an initializer has a different type + // from the type of the global (this happens with unions). + if (!GV || GV.getSymType() != InitType) { + // TODO(cir): this should include an address space check as well. + assert(0 && "not implemented"); + } + + maybeHandleStaticInExternC(D, GV); + + if (D->hasAttr()) + addGlobalAnnotations(D, GV); + + // Set CIR's linkage type as appropriate. + mlir::cir::GlobalLinkageKind Linkage = + getCIRLinkageVarDefinition(D, /*IsConstant=*/false); + + // TODO(cir): + // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on + // the device. [...]" + // CUDA B.2.2 "The __constant__ qualifier, optionally used together with + // __device__, declares a variable that: [...] + if (GV && getLangOpts().CUDA) { + assert(0 && "not implemented"); + } + + // Set initializer and finalize emission + CIRGenModule::setInitializer(GV, Init); + if (emitter) + emitter->finalize(GV); + + // TODO(cir): If it is safe to mark the global 'constant', do so now. + // GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && + // isTypeConstant(D->getType(), true)); + + // If it is in a read-only section, mark it 'constant'. + if (const SectionAttr *SA = D->getAttr()) + GV.setSectionAttr(builder.getStringAttr(SA->getName())); + + GV.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(D)); + + // TODO(cir): + // GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); + + // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper + // function is only defined alongside the variable, not also alongside + // callers. Normally, all accesses to a thread_local go through the + // thread-wrapper in order to ensure initialization has occurred, underlying + // variable will never be used other than the thread-wrapper, so it can be + // converted to internal linkage. + // + // However, if the variable has the 'constinit' attribute, it _can_ be + // referenced directly, without calling the thread-wrapper, so the linkage + // must not be changed. + // + // Additionally, if the variable isn't plain external linkage, e.g. if it's + // weak or linkonce, the de-duplication semantics are important to preserve, + // so we don't change the linkage. + if (D->getTLSKind() == VarDecl::TLS_Dynamic && GV.isPublic() && + astCtx.getTargetInfo().getTriple().isOSDarwin() && + !D->hasAttr()) { + // TODO(cir): set to mlir::SymbolTable::Visibility::Private once we have + // testcases. + assert(0 && "not implemented"); + } + + // Set CIR linkage and DLL storage class. + GV.setLinkage(Linkage); + // FIXME(cir): setLinkage should likely set MLIR's visibility automatically. + GV.setVisibility(getMLIRVisibilityFromCIRLinkage(Linkage)); + // TODO(cir): handle DLL storage classes in CIR? + if (D->hasAttr()) + assert(!MissingFeatures::setDLLStorageClass()); + else if (D->hasAttr()) + assert(!MissingFeatures::setDLLStorageClass()); + else + assert(!MissingFeatures::setDLLStorageClass()); + + if (Linkage == mlir::cir::GlobalLinkageKind::CommonLinkage) { + // common vars aren't constant even if declared const. + GV.setConstant(false); + // Tentative definition of global variables may be initialized with + // non-zero null pointers. In this case they should have weak linkage + // since common linkage must have zero initializer and must not have + // explicit section therefore cannot have non-zero initial value. 
+ auto Initializer = GV.getInitialValue(); + if (Initializer && !getBuilder().isNullValue(*Initializer)) + GV.setLinkage(mlir::cir::GlobalLinkageKind::WeakAnyLinkage); + } + + setNonAliasAttributes(D, GV); + + if (D->getTLSKind() && !GV.getTlsModelAttr()) { + if (D->getTLSKind() == VarDecl::TLS_Dynamic) + llvm_unreachable("NYI"); + setTLSMode(GV, *D); + } + + maybeSetTrivialComdat(*D, GV); + + // TODO(cir): + // Emit the initializer function if necessary. + if (NeedsGlobalCtor || NeedsGlobalDtor) { + globalOpContext = GV; + buildCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor); + globalOpContext = nullptr; + } + + // TODO(cir): sanitizers (reportGlobalToASan) and global variable debug + // information. + assert(!MissingFeatures::sanitizeOther()); + assert(!MissingFeatures::generateDebugInfo()); +} + +void CIRGenModule::buildGlobalDefinition(GlobalDecl GD, mlir::Operation *Op) { + const auto *D = cast(GD.getDecl()); + if (const auto *FD = dyn_cast(D)) { + // At -O0, don't generate CIR for functions with available_externally + // linkage. + if (!shouldEmitFunction(GD)) + return; + + if (const auto *Method = dyn_cast(D)) { + // Make sure to emit the definition(s) before we emit the thunks. This is + // necessary for the generation of certain thunks. + if (isa(Method) || isa(Method)) + ABI->buildCXXStructor(GD); + else if (FD->isMultiVersion()) + llvm_unreachable("NYI"); + else + buildGlobalFunctionDefinition(GD, Op); + + if (Method->isVirtual()) + getVTables().buildThunks(GD); + + return; + } + + if (FD->isMultiVersion()) + llvm_unreachable("NYI"); + buildGlobalFunctionDefinition(GD, Op); + return; + } + + if (const auto *VD = dyn_cast(D)) { + return buildGlobalVarDefinition(VD, !VD->hasDefinition()); + } + + llvm_unreachable("Invalid argument to buildGlobalDefinition()"); +} + +mlir::Attribute +CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *E) { + assert(!E->getType()->isPointerType() && "Strings are always arrays"); + + // Don't emit it as the address of the string, emit the string data itself + // as an inline array. + if (E->getCharByteWidth() == 1) { + SmallString<64> Str(E->getString()); + + // Resize the string to the right size, which is indicated by its type. + const ConstantArrayType *CAT = astCtx.getAsConstantArrayType(E->getType()); + auto finalSize = CAT->getSize().getZExtValue(); + Str.resize(finalSize); + + auto eltTy = getTypes().ConvertType(CAT->getElementType()); + return builder.getString(Str, eltTy, finalSize); + } + + auto arrayTy = mlir::dyn_cast( + getTypes().ConvertType(E->getType())); + assert(arrayTy && "string literals must be emitted as an array type"); + + auto arrayEltTy = mlir::dyn_cast(arrayTy.getEltType()); + assert(arrayEltTy && + "string literal elements must be emitted as integral type"); + + auto arraySize = arrayTy.getSize(); + auto literalSize = E->getLength(); + + // Collect the code units. + SmallVector elementValues; + elementValues.reserve(arraySize); + for (unsigned i = 0; i < literalSize; ++i) + elementValues.push_back(E->getCodeUnit(i)); + elementValues.resize(arraySize); + + // If the string is full of null bytes, emit a #cir.zero instead. + if (std::all_of(elementValues.begin(), elementValues.end(), + [](uint32_t x) { return x == 0; })) + return builder.getZeroAttr(arrayTy); + + // Otherwise emit a constant array holding the characters. 
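getConstantArrayFromStringLiteral above pads the code units out to the array size dictated by the type and switches to a zero attribute when every unit is zero; the loop that follows builds the explicit array for the remaining cases. A rough standalone sketch of that decision (hypothetical helper, standard library only):

    #include <algorithm>
    #include <cstdint>
    #include <optional>
    #include <string>
    #include <vector>

    // Returns std::nullopt when every code unit is zero, so the caller can use
    // a zero-initializer; otherwise returns the units padded to the array size.
    std::optional<std::vector<uint32_t>>
    buildStringInit(const std::string &literal, size_t arraySize) {
      std::vector<uint32_t> units;
      units.reserve(arraySize);
      for (unsigned char c : literal)
        units.push_back(c);
      units.resize(arraySize, 0); // trailing NUL / padding required by the type
      if (std::all_of(units.begin(), units.end(),
                      [](uint32_t u) { return u == 0; }))
        return std::nullopt;
      return units;
    }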
+ SmallVector elements; + elements.reserve(arraySize); + for (uint64_t i = 0; i < arraySize; ++i) + elements.push_back(mlir::cir::IntAttr::get(arrayEltTy, elementValues[i])); + + auto elementsAttr = mlir::ArrayAttr::get(builder.getContext(), elements); + return builder.getConstArray(elementsAttr, arrayTy); +} + +// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen. +LangAS CIRGenModule::getGlobalConstantAddressSpace() const { + // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. + if (getLangOpts().OpenCL) + return LangAS::opencl_constant; + if (getLangOpts().SYCLIsDevice) + return LangAS::sycl_global; + if (auto AS = getTarget().getConstantAddressSpace()) + return AS.value(); + return LangAS::Default; +} + +// TODO(cir): this could be a common AST helper for both CIR and LLVM codegen. +LangAS CIRGenModule::getLangTempAllocaAddressSpace() const { + if (getLangOpts().OpenCL) + return LangAS::opencl_private; + if (getLangOpts().SYCLIsDevice || getLangOpts().CUDAIsDevice || + (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice)) + llvm_unreachable("NYI"); + return LangAS::Default; +} + +static mlir::cir::GlobalOp +generateStringLiteral(mlir::Location loc, mlir::TypedAttr C, + mlir::cir::GlobalLinkageKind LT, CIRGenModule &CGM, + StringRef GlobalName, CharUnits Alignment) { + unsigned AddrSpace = CGM.getASTContext().getTargetAddressSpace( + CGM.getGlobalConstantAddressSpace()); + assert((AddrSpace == 0 && !cir::MissingFeatures::addressSpaceInGlobalVar()) && + "NYI"); + + // Create a global variable for this string + // FIXME(cir): check for insertion point in module level. + auto GV = CIRGenModule::createGlobalOp(CGM, loc, GlobalName, C.getType(), + !CGM.getLangOpts().WritableStrings); + + // Set up extra information and add to the module + GV.setAlignmentAttr(CGM.getSize(Alignment)); + GV.setLinkageAttr( + mlir::cir::GlobalLinkageKindAttr::get(CGM.getBuilder().getContext(), LT)); + CIRGenModule::setInitializer(GV, C); + // TODO(cir) + assert(!cir::MissingFeatures::threadLocal() && "NYI"); + assert(!cir::MissingFeatures::unnamedAddr() && "NYI"); + if (GV.isWeakForLinker()) { + assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); + GV.setComdat(true); + } + CGM.setDSOLocal(static_cast(GV)); + return GV; +} + +/// Return a pointer to a constant array for the given string literal. +mlir::cir::GlobalViewAttr +CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *S, + StringRef Name) { + CharUnits Alignment = + astCtx.getAlignOfGlobalVarInChars(S->getType(), /*VD=*/nullptr); + + mlir::Attribute C = getConstantArrayFromStringLiteral(S); + + mlir::cir::GlobalOp GV; + if (!getLangOpts().WritableStrings && ConstantStringMap.count(C)) { + GV = ConstantStringMap[C]; + // The bigger alignment always wins. + if (!GV.getAlignment() || + uint64_t(Alignment.getQuantity()) > *GV.getAlignment()) + GV.setAlignmentAttr(getSize(Alignment)); + } else { + SmallString<256> StringNameBuffer = Name; + llvm::raw_svector_ostream Out(StringNameBuffer); + if (StringLiteralCnt) + Out << StringLiteralCnt; + Name = Out.str(); + StringLiteralCnt++; + + SmallString<256> MangledNameBuffer; + StringRef GlobalVariableName; + auto LT = mlir::cir::GlobalLinkageKind::ExternalLinkage; + + // Mangle the string literal if that's how the ABI merges duplicate strings. + // Don't do it if they are writable, since we don't want writes in one TU to + // affect strings in another. 
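getAddrOfConstantStringFromLiteral above reuses one global per distinct read-only literal via ConstantStringMap, and a later use only bumps the stored alignment when it needs more. A simplified standalone sketch of that caching scheme (the `StringGlobal` record and the `.str.` naming are illustrative, not the real symbol scheme):

    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <unordered_map>

    struct StringGlobal {
      std::string name;
      uint64_t alignment = 0;
    };

    // One global per distinct literal; a later use with a stricter alignment
    // requirement bumps the existing global instead of creating a new one.
    StringGlobal &getOrCreateStringGlobal(
        std::unordered_map<std::string, StringGlobal> &pool,
        const std::string &contents, uint64_t requiredAlign, unsigned &counter) {
      auto it = pool.find(contents);
      if (it != pool.end()) {
        it->second.alignment = std::max(it->second.alignment, requiredAlign);
        return it->second;
      }
      StringGlobal g{".str." + std::to_string(counter++), requiredAlign};
      return pool.emplace(contents, g).first->second;
    }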
+ if (getCXXABI().getMangleContext().shouldMangleStringLiteral(S) && + !getLangOpts().WritableStrings) { + assert(0 && "not implemented"); + } else { + LT = mlir::cir::GlobalLinkageKind::InternalLinkage; + GlobalVariableName = Name; + } + + auto loc = getLoc(S->getSourceRange()); + auto typedC = llvm::dyn_cast(C); + if (!typedC) + llvm_unreachable("this should never be untyped at this point"); + GV = generateStringLiteral(loc, typedC, LT, *this, GlobalVariableName, + Alignment); + setDSOLocal(static_cast(GV)); + ConstantStringMap[C] = GV; + + assert(!cir::MissingFeatures::reportGlobalToASan() && "NYI"); + } + + auto ArrayTy = mlir::dyn_cast(GV.getSymType()); + assert(ArrayTy && "String literal must be array"); + auto PtrTy = + mlir::cir::PointerType::get(builder.getContext(), ArrayTy.getEltType()); + + return builder.getGlobalViewAttr(PtrTy, GV); +} + +void CIRGenModule::buildDeclContext(const DeclContext *DC) { + for (auto *I : DC->decls()) { + // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope + // are themselves considered "top-level", so EmitTopLevelDecl on an + // ObjCImplDecl does not recursively visit them. We need to do that in + // case they're nested inside another construct (LinkageSpecDecl / + // ExportDecl) that does stop them from being considered "top-level". + if (auto *OID = dyn_cast(I)) + llvm_unreachable("NYI"); + + buildTopLevelDecl(I); + } +} + +void CIRGenModule::buildLinkageSpec(const LinkageSpecDecl *LSD) { + if (LSD->getLanguage() != LinkageSpecLanguageIDs::C && + LSD->getLanguage() != LinkageSpecLanguageIDs::CXX) { + llvm_unreachable("unsupported linkage spec"); + return; + } + buildDeclContext(LSD); +} + +mlir::Operation * +CIRGenModule::getAddrOfGlobalTemporary(const MaterializeTemporaryExpr *expr, + const Expr *init) { + assert((expr->getStorageDuration() == SD_Static || + expr->getStorageDuration() == SD_Thread) && + "not a global temporary"); + const auto *varDecl = cast(expr->getExtendingDecl()); + + // If we're not materializing a subobject of the temporay, keep the + // cv-qualifiers from the type of the MaterializeTemporaryExpr. + QualType materializedType = init->getType(); + if (init == expr->getSubExpr()) + materializedType = expr->getType(); + + [[maybe_unused]] CharUnits align = + getASTContext().getTypeAlignInChars(materializedType); + + auto insertResult = materializedGlobalTemporaryMap.insert({expr, nullptr}); + if (!insertResult.second) { + llvm_unreachable("NYI"); + } + + // FIXME: If an externally-visible declaration extends multiple temporaries, + // we need to give each temporary the same name in every translation unit (and + // we also need to make the temporaries externally-visible). + llvm::SmallString<256> name; + llvm::raw_svector_ostream out(name); + getCXXABI().getMangleContext().mangleReferenceTemporary( + varDecl, expr->getManglingNumber(), out); + + APValue *value = nullptr; + if (expr->getStorageDuration() == SD_Static && varDecl->evaluateValue()) { + // If the initializer of the extending declaration is a constant + // initializer, we should have a cached constant initializer for this + // temporay. Note taht this m ight have a different value from the value + // computed by evaluating the initializer if the surrounding constant + // expression modifies the temporary. 
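Earlier in getAddrOfGlobalTemporary the map entry for the expression is inserted as a null placeholder before any emission happens, so re-entrant requests can be detected and patched up afterwards. A minimal sketch of that placeholder pattern (opaque `Expr`/`Operation` stand-ins, not the MLIR types):

    #include <unordered_map>

    struct Expr;      // opaque key type for the sketch
    struct Operation; // opaque value type for the sketch

    // Insert a null placeholder first; only fill it in once the global exists.
    // The real code additionally rewrites uses of the placeholder it replaces.
    Operation *getOrCreateTemporary(
        std::unordered_map<const Expr *, Operation *> &map, const Expr *key,
        Operation *(*create)()) {
      auto ins = map.insert({key, nullptr});
      if (!ins.second && ins.first->second)
        return ins.first->second; // already materialized
      ins.first->second = create();
      return ins.first->second;
    }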
+ value = expr->getOrCreateValue(false); + } + + // Try evaluating it now, it might have a constant initializer + Expr::EvalResult evalResult; + if (!value && init->EvaluateAsRValue(evalResult, getASTContext()) && + !evalResult.hasSideEffects()) + value = &evalResult.Val; + + LangAS addrSpace = getGlobalVarAddressSpace(varDecl); + + std::optional emitter; + mlir::Attribute initialValue = nullptr; + bool isConstant = false; + mlir::Type type; + if (value) { + emitter.emplace(*this); + initialValue = + emitter->emitForInitializer(*value, addrSpace, materializedType); + + isConstant = materializedType.isConstantStorage( + getASTContext(), /*ExcludeCtor*/ value, /*ExcludeDtor*/ false); + + type = mlir::cast(initialValue).getType(); + } else { + // No initializer, the initialization will be provided when we initialize + // the declaration which performed lifetime extension. + llvm_unreachable("else value"); + } + + // Create a global variable for this lifetime-extended temporary. + mlir::cir::GlobalLinkageKind linkage = + getCIRLinkageVarDefinition(varDecl, false); + if (linkage == mlir::cir::GlobalLinkageKind::ExternalLinkage) { + const VarDecl *initVD; + if (varDecl->isStaticDataMember() && varDecl->getAnyInitializer(initVD) && + isa(initVD->getLexicalDeclContext())) { + // Temporaries defined inside a class get linkonce_odr linkage because the + // calss can be defined in multiple translation units. + llvm_unreachable("staticdatamember NYI"); + } else { + // There is no need for this temporary to have external linkage if the + // VarDecl has external linkage. + linkage = mlir::cir::GlobalLinkageKind::InternalLinkage; + } + } + auto targetAS = builder.getAddrSpaceAttr(addrSpace); + + auto loc = getLoc(expr->getSourceRange()); + auto gv = createGlobalOp(*this, loc, name, type, isConstant, targetAS, + nullptr, linkage); + gv.setInitialValueAttr(initialValue); + + if (emitter) + emitter->finalize(gv); + // Don't assign dllimport or dllexport to lcoal linkage globals + if (!gv.hasLocalLinkage()) { + llvm_unreachable("NYI"); + } + gv.setAlignment(align.getAsAlign().value()); + if (supportsCOMDAT() && gv.isWeakForLinker()) + llvm_unreachable("NYI"); + if (varDecl->getTLSKind()) + llvm_unreachable("NYI"); + mlir::Operation *cv = gv; + if (addrSpace != LangAS::Default) + llvm_unreachable("NYI"); + + // Update the map with the new temporay. If we created a placeholder above, + // replace it with the new global now. + mlir::Operation *&entry = materializedGlobalTemporaryMap[expr]; + if (entry) { + entry->replaceAllUsesWith(cv); + entry->erase(); + } + entry = cv; + + return cv; +} + +// Emit code for a single top level declaration. +void CIRGenModule::buildTopLevelDecl(Decl *decl) { + // Ignore dependent declarations + if (decl->isTemplated()) + return; + + // Consteval function shouldn't be emitted. + if (auto *FD = dyn_cast(decl)) + if (FD->isConsteval()) + return; + + switch (decl->getKind()) { + default: + llvm::errs() << "buildTopLevelDecl codegen for decl kind '" + << decl->getDeclKindName() << "' not implemented\n"; + assert(false && "Not yet implemented"); + + case Decl::TranslationUnit: { + // This path is CIR only - CIRGen handles TUDecls because + // of clang-tidy checks, that operate on TU granularity. 
+ TranslationUnitDecl *TU = cast(decl); + for (DeclContext::decl_iterator D = TU->decls_begin(), + DEnd = TU->decls_end(); + D != DEnd; ++D) + buildTopLevelDecl(*D); + return; + } + case Decl::Var: + case Decl::Decomposition: + case Decl::VarTemplateSpecialization: + buildGlobal(cast(decl)); + assert(!isa(decl) && "not implemented"); + // if (auto *DD = dyn_cast(decl)) + // for (auto *B : DD->bindings()) + // if (auto *HD = B->getHoldingVar()) + // EmitGlobal(HD); + break; + + case Decl::CXXConversion: + case Decl::CXXMethod: + case Decl::Function: + buildGlobal(cast(decl)); + assert(!codeGenOpts.CoverageMapping && "Coverage Mapping NYI"); + break; + // C++ Decls + case Decl::Namespace: + buildDeclContext(cast(decl)); + break; + case Decl::ClassTemplateSpecialization: { + // const auto *Spec = cast(decl); + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + } + [[fallthrough]]; + case Decl::CXXRecord: { + CXXRecordDecl *crd = cast(decl); + // TODO: Handle debug info as CodeGenModule.cpp does + for (auto *childDecl : crd->decls()) + if (isa(childDecl) || isa(childDecl)) + buildTopLevelDecl(childDecl); + break; + } + // No code generation needed. + case Decl::UsingShadow: + case Decl::ClassTemplate: + case Decl::VarTemplate: + case Decl::Concept: + case Decl::VarTemplatePartialSpecialization: + case Decl::FunctionTemplate: + case Decl::TypeAliasTemplate: + case Decl::Block: + case Decl::Empty: + case Decl::Binding: + break; + case Decl::Using: // using X; [C++] + case Decl::UsingEnum: // using enum X; [C++] + case Decl::NamespaceAlias: + case Decl::UsingDirective: // using namespace X; [C++] + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + break; + case Decl::CXXConstructor: + getCXXABI().buildCXXConstructors(cast(decl)); + break; + case Decl::CXXDestructor: + getCXXABI().buildCXXDestructors(cast(decl)); + break; + + case Decl::StaticAssert: + // Nothing to do. + break; + + case Decl::LinkageSpec: + buildLinkageSpec(cast(decl)); + break; + + case Decl::Typedef: + case Decl::TypeAlias: // using foo = bar; [C++11] + case Decl::Record: + case Decl::Enum: + assert(!MissingFeatures::generateDebugInfo() && "NYI"); + break; + } +} + +static bool shouldBeInCOMDAT(CIRGenModule &CGM, const Decl &D) { + if (!CGM.supportsCOMDAT()) + return false; + + if (D.hasAttr()) + return true; + + GVALinkage Linkage; + if (auto *VD = dyn_cast(&D)) + Linkage = CGM.getASTContext().GetGVALinkageForVariable(VD); + else + Linkage = + CGM.getASTContext().GetGVALinkageForFunction(cast(&D)); + + switch (Linkage) { + case clang::GVA_Internal: + case clang::GVA_AvailableExternally: + case clang::GVA_StrongExternal: + return false; + case clang::GVA_DiscardableODR: + case clang::GVA_StrongODR: + return true; + } + llvm_unreachable("No such linkage"); +} + +// TODO(cir): this could be a common method between LLVM codegen. +static bool isVarDeclStrongDefinition(const ASTContext &Context, + CIRGenModule &CGM, const VarDecl *D, + bool NoCommon) { + // Don't give variables common linkage if -fno-common was specified unless it + // was overridden by a NoCommon attribute. + if ((NoCommon || D->hasAttr()) && !D->hasAttr()) + return true; + + // C11 6.9.2/2: + // A declaration of an identifier for an object that has file scope without + // an initializer, and without a storage-class specifier or with the + // storage-class specifier static, constitutes a tentative definition. + if (D->getInit() || D->hasExternalStorage()) + return true; + + // A variable cannot be both common and exist in a section. 
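isVarDeclStrongDefinition, which starts above and continues below, encodes when a file-scope variable is only a C11 tentative definition and may therefore receive common linkage. A simplified standalone predicate over made-up flags (not the Clang AST) capturing the first few rules:

    // Mirrors the rules above: an initializer, extern storage, -fno-common,
    // an explicit section, or TLS all force a strong (non-common) definition.
    struct VarTraits {
      bool hasInitializer = false;
      bool hasExternalStorage = false;
      bool noCommonRequested = false; // -fno-common without a CommonAttr
      bool hasExplicitSection = false;
      bool isThreadLocal = false;
    };

    bool isStrongDefinition(const VarTraits &v) {
      if (v.noCommonRequested)
        return true;
      if (v.hasInitializer || v.hasExternalStorage)
        return true;
      if (v.hasExplicitSection) // common symbols cannot carry a section
        return true;
      if (v.isThreadLocal) // TLS is never common
        return true;
      return false;
    }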
+ if (D->hasAttr()) + return true; + + // A variable cannot be both common and exist in a section. + // We don't try to determine which is the right section in the front-end. + // If no specialized section name is applicable, it will resort to default. + if (D->hasAttr() || + D->hasAttr() || + D->hasAttr() || + D->hasAttr()) + return true; + + // Thread local vars aren't considered common linkage. + if (D->getTLSKind()) + return true; + + // Tentative definitions marked with WeakImportAttr are true definitions. + if (D->hasAttr()) + return true; + + // A variable cannot be both common and exist in a comdat. + if (shouldBeInCOMDAT(CGM, *D)) + return true; + + // Declarations with a required alignment do not have common linkage in MSVC + // mode. + if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { + if (D->hasAttr()) + return true; + QualType VarType = D->getType(); + if (Context.isAlignmentRequired(VarType)) + return true; + + if (const auto *RT = VarType->getAs()) { + const RecordDecl *RD = RT->getDecl(); + for (const FieldDecl *FD : RD->fields()) { + if (FD->isBitField()) + continue; + if (FD->hasAttr()) + return true; + if (Context.isAlignmentRequired(FD->getType())) + return true; + } + } + } + + // Microsoft's link.exe doesn't support alignments greater than 32 bytes for + // common symbols, so symbols with greater alignment requirements cannot be + // common. + // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two + // alignments for common symbols via the aligncomm directive, so this + // restriction only applies to MSVC environments. + if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && + Context.getTypeAlignIfKnown(D->getType()) > + Context.toBits(CharUnits::fromQuantity(32))) + return true; + + return false; +} + +void CIRGenModule::setInitializer(mlir::cir::GlobalOp &global, + mlir::Attribute value) { + // Recompute visibility when updating initializer. + global.setInitialValueAttr(value); + mlir::SymbolTable::setSymbolVisibility( + global, CIRGenModule::getMLIRVisibility(global)); +} + +mlir::SymbolTable::Visibility +CIRGenModule::getMLIRVisibility(mlir::cir::GlobalOp op) { + // MLIR doesn't accept public symbols declarations (only + // definitions). 
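The helper that follows maps CIR linkage onto MLIR symbol visibility, but a declaration is always forced to private first, since MLIR rejects public declarations. As an aside, a compact model of that two-step decision with illustrative enums (not the real MLIR/CIR kinds):

    enum class Linkage { Internal, Private, External, LinkOnceODR, WeakAny };
    enum class Visibility { Public, Private };

    // Declarations are private regardless of linkage; definitions expose the
    // linkage-derived visibility.
    Visibility symbolVisibility(bool isDeclaration, Linkage l) {
      if (isDeclaration)
        return Visibility::Private;
      switch (l) {
      case Linkage::Internal:
      case Linkage::Private:
        return Visibility::Private;
      default:
        return Visibility::Public;
      }
    }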
+ if (op.isDeclaration()) + return mlir::SymbolTable::Visibility::Private; + return getMLIRVisibilityFromCIRLinkage(op.getLinkage()); +} + +mlir::SymbolTable::Visibility CIRGenModule::getMLIRVisibilityFromCIRLinkage( + mlir::cir::GlobalLinkageKind GLK) { + switch (GLK) { + case mlir::cir::GlobalLinkageKind::InternalLinkage: + case mlir::cir::GlobalLinkageKind::PrivateLinkage: + return mlir::SymbolTable::Visibility::Private; + case mlir::cir::GlobalLinkageKind::ExternalLinkage: + case mlir::cir::GlobalLinkageKind::ExternalWeakLinkage: + case mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage: + case mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage: + case mlir::cir::GlobalLinkageKind::CommonLinkage: + case mlir::cir::GlobalLinkageKind::WeakAnyLinkage: + case mlir::cir::GlobalLinkageKind::WeakODRLinkage: + return mlir::SymbolTable::Visibility::Public; + default: { + llvm::errs() << "visibility not implemented for '" + << stringifyGlobalLinkageKind(GLK) << "'\n"; + assert(0 && "not implemented"); + } + } + llvm_unreachable("linkage should be handled above!"); +} + +mlir::cir::VisibilityKind +CIRGenModule::getGlobalVisibilityKindFromClangVisibility( + clang::VisibilityAttr::VisibilityType visibility) { + switch (visibility) { + case clang::VisibilityAttr::VisibilityType::Default: + return VisibilityKind::Default; + case clang::VisibilityAttr::VisibilityType::Hidden: + return VisibilityKind::Hidden; + case clang::VisibilityAttr::VisibilityType::Protected: + return VisibilityKind::Protected; + } +} + +mlir::cir::VisibilityAttr +CIRGenModule::getGlobalVisibilityAttrFromDecl(const Decl *decl) { + const clang::VisibilityAttr *VA = decl->getAttr(); + mlir::cir::VisibilityAttr cirVisibility = + mlir::cir::VisibilityAttr::get(builder.getContext()); + if (VA) { + cirVisibility = mlir::cir::VisibilityAttr::get( + builder.getContext(), + getGlobalVisibilityKindFromClangVisibility(VA->getVisibility())); + } + return cirVisibility; +} + +mlir::cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator( + const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) { + if (Linkage == GVA_Internal) + return mlir::cir::GlobalLinkageKind::InternalLinkage; + + if (D->hasAttr()) { + if (IsConstantVariable) + return mlir::cir::GlobalLinkageKind::WeakODRLinkage; + else + return mlir::cir::GlobalLinkageKind::WeakAnyLinkage; + } + + if (const auto *FD = D->getAsFunction()) + if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally) + return mlir::cir::GlobalLinkageKind::LinkOnceAnyLinkage; + + // We are guaranteed to have a strong definition somewhere else, + // so we can use available_externally linkage. + if (Linkage == GVA_AvailableExternally) + return mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage; + + // Note that Apple's kernel linker doesn't support symbol + // coalescing, so we need to avoid linkonce and weak linkages there. + // Normally, this means we just map to internal, but for explicit + // instantiations we'll map to external. + + // In C++, the compiler has to emit a definition in every translation unit + // that references the function. We should use linkonce_odr because + // a) if all references in this translation unit are optimized away, we + // don't need to codegen it. b) if the function persists, it needs to be + // merged with other definitions. c) C++ has the ODR, so we know the + // definition is dependable. + if (Linkage == GVA_DiscardableODR) + return !astCtx.getLangOpts().AppleKext + ? 
mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage + : mlir::cir::GlobalLinkageKind::InternalLinkage; + + // An explicit instantiation of a template has weak linkage, since + // explicit instantiations can occur in multiple translation units + // and must all be equivalent. However, we are not allowed to + // throw away these explicit instantiations. + // + // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU, + // so say that CUDA templates are either external (for kernels) or internal. + // This lets llvm perform aggressive inter-procedural optimizations. For + // -fgpu-rdc case, device function calls across multiple TU's are allowed, + // therefore we need to follow the normal linkage paradigm. + if (Linkage == GVA_StrongODR) { + if (getLangOpts().AppleKext) + return mlir::cir::GlobalLinkageKind::ExternalLinkage; + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + !getLangOpts().GPURelocatableDeviceCode) + return D->hasAttr() + ? mlir::cir::GlobalLinkageKind::ExternalLinkage + : mlir::cir::GlobalLinkageKind::InternalLinkage; + return mlir::cir::GlobalLinkageKind::WeakODRLinkage; + } + + // C++ doesn't have tentative definitions and thus cannot have common + // linkage. + if (!getLangOpts().CPlusPlus && isa(D) && + !isVarDeclStrongDefinition(astCtx, *this, cast(D), + getCodeGenOpts().NoCommon)) + return mlir::cir::GlobalLinkageKind::CommonLinkage; + + // selectany symbols are externally visible, so use weak instead of + // linkonce. MSVC optimizes away references to const selectany globals, so + // all definitions should be the same and ODR linkage should be used. + // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx + if (D->hasAttr()) + return mlir::cir::GlobalLinkageKind::WeakODRLinkage; + + // Otherwise, we have strong external linkage. + assert(Linkage == GVA_StrongExternal); + return mlir::cir::GlobalLinkageKind::ExternalLinkage; +} + +/// This function is called when we implement a function with no prototype, e.g. +/// "int foo() {}". If there are existing call uses of the old function in the +/// module, this adjusts them to call the new function directly. +/// +/// This is not just a cleanup: the always_inline pass requires direct calls to +/// functions to be able to inline them. If there is a bitcast in the way, it +/// won't inline them. Instcombine normally deletes these calls, but it isn't +/// run at -O0. +void CIRGenModule::ReplaceUsesOfNonProtoTypeWithRealFunction( + mlir::Operation *Old, mlir::cir::FuncOp NewFn) { + + // If we're redefining a global as a function, don't transform it. + auto OldFn = dyn_cast(Old); + if (!OldFn) + return; + + // TODO(cir): this RAUW ignores the features below. + assert(!MissingFeatures::exceptions() && "Call vs Invoke NYI"); + assert(!MissingFeatures::parameterAttributes()); + assert(!MissingFeatures::operandBundles()); + assert(OldFn->getAttrs().size() > 1 && "Attribute forwarding NYI"); + + // Mark new function as originated from a no-proto declaration. + NewFn.setNoProtoAttr(OldFn.getNoProtoAttr()); + + // Iterate through all calls of the no-proto function. + auto SymUses = OldFn.getSymbolUses(OldFn->getParentOp()); + for (auto Use : SymUses.value()) { + mlir::OpBuilder::InsertionGuard guard(builder); + + if (auto noProtoCallOp = dyn_cast(Use.getUser())) { + builder.setInsertionPoint(noProtoCallOp); + + // Patch call type with the real function type. + auto realCallOp = builder.createCallOp(noProtoCallOp.getLoc(), NewFn, + noProtoCallOp.getOperands()); + + // Replace old no proto call with fixed call. 
+ noProtoCallOp.replaceAllUsesWith(realCallOp); + noProtoCallOp.erase(); + } else if (auto getGlobalOp = + dyn_cast(Use.getUser())) { + // Replace type + getGlobalOp.getAddr().setType(mlir::cir::PointerType::get( + builder.getContext(), NewFn.getFunctionType())); + } else { + llvm_unreachable("NIY"); + } + } +} + +mlir::cir::GlobalLinkageKind +CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *VD, bool IsConstant) { + assert(!IsConstant && "constant variables NYI"); + GVALinkage Linkage = astCtx.GetGVALinkageForVariable(VD); + return getCIRLinkageForDeclarator(VD, Linkage, IsConstant); +} + +mlir::cir::GlobalLinkageKind CIRGenModule::getFunctionLinkage(GlobalDecl GD) { + const auto *D = cast(GD.getDecl()); + + GVALinkage Linkage = astCtx.GetGVALinkageForFunction(D); + + if (const auto *Dtor = dyn_cast(D)) + return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType()); + + if (isa(D) && + cast(D)->isInheritingConstructor() && + astCtx.getTargetInfo().getCXXABI().isMicrosoft()) { + // Just like in LLVM codegen: + // Our approach to inheriting constructors is fundamentally different from + // that used by the MS ABI, so keep our inheriting constructor thunks + // internal rather than trying to pick an unambiguous mangling for them. + return mlir::cir::GlobalLinkageKind::InternalLinkage; + } + + return getCIRLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false); +} + +void CIRGenModule::buildAliasForGlobal(StringRef mangledName, + mlir::Operation *op, GlobalDecl aliasGD, + mlir::cir::FuncOp aliasee, + mlir::cir::GlobalLinkageKind linkage) { + auto *aliasFD = dyn_cast(aliasGD.getDecl()); + assert(aliasFD && "expected FunctionDecl"); + auto alias = + createCIRFunction(getLoc(aliasGD.getDecl()->getSourceRange()), + mangledName, aliasee.getFunctionType(), aliasFD); + alias.setAliasee(aliasee.getName()); + alias.setLinkage(linkage); + mlir::SymbolTable::setSymbolVisibility( + alias, getMLIRVisibilityFromCIRLinkage(linkage)); + + // Alias constructors and destructors are always unnamed_addr. + assert(!MissingFeatures::unnamedAddr()); + + // Switch any previous uses to the alias. + if (op) { + llvm_unreachable("NYI"); + } else { + // Name already set by createCIRFunction + } + + // Finally, set up the alias with its proper name and attributes. + setCommonAttributes(aliasGD, alias); +} + +mlir::Type CIRGenModule::getCIRType(const QualType &type) { + return genTypes.ConvertType(type); +} + +bool CIRGenModule::verifyModule() { + // Verify the module after we have finished constructing it, this will + // check the structural properties of the IR and invoke any specific + // verifiers we have on the CIR operations. + return mlir::verify(theModule).succeeded(); +} + +std::pair +CIRGenModule::getAddrAndTypeOfCXXStructor(GlobalDecl GD, + const CIRGenFunctionInfo *FnInfo, + mlir::cir::FuncType FnType, + bool Dontdefer, + ForDefinition_t IsForDefinition) { + auto *MD = cast(GD.getDecl()); + + if (isa(MD)) { + // Always alias equivalent complete destructors to base destructors in the + // MS ABI. 
+ if (getTarget().getCXXABI().isMicrosoft() && + GD.getDtorType() == Dtor_Complete && + MD->getParent()->getNumVBases() == 0) + llvm_unreachable("NYI"); + } + + if (!FnType) { + if (!FnInfo) + FnInfo = &getTypes().arrangeCXXStructorDeclaration(GD); + FnType = getTypes().GetFunctionType(*FnInfo); + } + + auto Fn = GetOrCreateCIRFunction(getMangledName(GD), FnType, GD, + /*ForVtable=*/false, Dontdefer, + /*IsThunk=*/false, IsForDefinition); + + return {FnType, Fn}; +} + +mlir::cir::FuncOp +CIRGenModule::GetAddrOfFunction(clang::GlobalDecl GD, mlir::Type Ty, + bool ForVTable, bool DontDefer, + ForDefinition_t IsForDefinition) { + assert(!cast(GD.getDecl())->isConsteval() && + "consteval function should never be emitted"); + + if (!Ty) { + const auto *FD = cast(GD.getDecl()); + Ty = getTypes().ConvertType(FD->getType()); + } + + // Devirtualized destructor calls may come through here instead of via + // getAddrOfCXXStructor. Make sure we use the MS ABI base destructor instead + // of the complete destructor when necessary. + if (const auto *DD = dyn_cast(GD.getDecl())) { + if (getTarget().getCXXABI().isMicrosoft() && + GD.getDtorType() == Dtor_Complete && + DD->getParent()->getNumVBases() == 0) + llvm_unreachable("NYI"); + } + + StringRef MangledName = getMangledName(GD); + auto F = GetOrCreateCIRFunction(MangledName, Ty, GD, ForVTable, DontDefer, + /*IsThunk=*/false, IsForDefinition); + + assert(!langOpts.CUDA && "NYI"); + + return F; +} + +// Returns true if GD is a function decl with internal linkage and needs a +// unique suffix after the mangled name. +static bool isUniqueInternalLinkageDecl(GlobalDecl GD, CIRGenModule &CGM) { + assert(CGM.getModuleNameHash().empty() && + "Unique internal linkage names NYI"); + + return false; +} + +static std::string getMangledNameImpl(CIRGenModule &CGM, GlobalDecl GD, + const NamedDecl *ND, + bool OmitMultiVersionMangling = false) { + assert(!OmitMultiVersionMangling && "NYI"); + + SmallString<256> Buffer; + + llvm::raw_svector_ostream Out(Buffer); + MangleContext &MC = CGM.getCXXABI().getMangleContext(); + + assert(CGM.getModuleNameHash().empty() && "NYI"); + auto ShouldMangle = MC.shouldMangleDeclName(ND); + + if (ShouldMangle) { + MC.mangleName(GD.getWithDecl(ND), Out); + } else { + auto *II = ND->getIdentifier(); + assert(II && "Attempt to mangle unnamed decl."); + + const auto *FD = dyn_cast(ND); + + if (FD && + FD->getType()->castAs()->getCallConv() == CC_X86RegCall) { + assert(0 && "NYI"); + } else if (FD && FD->hasAttr() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { + assert(0 && "NYI"); + } else { + Out << II->getName(); + } + } + + // Check if the module name hash should be appended for internal linkage + // symbols. This should come before multi-version target suffixes are + // appendded. This is to keep the name and module hash suffix of the internal + // linkage function together. The unique suffix should only be added when name + // mangling is done to make sure that the final name can be properly + // demangled. For example, for C functions without prototypes, name mangling + // is not done and the unique suffix should not be appended then. 
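The comment above pins down an ordering rule: a module-hash suffix is only added to names that were actually mangled and must precede any multi-version target suffix so the combined name still demangles. A tiny illustrative sketch of that ordering (the suffix spelling is an assumption, not the real scheme):

    #include <string>

    // Module hash (mangled, internal-linkage names only) goes first, then any
    // multi-version suffix, so the pair stays demangleable as a unit.
    std::string assembleName(const std::string &base, bool wasMangled,
                             bool internalLinkage, const std::string &moduleHash,
                             const std::string &multiVersionSuffix) {
      std::string name = base;
      if (wasMangled && internalLinkage && !moduleHash.empty())
        name += "." + moduleHash;
      return name + multiVersionSuffix;
    }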
+ assert(!isUniqueInternalLinkageDecl(GD, CGM) && "NYI"); + + if (const auto *FD = dyn_cast(ND)) { + assert(!FD->isMultiVersion() && "NYI"); + } + assert(!CGM.getLangOpts().GPURelocatableDeviceCode && "NYI"); + + return std::string(Out.str()); +} + +StringRef CIRGenModule::getMangledName(GlobalDecl GD) { + auto CanonicalGD = GD.getCanonicalDecl(); + + // Some ABIs don't have constructor variants. Make sure that base and complete + // constructors get mangled the same. + if (const auto *CD = dyn_cast(CanonicalGD.getDecl())) { + if (!getTarget().getCXXABI().hasConstructorVariants()) { + assert(false && "NYI"); + } + } + + assert(!langOpts.CUDAIsDevice && "NYI"); + + // Keep the first result in the case of a mangling collision. + const auto *ND = cast(GD.getDecl()); + std::string MangledName = getMangledNameImpl(*this, GD, ND); + + auto Result = Manglings.insert(std::make_pair(MangledName, GD)); + return MangledDeclNames[CanonicalGD] = Result.first->first(); +} + +void CIRGenModule::buildTentativeDefinition(const VarDecl *D) { + assert(!D->getInit() && "Cannot emit definite definitions here!"); + + StringRef MangledName = getMangledName(D); + auto *GV = getGlobalValue(MangledName); + + // TODO(cir): can a tentative definition come from something other than a + // global op? If not, the assertion below is wrong and should be removed. If + // so, getGlobalValue might be better of returining a global value interface + // that alows use to manage different globals value types transparently. + if (GV) + assert(isa(GV) && + "tentative definition can only be built from a cir.global_op"); + + // We already have a definition, not declaration, with the same mangled name. + // Emitting of declaration is not required (and actually overwrites emitted + // definition). + if (GV && !dyn_cast(GV).isDeclaration()) + return; + + // If we have not seen a reference to this variable yet, place it into the + // deferred declarations table to be emitted if needed later. + if (!MustBeEmitted(D) && !GV) { + DeferredDecls[MangledName] = D; + return; + } + + // The tentative definition is the only definition. + buildGlobalVarDefinition(D); +} + +void CIRGenModule::setGlobalVisibility(mlir::Operation *GV, + const NamedDecl *D) const { + assert(!MissingFeatures::setGlobalVisibility()); +} + +void CIRGenModule::setDSOLocal(mlir::Operation *Op) const { + assert(!MissingFeatures::setDSOLocal()); + if (auto globalValue = dyn_cast(Op)) { + setDSOLocal(globalValue); + } +} + +void CIRGenModule::setGVProperties(mlir::Operation *Op, + const NamedDecl *D) const { + assert(!MissingFeatures::setDLLImportDLLExport()); + setGVPropertiesAux(Op, D); +} + +void CIRGenModule::setGVPropertiesAux(mlir::Operation *Op, + const NamedDecl *D) const { + setGlobalVisibility(Op, D); + setDSOLocal(Op); + assert(!MissingFeatures::setPartition()); +} + +bool CIRGenModule::lookupRepresentativeDecl(StringRef MangledName, + GlobalDecl &Result) const { + auto Res = Manglings.find(MangledName); + if (Res == Manglings.end()) + return false; + Result = Res->getValue(); + return true; +} + +mlir::cir::FuncOp +CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, + mlir::cir::FuncType Ty, + const clang::FunctionDecl *FD) { + // At the point we need to create the function, the insertion point + // could be anywhere (e.g. callsite). Do not rely on whatever it might + // be, properly save, find the appropriate place and restore. 
+ FuncOp f; + { + mlir::OpBuilder::InsertionGuard guard(builder); + + // Some global emissions are triggered while emitting a function, e.g. + // void s() { x.method() } + // + // Be sure to insert a new function before a current one. + auto *curCGF = getCurrCIRGenFun(); + if (curCGF) + builder.setInsertionPoint(curCGF->CurFn); + + f = builder.create(loc, name, Ty); + + if (FD) + f.setAstAttr(makeFuncDeclAttr(FD, builder.getContext())); + + if (FD && !FD->hasPrototype()) + f.setNoProtoAttr(builder.getUnitAttr()); + + assert(f.isDeclaration() && "expected empty body"); + + // A declaration gets private visibility by default, but external linkage + // as the default linkage. + f.setLinkageAttr(mlir::cir::GlobalLinkageKindAttr::get( + builder.getContext(), mlir::cir::GlobalLinkageKind::ExternalLinkage)); + mlir::SymbolTable::setSymbolVisibility( + f, mlir::SymbolTable::Visibility::Private); + + // Initialize with empty dict of extra attributes. + f.setExtraAttrsAttr(mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), builder.getDictionaryAttr({}))); + + if (!curCGF) + theModule.push_back(f); + } + return f; +} + +mlir::cir::FuncOp CIRGenModule::createRuntimeFunction( + mlir::cir::FuncType Ty, StringRef Name, mlir::ArrayAttr, + [[maybe_unused]] bool Local, bool AssumeConvergent) { + if (AssumeConvergent) { + llvm_unreachable("NYI"); + } + + auto entry = GetOrCreateCIRFunction(Name, Ty, GlobalDecl(), + /*ForVtable=*/false); + + // Traditional codegen checks for a valid dyn_cast llvm::Function for `entry`, + // no testcase that cover this path just yet though. + if (!entry) { + // Setup runtime CC, DLL support for windows and set dso local. + llvm_unreachable("NYI"); + } + + return entry; +} + +bool isDefaultedMethod(const clang::FunctionDecl *FD) { + if (FD->isDefaulted() && isa(FD) && + (cast(FD)->isCopyAssignmentOperator() || + cast(FD)->isMoveAssignmentOperator())) + return true; + return false; +} + +mlir::Location CIRGenModule::getLocForFunction(const clang::FunctionDecl *FD) { + bool invalidLoc = !FD || (FD->getSourceRange().getBegin().isInvalid() || + FD->getSourceRange().getEnd().isInvalid()); + if (!invalidLoc) + return getLoc(FD->getSourceRange()); + + // Use the module location + return theModule->getLoc(); +} + +/// Determines whether the language options require us to model +/// unwind exceptions. We treat -fexceptions as mandating this +/// except under the fragile ObjC ABI with only ObjC exceptions +/// enabled. This means, for example, that C with -fexceptions +/// enables this. +/// TODO(cir): can be shared with traditional LLVM codegen. +static bool hasUnwindExceptions(const LangOptions &LangOpts) { + // If exceptions are completely disabled, obviously this is false. + if (!LangOpts.Exceptions) + return false; + + // If C++ exceptions are enabled, this is true. + if (LangOpts.CXXExceptions) + return true; + + // If ObjC exceptions are enabled, this depends on the ABI. 
+ if (LangOpts.ObjCExceptions) { + return LangOpts.ObjCRuntime.hasUnwindExceptions(); + } + + return true; +} + +void CIRGenModule::setCIRFunctionAttributesForDefinition(const Decl *decl, + FuncOp f) { + mlir::NamedAttrList attrs{f.getExtraAttrs().getElements().getValue()}; + + if (!hasUnwindExceptions(getLangOpts())) { + auto attr = mlir::cir::NoThrowAttr::get(builder.getContext()); + attrs.set(attr.getMnemonic(), attr); + } + + if (!decl) { + // If we don't have a declaration to control inlining, the function isn't + // explicitly marked as alwaysinline for semantic reasons, and inlining is + // disabled, mark the function as noinline. + if (codeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) { + auto attr = mlir::cir::InlineAttr::get( + builder.getContext(), mlir::cir::InlineKind::AlwaysInline); + attrs.set(attr.getMnemonic(), attr); + } + } else if (decl->hasAttr()) { + // Add noinline if the function isn't always_inline. + auto attr = mlir::cir::InlineAttr::get(builder.getContext(), + mlir::cir::InlineKind::NoInline); + attrs.set(attr.getMnemonic(), attr); + } else if (decl->hasAttr()) { + // (noinline wins over always_inline, and we can't specify both in IR) + auto attr = mlir::cir::InlineAttr::get(builder.getContext(), + mlir::cir::InlineKind::AlwaysInline); + attrs.set(attr.getMnemonic(), attr); + } else if (codeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) { + // If we're not inlining, then force everything that isn't always_inline + // to carry an explicit noinline attribute. + auto attr = mlir::cir::InlineAttr::get(builder.getContext(), + mlir::cir::InlineKind::NoInline); + attrs.set(attr.getMnemonic(), attr); + } else { + // Otherwise, propagate the inline hint attribute and potentially use its + // absence to mark things as noinline. + // Search function and template pattern redeclarations for inline. + auto CheckForInline = [](const FunctionDecl *decl) { + auto CheckRedeclForInline = [](const FunctionDecl *Redecl) { + return Redecl->isInlineSpecified(); + }; + if (any_of(decl->redecls(), CheckRedeclForInline)) + return true; + const FunctionDecl *Pattern = decl->getTemplateInstantiationPattern(); + if (!Pattern) + return false; + return any_of(Pattern->redecls(), CheckRedeclForInline); + }; + if (CheckForInline(cast(decl))) { + auto attr = mlir::cir::InlineAttr::get(builder.getContext(), + mlir::cir::InlineKind::InlineHint); + attrs.set(attr.getMnemonic(), attr); + } else if (codeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining) { + auto attr = mlir::cir::InlineAttr::get(builder.getContext(), + mlir::cir::InlineKind::NoInline); + attrs.set(attr.getMnemonic(), attr); + } + } + + // Track whether we need to add the optnone attribute, + // starting with the default for this optimization level. + bool ShouldAddOptNone = + !codeGenOpts.DisableO0ImplyOptNone && codeGenOpts.OptimizationLevel == 0; + if (decl) { + ShouldAddOptNone &= !decl->hasAttr(); + ShouldAddOptNone &= !decl->hasAttr(); + ShouldAddOptNone |= decl->hasAttr(); + } + + if (ShouldAddOptNone) { + auto optNoneAttr = mlir::cir::OptNoneAttr::get(builder.getContext()); + attrs.set(optNoneAttr.getMnemonic(), optNoneAttr); + + // OptimizeNone implies noinline; we should not be inlining such functions. 
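setCIRFunctionAttributesForDefinition above resolves competing inlining hints with a fixed precedence: noinline beats always_inline, always_inline beats the global only-always-inline mode, and a plain `inline` specifier only survives as a hint. A standalone sketch of that precedence (the enum and flag names are illustrative):

    enum class InlineKind { None, NoInline, AlwaysInline, Hint };

    InlineKind pickInlineAttr(bool hasNoInlineAttr, bool hasAlwaysInlineAttr,
                              bool onlyAlwaysInlining, bool isInlineSpecified) {
      if (hasNoInlineAttr)
        return InlineKind::NoInline; // noinline wins over always_inline
      if (hasAlwaysInlineAttr)
        return InlineKind::AlwaysInline;
      if (onlyAlwaysInlining)
        return InlineKind::NoInline; // force noinline when not inlining
      if (isInlineSpecified)
        return InlineKind::Hint;
      return InlineKind::None;
    }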
+ auto noInlineAttr = mlir::cir::InlineAttr::get( + builder.getContext(), mlir::cir::InlineKind::NoInline); + attrs.set(noInlineAttr.getMnemonic(), noInlineAttr); + } + + f.setExtraAttrsAttr(mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), attrs.getDictionary(builder.getContext()))); +} + +void CIRGenModule::setCIRFunctionAttributes(GlobalDecl GD, + const CIRGenFunctionInfo &info, + mlir::cir::FuncOp func, + bool isThunk) { + // TODO(cir): More logic of constructAttributeList is needed. + mlir::cir::CallingConv callingConv; + + // Initialize PAL with existing attributes to merge attributes. + mlir::NamedAttrList PAL{func.getExtraAttrs().getElements().getValue()}; + constructAttributeList(func.getName(), info, GD, PAL, callingConv, + /*AttrOnCallSite=*/false, isThunk); + func.setExtraAttrsAttr(mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), PAL.getDictionary(builder.getContext()))); + + // TODO(cir): Check X86_VectorCall incompatibility with WinARM64EC + + func.setCallingConv(callingConv); +} + +void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, + mlir::cir::FuncOp func, + bool isIncompleteFunction, + bool isThunk) { + // NOTE(cir): Original CodeGen checks if this is an intrinsic. In CIR we + // represent them in dedicated ops. The correct attributes are ensured during + // translation to LLVM. Thus, we don't need to check for them here. + + if (!isIncompleteFunction) { + setCIRFunctionAttributes(globalDecl, + getTypes().arrangeGlobalDeclaration(globalDecl), + func, isThunk); + } + + // TODO(cir): Complete the remaining part of the function. + assert(!MissingFeatures::setFunctionAttributes()); + auto decl = globalDecl.getDecl(); + func.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(decl)); +} + +/// If the specified mangled name is not in the module, +/// create and return a CIR Function with the specified type. If there is +/// something in the module with the specified name, return it potentially +/// bitcasted to the right type. +/// +/// If D is non-null, it specifies a decl that corresponded to this. This is +/// used to set the attributes on the function when it is first created. +mlir::cir::FuncOp CIRGenModule::GetOrCreateCIRFunction( + StringRef MangledName, mlir::Type Ty, GlobalDecl GD, bool ForVTable, + bool DontDefer, bool IsThunk, ForDefinition_t IsForDefinition, + mlir::ArrayAttr ExtraAttrs) { + assert(!IsThunk && "NYI"); + + const auto *D = GD.getDecl(); + + // Any attempts to use a MultiVersion function should result in retrieving the + // iFunc instead. Name mangling will handle the rest of the changes. + if (const auto *FD = cast_or_null(D)) { + // For the device mark the function as one that should be emitted. + if (getLangOpts().OpenMPIsTargetDevice && FD->isDefined() && !DontDefer && + !IsForDefinition) { + assert(0 && "OpenMP target functions NYI"); + } + if (FD->isMultiVersion()) + llvm_unreachable("NYI"); + } + + // Lookup the entry, lazily creating it if necessary. + mlir::Operation *Entry = getGlobalValue(MangledName); + if (Entry) { + assert(isa(Entry) && + "not implemented, only supports FuncOp for now"); + + if (WeakRefReferences.erase(Entry)) { + llvm_unreachable("NYI"); + } + + // Handle dropped DLL attributes. + if (D && !D->hasAttr() && !D->hasAttr()) { + // TODO(CIR): Entry->setDLLStorageClass + setDSOLocal(Entry); + } + + // If there are two attempts to define the same mangled name, issue an + // error. 
+ auto Fn = cast(Entry); + if (IsForDefinition && Fn && !Fn.isDeclaration()) { + GlobalDecl OtherGD; + // CHeck that GD is not yet in DiagnosedConflictingDefinitions is required + // to make sure that we issue and error only once. + if (lookupRepresentativeDecl(MangledName, OtherGD) && + (GD.getCanonicalDecl().getDecl()) && + DiagnosedConflictingDefinitions.insert(GD).second) { + getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) + << MangledName; + getDiags().Report(OtherGD.getDecl()->getLocation(), + diag::note_previous_definition); + } + } + + if (Fn && Fn.getFunctionType() == Ty) { + return Fn; + } + + if (!IsForDefinition) { + return Fn; + } + + // TODO: clang checks here if this is a llvm::GlobalAlias... how will we + // support this? + } + + // This function doesn't have a complete type (for example, the return type is + // an incomplete struct). Use a fake type instead, and make sure not to try to + // set attributes. + bool IsIncompleteFunction = false; + + mlir::cir::FuncType FTy; + if (mlir::isa(Ty)) { + FTy = mlir::cast(Ty); + } else { + assert(false && "NYI"); + // FTy = mlir::FunctionType::get(VoidTy, false); + IsIncompleteFunction = true; + } + + auto *FD = llvm::cast_or_null(D); + + // TODO: CodeGen includeds the linkage (ExternalLinkage) and only passes the + // mangledname if Entry is nullptr + auto F = createCIRFunction(getLocForFunction(FD), MangledName, FTy, FD); + + // If we already created a function with the same mangled name (but different + // type) before, take its name and add it to the list of functions to be + // replaced with F at the end of CodeGen. + // + // This happens if there is a prototype for a function (e.g. "int f()") and + // then a definition of a different type (e.g. "int f(int x)"). + if (Entry) { + + // Fetch a generic symbol-defining operation and its uses. + auto SymbolOp = dyn_cast(Entry); + assert(SymbolOp && "Expected a symbol-defining operation"); + + // TODO(cir): When can this symbol be something other than a function? + assert(isa(Entry) && "NYI"); + + // This might be an implementation of a function without a prototype, in + // which case, try to do special replacement of calls which match the new + // prototype. The really key thing here is that we also potentially drop + // arguments from the call site so as to make a direct call, which makes the + // inliner happier and suppresses a number of optimizer warnings (!) about + // dropping arguments. + if (SymbolOp.getSymbolUses(SymbolOp->getParentOp())) { + ReplaceUsesOfNonProtoTypeWithRealFunction(Entry, F); + } + + // Obliterate no-proto declaration. + Entry->erase(); + } + + if (D) + setFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); + if (ExtraAttrs) { + llvm_unreachable("NYI"); + } + + if (!DontDefer) { + // All MSVC dtors other than the base dtor are linkonce_odr and delegate to + // each other bottoming out wiht the base dtor. Therefore we emit non-base + // dtors on usage, even if there is no dtor definition in the TU. + if (isa_and_nonnull(D) && + getCXXABI().useThunkForDtorVariant(cast(D), + GD.getDtorType())) { + llvm_unreachable("NYI"); // addDeferredDeclToEmit(GD); + } + + // This is the first use or definition of a mangled name. If there is a + // deferred decl with this name, remember that we need to emit it at the end + // of the file. 
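The lookup that follows implements the comment above: on the first use or definition of a mangled name, a matching entry in DeferredDecls is promoted to DeferredDeclsToEmit and removed so it is emitted exactly once. A minimal standalone model of that promotion (container names mirror the patch, the types are stand-ins):

    #include <map>
    #include <string>
    #include <vector>

    struct Decl { std::string name; }; // stand-in for clang::GlobalDecl

    void noteFirstUse(const std::string &mangledName,
                      std::map<std::string, Decl> &deferredDecls,
                      std::vector<Decl> &deferredDeclsToEmit) {
      auto it = deferredDecls.find(mangledName);
      if (it == deferredDecls.end())
        return;
      // Promote to the emission worklist and forget the deferred entry.
      deferredDeclsToEmit.push_back(it->second);
      deferredDecls.erase(it);
    }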
+ auto DDI = DeferredDecls.find(MangledName); + if (DDI != DeferredDecls.end()) { + // Move the potentially referenced deferred decl to the + // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we + // don't need it anymore). + addDeferredDeclToEmit(DDI->second); + DeferredDecls.erase(DDI); + + // Otherwise, there are cases we have to worry about where we're using a + // declaration for which we must emit a definition but where we might not + // find a top-level definition. + // - member functions defined inline in their classes + // - friend functions defined inline in some class + // - special member functions with implicit definitions + // If we ever change our AST traversal to walk into class methods, this + // will be unnecessary. + // + // We also don't emit a definition for a function if it's going to be an + // entry in a vtable, unless it's already marked as used. + } else if (getLangOpts().CPlusPlus && D) { + // Look for a declaration that's lexically in a record. + for (const auto *FD = cast(D)->getMostRecentDecl(); FD; + FD = FD->getPreviousDecl()) { + if (isa(FD->getLexicalDeclContext())) { + if (FD->doesThisDeclarationHaveABody()) { + if (isDefaultedMethod(FD)) + addDefaultMethodsToEmit(GD.getWithDecl(FD)); + else + addDeferredDeclToEmit(GD.getWithDecl(FD)); + break; + } + } + } + } + } + + if (!IsIncompleteFunction) { + assert(F.getFunctionType() == Ty); + return F; + } + + // TODO(cir): Might need bitcast to different address space. + assert(!MissingFeatures::addressSpace()); + return F; +} + +mlir::Location CIRGenModule::getLoc(SourceLocation SLoc) { + assert(SLoc.isValid() && "expected valid source location"); + const SourceManager &SM = astCtx.getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(SLoc); + StringRef Filename = PLoc.getFilename(); + return mlir::FileLineColLoc::get(builder.getStringAttr(Filename), + PLoc.getLine(), PLoc.getColumn()); +} + +mlir::Location CIRGenModule::getLoc(SourceRange SLoc) { + assert(SLoc.isValid() && "expected valid source location"); + mlir::Location B = getLoc(SLoc.getBegin()); + mlir::Location E = getLoc(SLoc.getEnd()); + SmallVector locs = {B, E}; + mlir::Attribute metadata; + return mlir::FusedLoc::get(locs, metadata, builder.getContext()); +} + +mlir::Location CIRGenModule::getLoc(mlir::Location lhs, mlir::Location rhs) { + SmallVector locs = {lhs, rhs}; + mlir::Attribute metadata; + return mlir::FusedLoc::get(locs, metadata, builder.getContext()); +} + +void CIRGenModule::buildGlobalDecl(clang::GlobalDecl &D) { + // We should call GetAddrOfGlobal with IsForDefinition set to true in order + // to get a Value with exactly the type we need, not something that might + // have been created for another decl with the same mangled name but + // different type. + auto *Op = GetAddrOfGlobal(D, ForDefinition); + + // In case of different address spaces, we may still get a cast, even with + // IsForDefinition equal to true. Query mangled names table to get + // GlobalValue. + if (!Op) { + Op = getGlobalValue(getMangledName(D)); + } + + // In case of different address spaces, we may still get a cast, even with + // IsForDefinition equal to true. Query mangled names table to get + // GlobalValue. + if (!Op) + llvm_unreachable("Address spaces NYI"); + + // Make sure getGlobalValue returned non-null. + assert(Op); + + // Check to see if we've already emitted this. 
This is necessary for a + // couple of reasons: first, decls can end up in deferred-decls queue + // multiple times, and second, decls can end up with definitions in unusual + // ways (e.g. by an extern inline function acquiring a strong function + // redefinition). Just ignore those cases. + // TODO: Not sure what to map this to for MLIR + auto globalValueOp = Op; + if (auto Gv = dyn_cast(Op)) { + auto *result = + mlir::SymbolTable::lookupSymbolIn(getModule(), Gv.getNameAttr()); + globalValueOp = result; + } + + if (auto cirGlobalValue = + dyn_cast(globalValueOp)) { + if (!cirGlobalValue.isDeclaration()) + return; + } + + // If this is OpenMP, check if it is legal to emit this global normally. + if (getLangOpts().OpenMP && openMPRuntime && + openMPRuntime->emitTargetGlobal(D)) + return; + + // Otherwise, emit the definition and move on to the next one. + buildGlobalDefinition(D, Op); +} + +void CIRGenModule::buildDeferred(unsigned recursionLimit) { + // Emit deferred declare target declarations + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) + getOpenMPRuntime().emitDeferredTargetDecls(); + + // Emit code for any potentially referenced deferred decls. Since a previously + // unused static decl may become used during the generation of code for a + // static function, iterate until no changes are made. + + if (!DeferredVTables.empty()) { + buildDeferredVTables(); + + // Emitting a vtable doesn't directly cause more vtables to + // become deferred, although it can cause functions to be + // emitted that then need those vtables. + assert(DeferredVTables.empty()); + } + + // Emit CUDA/HIP static device variables referenced by host code only. Note we + // should not clear CUDADeviceVarODRUsedByHost since it is still needed for + // further handling. + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + llvm_unreachable("NYI"); + } + + // Stop if we're out of both deferred vtables and deferred declarations. + if (DeferredDeclsToEmit.empty()) + return; + + // Grab the list of decls to emit. If buildGlobalDefinition schedules more + // work, it will not interfere with this. + std::vector CurDeclsToEmit; + CurDeclsToEmit.swap(DeferredDeclsToEmit); + if (recursionLimit == 0) + return; + recursionLimit--; + + for (auto &D : CurDeclsToEmit) { + if (getCodeGenOpts().ClangIRSkipFunctionsFromSystemHeaders) { + auto *decl = D.getDecl(); + assert(decl && "expected decl"); + if (astCtx.getSourceManager().isInSystemHeader(decl->getLocation())) + continue; + } + + buildGlobalDecl(D); + + // If we found out that we need to emit more decls, do that recursively. + // This has the advantage that the decls are emitted in a DFS and related + // ones are close together, which is convenient for testing. + if (!DeferredVTables.empty() || !DeferredDeclsToEmit.empty()) { + buildDeferred(recursionLimit); + assert(DeferredVTables.empty() && DeferredDeclsToEmit.empty()); + } + } +} + +void CIRGenModule::buildDefaultMethods() { + // Differently from DeferredDeclsToEmit, there's no recurrent use of + // DefaultMethodsToEmit, so use it directly for emission. 
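buildDeferred above drains its worklist to a fixed point: it swaps the current entries into a local vector so freshly scheduled work does not interfere with the pass, recurses for anything scheduled along the way, and bounds the recursion with a limit. A compact sketch of that loop under those assumptions (generic types, not the CIR worklists):

    #include <functional>
    #include <vector>

    template <typename T>
    void drainDeferred(std::vector<T> &worklist, unsigned recursionLimit,
                       const std::function<void(const T &)> &emit) {
      if (recursionLimit == 0 || worklist.empty())
        return;
      std::vector<T> current;
      current.swap(worklist); // snapshot; `emit` may schedule more work
      for (const T &item : current) {
        emit(item);
        if (!worklist.empty())
          drainDeferred(worklist, recursionLimit - 1, emit);
      }
    }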
+ for (auto &D : DefaultMethodsToEmit) + buildGlobalDecl(D); +} + +mlir::IntegerAttr CIRGenModule::getSize(CharUnits size) { + return builder.getSizeFromCharUnits(builder.getContext(), size); +} + +mlir::Operation * +CIRGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { + const Decl *D = GD.getDecl(); + + if (isa(D) || isa(D)) + return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, + /*DontDefer=*/false, IsForDefinition); + + if (isa(D)) { + auto FInfo = + &getTypes().arrangeCXXMethodDeclaration(cast(D)); + auto Ty = getTypes().GetFunctionType(*FInfo); + return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, + IsForDefinition); + } + + if (isa(D)) { + const CIRGenFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); + auto Ty = getTypes().GetFunctionType(FI); + return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, + IsForDefinition); + } + + return getAddrOfGlobalVar(cast(D), /*Ty=*/nullptr, IsForDefinition) + .getDefiningOp(); +} + +void CIRGenModule::Release() { + buildDeferred(getCodeGenOpts().ClangIRBuildDeferredThreshold); + // TODO: buildVTablesOpportunistically(); + // TODO: applyGlobalValReplacements(); + applyReplacements(); + // TODO: checkAliases(); + // TODO: buildMultiVersionFunctions(); + buildCXXGlobalInitFunc(); + // TODO: buildCXXGlobalCleanUpFunc(); + // TODO: registerGlobalDtorsWithAtExit(); + // TODO: buildCXXThreadLocalInitFunc(); + // TODO: ObjCRuntime + if (astCtx.getLangOpts().CUDA) { + llvm_unreachable("NYI"); + } + // TODO: OpenMPRuntime + // TODO: PGOReader + // TODO: buildCtorList(GlobalCtors); + // TODO: builtCtorList(GlobalDtors); + buildGlobalAnnotations(); + // TODO: buildDeferredUnusedCoverageMappings(); + // TODO: CIRGenPGO + // TODO: CoverageMapping + if (getCodeGenOpts().SanitizeCfiCrossDso) { + llvm_unreachable("NYI"); + } + // TODO: buildAtAvailableLinkGuard(); + if (astCtx.getTargetInfo().getTriple().isWasm() && + !astCtx.getTargetInfo().getTriple().isOSEmscripten()) { + llvm_unreachable("NYI"); + } + + // Emit reference of __amdgpu_device_library_preserve_asan_functions to + // preserve ASAN functions in bitcode libraries. + if (getLangOpts().Sanitize.has(SanitizerKind::Address)) { + llvm_unreachable("NYI"); + } + + // TODO: buildLLVMUsed(); + // TODO: SanStats + + if (getCodeGenOpts().Autolink) { + // TODO: buildModuleLinkOptions + } + + // Emit OpenCL specific module metadata: OpenCL/SPIR version. + if (langOpts.CUDAIsDevice && getTriple().isSPIRV()) + llvm_unreachable("CUDA SPIR-V NYI"); + if (langOpts.OpenCL) { + buildOpenCLMetadata(); + // Emit SPIR version. + if (getTriple().isSPIR()) + llvm_unreachable("SPIR target NYI"); + } + + // TODO: FINISH THE REST OF THIS +} + +bool CIRGenModule::shouldEmitFunction(GlobalDecl GD) { + // TODO: implement this -- requires defining linkage for CIR + return true; +} + +bool CIRGenModule::supportsCOMDAT() const { + return getTriple().supportsCOMDAT(); +} + +void CIRGenModule::maybeSetTrivialComdat(const Decl &d, mlir::Operation *op) { + if (!shouldBeInCOMDAT(*this, d)) + return; + auto globalOp = dyn_cast_or_null(op); + if (globalOp) + globalOp.setComdat(true); + // Keep it as missing feature as we need to implement comdat for FuncOp. + // in the future. + assert(!MissingFeatures::setComdat() && "NYI"); +} + +bool CIRGenModule::isInNoSanitizeList(SanitizerMask Kind, mlir::cir::FuncOp Fn, + SourceLocation Loc) const { + const auto &NoSanitizeL = getASTContext().getNoSanitizeList(); + // NoSanitize by function name. 
+ if (NoSanitizeL.containsFunction(Kind, Fn.getName())) + llvm_unreachable("NYI"); + // NoSanitize by location. + if (Loc.isValid()) + return NoSanitizeL.containsLocation(Kind, Loc); + // If location is unknown, this may be a compiler-generated function. Assume + // it's located in the main file. + auto &SM = getASTContext().getSourceManager(); + FileEntryRef MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); + if (NoSanitizeL.containsFile(Kind, MainFile.getName())) + return true; + + // Check "src" prefix. + if (Loc.isValid()) + return NoSanitizeL.containsLocation(Kind, Loc); + // If location is unknown, this may be a compiler-generated function. Assume + // it's located in the main file. + return NoSanitizeL.containsFile(Kind, MainFile.getName()); +} + +void CIRGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { + // Do we need to generate coverage mapping? + if (!codeGenOpts.CoverageMapping) + return; + + llvm_unreachable("NYI"); +} + +void CIRGenModule::UpdateCompletedType(const TagDecl *TD) { + // Make sure that this type is translated. + genTypes.UpdateCompletedType(TD); +} + +void CIRGenModule::addReplacement(StringRef Name, mlir::Operation *Op) { + Replacements[Name] = Op; +} + +void CIRGenModule::applyReplacements() { + for (auto &I : Replacements) { + StringRef MangledName = I.first(); + mlir::Operation *Replacement = I.second; + auto *Entry = getGlobalValue(MangledName); + if (!Entry) + continue; + assert(isa(Entry) && "expected function"); + auto OldF = cast(Entry); + auto NewF = dyn_cast(Replacement); + assert(NewF && "not implemented"); + + // Replace old with new, but keep the old order. + if (OldF.replaceAllSymbolUses(NewF.getSymNameAttr(), theModule).failed()) + llvm_unreachable("internal error, cannot RAUW symbol"); + if (NewF) { + NewF->moveBefore(OldF); + OldF->erase(); + } + } +} + +void CIRGenModule::buildExplicitCastExprType(const ExplicitCastExpr *E, + CIRGenFunction *CGF) { + // Bind VLAs in the cast type. + if (CGF && E->getType()->isVariablyModifiedType()) + llvm_unreachable("NYI"); + + assert(!MissingFeatures::generateDebugInfo() && "NYI"); +} + +void CIRGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { + auto DK = VD->isThisDeclarationADefinition(); + if (DK == VarDecl::Definition && VD->hasAttr()) + return; + + TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); + // If we have a definition, this might be a deferred decl. If the + // instantiation is explicit, make sure we emit it at the end. + if (VD->getDefinition() && TSK == TSK_ExplicitInstantiationDefinition) { + llvm_unreachable("NYI"); + } + + buildTopLevelDecl(VD); +} + +mlir::cir::GlobalOp CIRGenModule::createOrReplaceCXXRuntimeVariable( + mlir::Location loc, StringRef Name, mlir::Type Ty, + mlir::cir::GlobalLinkageKind Linkage, clang::CharUnits Alignment) { + mlir::cir::GlobalOp OldGV{}; + auto GV = dyn_cast_or_null( + mlir::SymbolTable::lookupSymbolIn(getModule(), Name)); + + if (GV) { + // Check if the variable has the right type. + if (GV.getSymType() == Ty) + return GV; + + // Because C++ name mangling, the only way we can end up with an already + // existing global with the same name is if it has been declared extern + // "C". + assert(GV.isDeclaration() && "Declaration has wrong type!"); + OldGV = GV; + } + + // Create a new variable. 
+ GV = CIRGenModule::createGlobalOp(*this, loc, Name, Ty); + + // Set up extra information and add to the module + GV.setLinkageAttr( + mlir::cir::GlobalLinkageKindAttr::get(builder.getContext(), Linkage)); + mlir::SymbolTable::setSymbolVisibility(GV, + CIRGenModule::getMLIRVisibility(GV)); + + if (OldGV) { + // Replace occurrences of the old variable if needed. + GV.setName(OldGV.getName()); + if (!OldGV->use_empty()) { + // TODO(cir): remove erase call above and use replaceGlobal here. + llvm_unreachable("NYI"); + } + OldGV->erase(); + } + + if (supportsCOMDAT() && mlir::cir::isWeakForLinker(Linkage) && + !GV.hasAvailableExternallyLinkage()) { + GV.setComdat(true); + } + + GV.setAlignmentAttr(getSize(Alignment)); + setDSOLocal(static_cast(GV)); + return GV; +} + +bool CIRGenModule::shouldOpportunisticallyEmitVTables() { + if (codeGenOpts.OptimizationLevel != 0) + llvm_unreachable("NYI"); + return codeGenOpts.OptimizationLevel > 0; +} + +void CIRGenModule::buildVTableTypeMetadata(const CXXRecordDecl *RD, + mlir::cir::GlobalOp VTable, + const VTableLayout &VTLayout) { + if (!getCodeGenOpts().LTOUnit) + return; + llvm_unreachable("NYI"); +} + +mlir::Attribute CIRGenModule::getAddrOfRTTIDescriptor(mlir::Location loc, + QualType Ty, bool ForEH) { + // Return a bogus pointer if RTTI is disabled, unless it's for EH. + // FIXME: should we even be calling this method if RTTI is disabled + // and it's not for EH? + if (!shouldEmitRTTI(ForEH)) + return getBuilder().getConstNullPtrAttr(builder.getUInt8PtrTy()); + + if (ForEH && Ty->isObjCObjectPointerType() && + getLangOpts().ObjCRuntime.isGNUFamily()) { + llvm_unreachable("NYI"); + } + + return getCXXABI().getAddrOfRTTIDescriptor(loc, Ty); +} + +/// TODO(cir): once we have cir.module, add this as a convenience method there. +/// +/// Look up the specified global in the module symbol table. +/// 1. If it does not exist, add a declaration of the global and return it. +/// 2. Else, the global exists but has the wrong type: return the function +/// with a constantexpr cast to the right type. +/// 3. Finally, if the existing global is the correct declaration, return the +/// existing global. +mlir::cir::GlobalOp CIRGenModule::getOrInsertGlobal( + mlir::Location loc, StringRef Name, mlir::Type Ty, + llvm::function_ref CreateGlobalCallback) { + // See if we have a definition for the specified global already. + auto GV = dyn_cast_or_null(getGlobalValue(Name)); + if (!GV) { + GV = CreateGlobalCallback(); + } + assert(GV && "The CreateGlobalCallback is expected to create a global"); + + // If the variable exists but has the wrong type, return a bitcast to the + // right type. + auto GVTy = GV.getSymType(); + assert(!MissingFeatures::addressSpace()); + auto PTy = builder.getPointerTo(Ty); + + if (GVTy != PTy) + llvm_unreachable("NYI"); + + // Otherwise, we just found the existing function or a prototype. + return GV; +} + +// Overload to construct a global variable using its constructor's defaults. +mlir::cir::GlobalOp CIRGenModule::getOrInsertGlobal(mlir::Location loc, + StringRef Name, + mlir::Type Ty) { + return getOrInsertGlobal(loc, Name, Ty, [&] { + return CIRGenModule::createGlobalOp(*this, loc, Name, + builder.getPointerTo(Ty)); + }); +} + +// TODO(cir): this can be shared with LLVM codegen. 
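// Worked illustration of the accumulation performed by
// computeNonVirtualBaseClassOffset below: walk the cast path
// Derived -> Mid -> Base and sum each step's base-class offset from the
// record layout. The layout table here is a hypothetical stand-in for
// ASTRecordLayout, purely for illustration.
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // offsetOf[{record, directBase}] = byte offset of directBase inside record.
  std::map<std::pair<std::string, std::string>, uint64_t> offsetOf = {
      {{"Derived", "Mid"}, 8}, {{"Mid", "Base"}, 4}};

  std::vector<std::string> path = {"Derived", "Mid", "Base"};
  uint64_t offset = 0;
  for (size_t i = 0; i + 1 < path.size(); ++i)
    offset += offsetOf.at({path[i], path[i + 1]});

  assert(offset == 12); // Base starts 12 bytes into Derived in this toy layout.
  return 0;
}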
+CharUnits CIRGenModule::computeNonVirtualBaseClassOffset( + const CXXRecordDecl *DerivedClass, CastExpr::path_const_iterator Start, + CastExpr::path_const_iterator End) { + CharUnits Offset = CharUnits::Zero(); + + const ASTContext &Context = getASTContext(); + const CXXRecordDecl *RD = DerivedClass; + + for (CastExpr::path_const_iterator I = Start; I != End; ++I) { + const CXXBaseSpecifier *Base = *I; + assert(!Base->isVirtual() && "Should not see virtual bases here!"); + + // Get the layout. + const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); + + const auto *BaseDecl = + cast(Base->getType()->castAs()->getDecl()); + + // Add the offset. + Offset += Layout.getBaseClassOffset(BaseDecl); + + RD = BaseDecl; + } + + return Offset; +} + +void CIRGenModule::Error(SourceLocation loc, StringRef message) { + unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "%0"); + getDiags().Report(astCtx.getFullLoc(loc), diagID) << message; +} + +/// Print out an error that codegen doesn't support the specified stmt yet. +void CIRGenModule::ErrorUnsupported(const Stmt *S, const char *Type) { + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot compile this %0 yet"); + std::string Msg = Type; + getDiags().Report(astCtx.getFullLoc(S->getBeginLoc()), DiagID) + << Msg << S->getSourceRange(); +} + +/// Print out an error that codegen doesn't support the specified decl yet. +void CIRGenModule::ErrorUnsupported(const Decl *D, const char *Type) { + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot compile this %0 yet"); + std::string Msg = Type; + getDiags().Report(astCtx.getFullLoc(D->getLocation()), DiagID) << Msg; +} + +mlir::cir::SourceLanguage CIRGenModule::getCIRSourceLanguage() { + using ClangStd = clang::LangStandard; + using CIRLang = mlir::cir::SourceLanguage; + auto opts = getLangOpts(); + + if (opts.OpenCL && !opts.OpenCLCPlusPlus) + return CIRLang::OpenCLC; + + if (opts.CPlusPlus || opts.CPlusPlus11 || opts.CPlusPlus14 || + opts.CPlusPlus17 || opts.CPlusPlus20 || opts.CPlusPlus23 || + opts.CPlusPlus26) + return CIRLang::CXX; + if (opts.C99 || opts.C11 || opts.C17 || opts.C23 || + opts.LangStd == ClangStd::lang_c89 || + opts.LangStd == ClangStd::lang_gnu89) + return CIRLang::C; + + // TODO(cir): support remaining source languages. + llvm_unreachable("CIR does not yet support the given source language"); +} + +LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *D) { + if (langOpts.OpenCL) { + LangAS AS = D ? 
D->getType().getAddressSpace() : LangAS::opencl_global; + assert(AS == LangAS::opencl_global || AS == LangAS::opencl_global_device || + AS == LangAS::opencl_global_host || AS == LangAS::opencl_constant || + AS == LangAS::opencl_local || AS >= LangAS::FirstTargetAddressSpace); + return AS; + } + + if (langOpts.SYCLIsDevice && + (!D || D->getType().getAddressSpace() == LangAS::Default)) + llvm_unreachable("NYI"); + + if (langOpts.CUDA && langOpts.CUDAIsDevice) + llvm_unreachable("NYI"); + + if (langOpts.OpenMP) + llvm_unreachable("NYI"); + + return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, D); +} + +mlir::ArrayAttr CIRGenModule::buildAnnotationArgs(AnnotateAttr *attr) { + ArrayRef exprs = {attr->args_begin(), attr->args_size()}; + if (exprs.empty()) { + return mlir::ArrayAttr::get(builder.getContext(), {}); + } + llvm::FoldingSetNodeID id; + for (Expr *e : exprs) { + id.Add(cast(e)->getAPValueResult()); + } + mlir::ArrayAttr &lookup = annotationArgs[id.ComputeHash()]; + if (lookup) + return lookup; + + llvm::SmallVector args; + args.reserve(exprs.size()); + for (Expr *e : exprs) { + auto &ce = *cast(e); + if (auto *const strE = + clang::dyn_cast(ce.IgnoreParenCasts())) { + // Add trailing null character as StringLiteral->getString() does not + args.push_back(builder.getStringAttr(strE->getString())); + } else if (ce.hasAPValueResult()) { + // Handle case which can be evaluated to some numbers, not only literals + const auto &ap = ce.getAPValueResult(); + if (ap.isInt()) { + args.push_back(mlir::IntegerAttr::get( + mlir::IntegerType::get(builder.getContext(), + ap.getInt().getBitWidth()), + ap.getInt())); + } else { + llvm_unreachable("NYI like float, fixed-point, array..."); + } + } else { + llvm_unreachable("NYI"); + } + } + + lookup = builder.getArrayAttr(args); + return lookup; +} + +mlir::cir::AnnotationAttr +CIRGenModule::buildAnnotateAttr(clang::AnnotateAttr *aa) { + mlir::StringAttr annoGV = builder.getStringAttr(aa->getAnnotation()); + mlir::ArrayAttr args = buildAnnotationArgs(aa); + return mlir::cir::AnnotationAttr::get(builder.getContext(), annoGV, args); +} + +void CIRGenModule::addGlobalAnnotations(const ValueDecl *d, + mlir::Operation *gv) { + assert(d->hasAttr() && "no annotate attribute"); + assert((isa(gv) || isa(gv)) && + "annotation only on globals"); + llvm::SmallVector annotations; + for (auto *i : d->specific_attrs()) + annotations.push_back(buildAnnotateAttr(i)); + if (auto global = dyn_cast(gv)) + global.setAnnotationsAttr(builder.getArrayAttr(annotations)); + else if (auto func = dyn_cast(gv)) + func.setAnnotationsAttr(builder.getArrayAttr(annotations)); +} + +void CIRGenModule::buildGlobalAnnotations() { + for (const auto &[mangledName, vd] : deferredAnnotations) { + mlir::Operation *gv = getGlobalValue(mangledName); + if (gv) + addGlobalAnnotations(vd, gv); + } + deferredAnnotations.clear(); +} + +TBAAAccessInfo CIRGenModule::getTBAAAccessInfo(QualType accessType) { + if (!tbaa) + return TBAAAccessInfo(); + llvm_unreachable("NYI"); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h new file mode 100644 index 000000000000..b980ed411c41 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -0,0 +1,834 @@ +//===--- CIRGenModule.h - Per-Module state for CIR gen ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the internal per-translation-unit state used for CIR translation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CIRGENMODULE_H +#define LLVM_CLANG_LIB_CODEGEN_CIRGENMODULE_H + +#include "Address.h" +#include "CIRGenBuilder.h" +#include "CIRGenCall.h" +#include "CIRGenOpenCLRuntime.h" +#include "CIRGenTBAA.h" +#include "CIRGenTypeCache.h" +#include "CIRGenTypes.h" +#include "CIRGenVTables.h" +#include "CIRGenValue.h" +#include "clang/CIR/MissingFeatures.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Interfaces/CIROpInterfaces.h" + +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallPtrSet.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Value.h" + +using namespace clang; +namespace cir { + +class CIRGenFunction; +class CIRGenCXXABI; +class TargetCIRGenInfo; +class CIRGenOpenMPRuntime; + +enum ForDefinition_t : bool { NotForDefinition = false, ForDefinition = true }; + +/// Implementation of a CIR/MLIR emission from Clang AST. +/// +/// This will emit operations that are specific to C(++)/ObjC(++) language, +/// preserving the semantics of the language and (hopefully) allow to perform +/// accurate analysis and transformation based on these high level semantics. +class CIRGenModule : public CIRGenTypeCache { + CIRGenModule(CIRGenModule &) = delete; + CIRGenModule &operator=(CIRGenModule &) = delete; + +public: + CIRGenModule(mlir::MLIRContext &context, clang::ASTContext &astctx, + const clang::CodeGenOptions &CGO, + clang::DiagnosticsEngine &Diags); + + ~CIRGenModule(); + + const std::string &getModuleNameHash() const { return ModuleNameHash; } + +private: + mutable std::unique_ptr TheTargetCIRGenInfo; + + /// The builder is a helper class to create IR inside a function. The + /// builder is stateful, in particular it keeps an "insertion point": this + /// is where the next operations will be introduced. + CIRGenBuilderTy builder; + + /// Hold Clang AST information. + clang::ASTContext &astCtx; + + const clang::LangOptions &langOpts; + + const clang::CodeGenOptions &codeGenOpts; + + /// A "module" matches a c/cpp source file: containing a list of functions. + mlir::ModuleOp theModule; + + clang::DiagnosticsEngine &Diags; + + const clang::TargetInfo ⌖ + + std::unique_ptr ABI; + + std::unique_ptr tbaa; + + /// Used for `UniqueInternalLinkageNames` option + std::string ModuleNameHash = ""; + + /// Per-module type mapping from clang AST to CIR. + CIRGenTypes genTypes; + + /// Holds information about C++ vtables. + CIRGenVTables VTables; + + /// Holds the OpenCL runtime + std::unique_ptr openCLRuntime; + + /// Holds the OpenMP runtime + std::unique_ptr openMPRuntime; + + /// Per-function codegen information. Updated everytime buildCIR is called + /// for FunctionDecls's. + CIRGenFunction *CurCGF = nullptr; + + // A set of references that have only been set via a weakref so far. 
This is + // used to remove the weak of the reference if we ever see a direct reference + // or a definition. + llvm::SmallPtrSet WeakRefReferences; + + /// ------- + /// Declaring variables + /// ------- + + /// Set of global decls for which we already diagnosed mangled name conflict. + /// Required to not issue a warning (on a mangling conflict) multiple times + /// for the same decl. + llvm::DenseSet DiagnosedConflictingDefinitions; + + /// ------- + /// Annotations + /// ------- + + /// We do not store global annotations in the module here, instead, we store + /// each annotation as attribute of GlobalOp and FuncOp. + /// We defer creation of global annotation variable to LoweringPrepare + /// as CIR passes do not need to have a global view of all annotations. + + /// Used for uniquing of annotation arguments. + llvm::DenseMap annotationArgs; + + /// Store deferred function annotations so they can be emitted at the end with + /// most up to date ValueDecl that will have all the inherited annotations. + llvm::DenseMap deferredAnnotations; + + llvm::DenseMap + materializedGlobalTemporaryMap; + +public: + mlir::ModuleOp getModule() const { return theModule; } + CIRGenBuilderTy &getBuilder() { return builder; } + clang::ASTContext &getASTContext() const { return astCtx; } + const clang::TargetInfo &getTarget() const { return target; } + const clang::CodeGenOptions &getCodeGenOpts() const { return codeGenOpts; } + clang::DiagnosticsEngine &getDiags() const { return Diags; } + CIRGenTypes &getTypes() { return genTypes; } + const clang::LangOptions &getLangOpts() const { return langOpts; } + CIRGenFunction *getCurrCIRGenFun() const { return CurCGF; } + const CIRDataLayout getDataLayout() const { + // FIXME(cir): instead of creating a CIRDataLayout every time, set it as an + // attribute for the CIRModule class. + return {theModule}; + } + + CIRGenCXXABI &getCXXABI() const { return *ABI; } + + /// ------- + /// Handling globals + /// ------- + + // TODO(cir): does this really need to be a state for CIR emission? + GlobalDecl initializedGlobalDecl; + + /// Global variables with initializers that need to run before main. + /// TODO(cir): for now track a generation operation, this is so far only + /// used to sync with DelayedCXXInitPosition. Improve it when we actually + /// use function calls for initialization + std::vector CXXGlobalInits; + + /// Emit the function that initializes C++ globals. + void buildCXXGlobalInitFunc(); + + /// Track whether the CIRGenModule is currently building an initializer + /// for a global (e.g. as opposed to a regular cir.func). + mlir::cir::GlobalOp globalOpContext = nullptr; + + /// When a C++ decl with an initializer is deferred, null is + /// appended to CXXGlobalInits, and the index of that null is placed + /// here so that the initializer will be performed in the correct + /// order. Once the decl is emitted, the index is replaced with ~0U to ensure + /// that we don't re-emit the initializer. + llvm::DenseMap DelayedCXXInitPosition; + + /// Keep track of a map between lambda fields and names, this needs to be per + /// module since lambdas might get generated later as part of defered work, + /// and since the pointers are supposed to be uniqued, should be fine. Revisit + /// this if it ends up taking too much memory. + llvm::DenseMap LambdaFieldToName; + + /// If the declaration has internal linkage but is inside an + /// extern "C" linkage specification, prepare to emit an alias for it + /// to the expected name. 
+ template + void maybeHandleStaticInExternC(const SomeDecl *D, mlir::cir::GlobalOp GV); + + /// Tell the consumer that this variable has been instantiated. + void HandleCXXStaticMemberVarInstantiation(VarDecl *VD); + + llvm::DenseMap StaticLocalDeclMap; + llvm::DenseMap Globals; + mlir::Operation *getGlobalValue(StringRef Ref); + mlir::Value getGlobalValue(const clang::Decl *D); + + /// If the specified mangled name is not in the module, create and return an + /// mlir::GlobalOp value + mlir::cir::GlobalOp + getOrCreateCIRGlobal(StringRef MangledName, mlir::Type Ty, LangAS AddrSpace, + const VarDecl *D, + ForDefinition_t IsForDefinition = NotForDefinition); + + mlir::cir::GlobalOp getStaticLocalDeclAddress(const VarDecl *D) { + return StaticLocalDeclMap[D]; + } + + void setStaticLocalDeclAddress(const VarDecl *D, mlir::cir::GlobalOp C) { + StaticLocalDeclMap[D] = C; + } + + mlir::cir::GlobalOp + getOrCreateStaticVarDecl(const VarDecl &D, + mlir::cir::GlobalLinkageKind Linkage); + + mlir::cir::GlobalOp buildGlobal(const VarDecl *D, mlir::Type Ty, + ForDefinition_t IsForDefinition); + + /// TODO(cir): once we have cir.module, add this as a convenience method + /// there instead of here. + /// + /// Look up the specified global in the module symbol table. + /// 1. If it does not exist, add a declaration of the global and return it. + /// 2. Else, the global exists but has the wrong type: return the function + /// with a constantexpr cast to the right type. + /// 3. Finally, if the existing global is the correct declaration, return + /// the existing global. + mlir::cir::GlobalOp getOrInsertGlobal( + mlir::Location loc, StringRef Name, mlir::Type Ty, + llvm::function_ref CreateGlobalCallback); + + // Overload to construct a global variable using its constructor's defaults. + mlir::cir::GlobalOp getOrInsertGlobal(mlir::Location loc, StringRef Name, + mlir::Type Ty); + + static mlir::cir::GlobalOp + createGlobalOp(CIRGenModule &cgm, mlir::Location loc, StringRef name, + mlir::Type t, bool isConstant = false, + mlir::cir::AddressSpaceAttr addrSpace = {}, + mlir::Operation *insertPoint = nullptr, + mlir::cir::GlobalLinkageKind linkage = + mlir::cir::GlobalLinkageKind::ExternalLinkage); + + // FIXME: Hardcoding priority here is gross. + void AddGlobalCtor(mlir::cir::FuncOp Ctor, int Priority = 65535); + void AddGlobalDtor(mlir::cir::FuncOp Dtor, int Priority = 65535, + bool IsDtorAttrFunc = false); + + /// Return the mlir::Value for the address of the given global variable. + /// If Ty is non-null and if the global doesn't exist, then it will be created + /// with the specified type instead of whatever the normal requested type + /// would be. If IsForDefinition is true, it is guaranteed that an actual + /// global with type Ty will be returned, not conversion of a variable with + /// the same mangled name but some other type. + mlir::Value + getAddrOfGlobalVar(const VarDecl *D, mlir::Type Ty = {}, + ForDefinition_t IsForDefinition = NotForDefinition); + + /// Return the mlir::GlobalViewAttr for the address of the given global. + mlir::cir::GlobalViewAttr + getAddrOfGlobalVarAttr(const VarDecl *D, mlir::Type Ty = {}, + ForDefinition_t IsForDefinition = NotForDefinition); + + /// Get a reference to the target of VD. 
+ mlir::Operation *getWeakRefReference(const ValueDecl *VD); + + CharUnits + computeNonVirtualBaseClassOffset(const CXXRecordDecl *DerivedClass, + CastExpr::path_const_iterator Start, + CastExpr::path_const_iterator End); + + /// Get the CIR attributes and calling convention to use for a particular + /// function type. + /// + /// \param Name - The function name. + /// \param Info - The function type information. + /// \param CalleeInfo - The callee information these attributes are being + /// constructed for. If valid, the attributes applied to this decl may + /// contribute to the function attributes and calling convention. + /// \param Attrs [out] - On return, the attribute list to use. + void constructAttributeList(StringRef Name, const CIRGenFunctionInfo &Info, + CIRGenCalleeInfo CalleeInfo, + mlir::NamedAttrList &Attrs, + mlir::cir::CallingConv &callingConv, + bool AttrOnCallSite, bool IsThunk); + + /// Helper function for getDefaultFunctionAttributes. Builds a set of function + /// attributes which can be simply added to a function. + void getTrivialDefaultFunctionAttributes(StringRef name, bool hasOptnone, + bool attrOnCallSite, + mlir::NamedAttrList &funcAttrs); + + /// Helper function for constructAttributeList and + /// addDefaultFunctionDefinitionAttributes. Builds a set of function + /// attributes to add to a function with the given properties. + void getDefaultFunctionAttributes(StringRef name, bool hasOptnone, + bool attrOnCallSite, + mlir::NamedAttrList &funcAttrs); + + /// Will return a global variable of the given type. If a variable with a + /// different type already exists then a new variable with the right type + /// will be created and all uses of the old variable will be replaced with a + /// bitcast to the new variable. + mlir::cir::GlobalOp createOrReplaceCXXRuntimeVariable( + mlir::Location loc, StringRef Name, mlir::Type Ty, + mlir::cir::GlobalLinkageKind Linkage, clang::CharUnits Alignment); + + /// Emit any vtables which we deferred and still have a use for. + void buildDeferredVTables(); + bool shouldOpportunisticallyEmitVTables(); + + void setDSOLocal(mlir::cir::CIRGlobalValueInterface GV) const; + + /// Return the appropriate linkage for the vtable, VTT, and type information + /// of the given class. + mlir::cir::GlobalLinkageKind getVTableLinkage(const CXXRecordDecl *RD); + + /// Emit type metadata for the given vtable using the given layout. + void buildVTableTypeMetadata(const CXXRecordDecl *RD, + mlir::cir::GlobalOp VTable, + const VTableLayout &VTLayout); + + /// Get the address of the RTTI descriptor for the given type. + mlir::Attribute getAddrOfRTTIDescriptor(mlir::Location loc, QualType Ty, + bool ForEH = false); + + /// TODO(cir): add CIR visibility bits. + static mlir::SymbolTable::Visibility getCIRVisibility(Visibility V) { + switch (V) { + case DefaultVisibility: + return mlir::SymbolTable::Visibility::Public; + case HiddenVisibility: + return mlir::SymbolTable::Visibility::Private; + case ProtectedVisibility: + llvm_unreachable("NYI"); + } + llvm_unreachable("unknown visibility!"); + } + + llvm::DenseMap ConstantStringMap; + + /// Return a constant array for the given string. + mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *E); + + /// Return a global symbol reference to a constant array for the given string + /// literal. 
+ mlir::cir::GlobalViewAttr + getAddrOfConstantStringFromLiteral(const StringLiteral *S, + StringRef Name = ".str"); + unsigned StringLiteralCnt = 0; + + unsigned CompoundLitaralCnt = 0; + /// Return the unique name for global compound literal + std::string createGlobalCompoundLiteralName() { + return (Twine(".compoundLiteral.") + Twine(CompoundLitaralCnt++)).str(); + } + + /// Return the AST address space of the underlying global variable for D, as + /// determined by its declaration. Normally this is the same as the address + /// space of D's type, but in CUDA, address spaces are associated with + /// declarations, not types. If D is nullptr, return the default address + /// space for global variable. + /// + /// For languages without explicit address spaces, if D has default address + /// space, target-specific global or constant address space may be returned. + LangAS getGlobalVarAddressSpace(const VarDecl *D); + + /// Return the AST address space of constant literal, which is used to emit + /// the constant literal as global variable in LLVM IR. + /// Note: This is not necessarily the address space of the constant literal + /// in AST. For address space agnostic language, e.g. C++, constant literal + /// in AST is always in default address space. + LangAS getGlobalConstantAddressSpace() const; + + /// Returns the address space for temporary allocations in the language. This + /// ensures that the allocated variable's address space matches the + /// expectations of the AST, rather than using the target's allocation address + /// space, which may lead to type mismatches in other parts of the IR. + LangAS getLangTempAllocaAddressSpace() const; + + /// Set attributes which are common to any form of a global definition (alias, + /// Objective-C method, function, global variable). + /// + /// NOTE: This should only be called for definitions. + void setCommonAttributes(GlobalDecl GD, mlir::Operation *GV); + + // TODO: this obviously overlaps with + const TargetCIRGenInfo &getTargetCIRGenInfo(); + + /// Helpers to convert Clang's SourceLocation to a MLIR Location. + mlir::Location getLoc(clang::SourceLocation SLoc); + mlir::Location getLoc(clang::SourceRange SLoc); + mlir::Location getLoc(mlir::Location lhs, mlir::Location rhs); + + /// Helper to convert Clang's alignment to CIR alignment + mlir::IntegerAttr getSize(CharUnits size); + + /// Returns whether the given record has public LTO visibility (regardless of + /// -lto-whole-program-visibility) and therefore may not participate in + /// (single-module) CFI and whole-program vtable optimization. + bool AlwaysHasLTOVisibilityPublic(const CXXRecordDecl *RD); + + /// Returns whether the given record has hidden LTO visibility and therefore + /// may participate in (single-module) CFI and whole-program vtable + /// optimization. + bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + + /// Determine whether an object of this type can be emitted + /// as a constant. + /// + /// If ExcludeCtor is true, the duration when the object's constructor runs + /// will not be considered. The caller will need to verify that the object is + /// not written to during its construction. + /// FIXME: in LLVM codegen path this is part of CGM, which doesn't seem + /// like necessary, since (1) it doesn't use CGM at all and (2) is AST type + /// query specific. + bool isTypeConstant(clang::QualType Ty, bool ExcludeCtor, bool ExcludeDtor); + + /// FIXME: this could likely be a common helper and not necessarily related + /// with codegen. 
+ /// Return the best known alignment for an unknown pointer to a + /// particular class. + clang::CharUnits getClassPointerAlignment(const clang::CXXRecordDecl *RD); + + /// FIXME: this could likely be a common helper and not necessarily related + /// with codegen. + clang::CharUnits + getNaturalPointeeTypeAlignment(clang::QualType ty, + LValueBaseInfo *baseInfo = nullptr, + TBAAAccessInfo *tbaaInfo = nullptr); + + /// FIXME: this could likely be a common helper and not necessarily related + /// with codegen. + clang::CharUnits getNaturalTypeAlignment(clang::QualType T, + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *tbaaInfo = nullptr, + bool forPointeeType = false); + + /// TODO: Add TBAAAccessInfo + clang::CharUnits + getDynamicOffsetAlignment(clang::CharUnits actualBaseAlign, + const clang::CXXRecordDecl *baseDecl, + clang::CharUnits expectedTargetAlign); + + mlir::cir::FuncOp getAddrOfCXXStructor( + clang::GlobalDecl GD, const CIRGenFunctionInfo *FnInfo = nullptr, + mlir::cir::FuncType FnType = nullptr, bool DontDefer = false, + ForDefinition_t IsForDefinition = NotForDefinition) { + + return getAddrAndTypeOfCXXStructor(GD, FnInfo, FnType, DontDefer, + IsForDefinition) + .second; + } + + /// A queue of (optional) vtables to consider emitting. + std::vector DeferredVTables; + + mlir::Type getVTableComponentType(); + CIRGenVTables &getVTables() { return VTables; } + + ItaniumVTableContext &getItaniumVTableContext() { + return VTables.getItaniumVTableContext(); + } + const ItaniumVTableContext &getItaniumVTableContext() const { + return VTables.getItaniumVTableContext(); + } + + /// getTBAAAccessInfo - Gte TBAA information that describes an access to an + /// object of the given type. + TBAAAccessInfo getTBAAAccessInfo(QualType accessType); + + /// This contains all the decls which have definitions but which are deferred + /// for emission and therefore should only be output if they are actually + /// used. If a decl is in this, then it is known to have not been referenced + /// yet. + std::map DeferredDecls; + + // This is a list of deferred decls which we have seen that *are* actually + // referenced. These get code generated when the module is done. + std::vector DeferredDeclsToEmit; + void addDeferredDeclToEmit(clang::GlobalDecl GD) { + DeferredDeclsToEmit.emplace_back(GD); + } + + // After HandleTranslation finishes, differently from DeferredDeclsToEmit, + // DefaultMethodsToEmit is only called after a set of CIR passes run. See + // addDefaultMethodsToEmit usage for examples. + std::vector DefaultMethodsToEmit; + void addDefaultMethodsToEmit(clang::GlobalDecl GD) { + DefaultMethodsToEmit.emplace_back(GD); + } + + std::pair getAddrAndTypeOfCXXStructor( + clang::GlobalDecl GD, const CIRGenFunctionInfo *FnInfo = nullptr, + mlir::cir::FuncType FnType = nullptr, bool Dontdefer = false, + ForDefinition_t IsForDefinition = NotForDefinition); + + void buildTopLevelDecl(clang::Decl *decl); + void buildLinkageSpec(const LinkageSpecDecl *D); + + /// Emit code for a single global function or var decl. Forward declarations + /// are emitted lazily. + void buildGlobal(clang::GlobalDecl D); + + bool tryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); + + void buildAliasForGlobal(StringRef mangledName, mlir::Operation *op, + GlobalDecl aliasGD, mlir::cir::FuncOp aliasee, + mlir::cir::GlobalLinkageKind linkage); + + mlir::Type getCIRType(const clang::QualType &type); + + /// Set the visibility for the given global. 
+ void setGlobalVisibility(mlir::Operation *Op, const NamedDecl *D) const; + void setDSOLocal(mlir::Operation *Op) const; + /// Set visibility, dllimport/dllexport and dso_local. + /// This must be called after dllimport/dllexport is set. + void setGVProperties(mlir::Operation *Op, const NamedDecl *D) const; + void setGVPropertiesAux(mlir::Operation *Op, const NamedDecl *D) const; + + /// Set the TLS mode for the given global Op for the thread-local + /// variable declaration D. + void setTLSMode(mlir::Operation *Op, const VarDecl &D) const; + + /// Get TLS mode from CodeGenOptions. + mlir::cir::TLS_Model GetDefaultCIRTLSModel() const; + + /// Replace the present global `Old` with the given global `New`. Their symbol + /// names must match; their types can be different. Usages of the old global + /// will be automatically updated if their types mismatch. + /// + /// This function will erase the old global. This function will NOT insert the + /// new global into the module. + void replaceGlobal(mlir::cir::GlobalOp Old, mlir::cir::GlobalOp New); + + /// Determine whether the definition must be emitted; if this returns \c + /// false, the definition can be emitted lazily if it's used. + bool MustBeEmitted(const clang::ValueDecl *D); + + /// Whether this function's return type has no side effects, and thus may be + /// trivially discared if it is unused. + bool MayDropFunctionReturn(const clang::ASTContext &Context, + clang::QualType ReturnType); + + bool isInNoSanitizeList(clang::SanitizerMask Kind, mlir::cir::FuncOp Fn, + clang::SourceLocation) const; + + /// Determine whether the definition can be emitted eagerly, or should be + /// delayed until the end of the translation unit. This is relevant for + /// definitions whose linkage can change, e.g. implicit function instantions + /// which may later be explicitly instantiated. + bool MayBeEmittedEagerly(const clang::ValueDecl *D); + + bool verifyModule(); + + /// Return the address of the given function. If Ty is non-null, then this + /// function will use the specified type if it has to create it. + // TODO: this is a bit weird as `GetAddr` given we give back a FuncOp? + mlir::cir::FuncOp + GetAddrOfFunction(clang::GlobalDecl GD, mlir::Type Ty = nullptr, + bool ForVTable = false, bool Dontdefer = false, + ForDefinition_t IsForDefinition = NotForDefinition); + + mlir::Operation * + GetAddrOfGlobal(clang::GlobalDecl GD, + ForDefinition_t IsForDefinition = NotForDefinition); + + // Return whether RTTI information should be emitted for this target. + bool shouldEmitRTTI(bool ForEH = false) { + return (ForEH || getLangOpts().RTTI) && !getLangOpts().CUDAIsDevice && + !(getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice && + getTriple().isNVPTX()); + } + + // C++ related functions. + void buildDeclContext(const DeclContext *DC); + + /// Return the result of value-initializing the given type, i.e. a null + /// expression of the given type. This is usually, but not always, an LLVM + /// null constant. + mlir::Value buildNullConstant(QualType T, mlir::Location loc); + + mlir::Value buildMemberPointerConstant(const UnaryOperator *E); + + llvm::StringRef getMangledName(clang::GlobalDecl GD); + + void buildTentativeDefinition(const VarDecl *D); + + // Make sure that this type is translated. + void UpdateCompletedType(const clang::TagDecl *TD); + + /// Set function attributes for a function declaration. 
+ void setFunctionAttributes(GlobalDecl GD, mlir::cir::FuncOp F, + bool IsIncompleteFunction, bool IsThunk); + + /// Set the CIR function attributes (sext, zext, etc). + void setCIRFunctionAttributes(GlobalDecl GD, const CIRGenFunctionInfo &info, + mlir::cir::FuncOp func, bool isThunk); + + /// Set the CIR function attributes which only apply to a function + /// definition. + void setCIRFunctionAttributesForDefinition(const Decl *decl, + mlir::cir::FuncOp func); + + void buildGlobalDefinition(clang::GlobalDecl D, + mlir::Operation *Op = nullptr); + void buildGlobalFunctionDefinition(clang::GlobalDecl D, mlir::Operation *Op); + void buildGlobalVarDefinition(const clang::VarDecl *D, + bool IsTentative = false); + + /// Emit the function that initializes the specified global + void buildCXXGlobalVarDeclInit(const VarDecl *D, mlir::cir::GlobalOp Addr, + bool PerformInit); + + void buildCXXGlobalVarDeclInitFunc(const VarDecl *D, mlir::cir::GlobalOp Addr, + bool PerformInit); + + void addDeferredVTable(const CXXRecordDecl *RD) { + DeferredVTables.push_back(RD); + } + + /// Stored a deferred empty coverage mapping for an unused and thus + /// uninstrumented top level declaration. + void AddDeferredUnusedCoverageMapping(clang::Decl *D); + + std::nullptr_t getModuleDebugInfo() { return nullptr; } + + /// Emit any needed decls for which code generation was deferred. + void buildDeferred(unsigned recursionLimit); + + /// Helper for `buildDeferred` to apply actual codegen. + void buildGlobalDecl(clang::GlobalDecl &D); + + /// Build default methods not emitted before this point. + void buildDefaultMethods(); + + const llvm::Triple &getTriple() const { return target.getTriple(); } + + // Finalize CIR code generation. + void Release(); + + bool shouldEmitFunction(clang::GlobalDecl GD); + + /// Returns a pointer to a global variable representing a temporary with + /// static or thread storage duration. + mlir::Operation * + getAddrOfGlobalTemporary(const MaterializeTemporaryExpr *expr, + const Expr *init); + + // Produce code for this constructor/destructor. This method doesn't try to + // apply any ABI rules about which other constructors/destructors are needed + // or if they are alias to each other. + mlir::cir::FuncOp codegenCXXStructor(clang::GlobalDecl GD); + + // Produce code for this constructor/destructor for global initialzation. 
+ void codegenGlobalInitCxxStructor(const clang::VarDecl *D, + mlir::cir::GlobalOp Addr, bool NeedsCtor, + bool NeedsDtor, bool isCstStorage); + + bool lookupRepresentativeDecl(llvm::StringRef MangledName, + clang::GlobalDecl &Result) const; + + bool supportsCOMDAT() const; + void maybeSetTrivialComdat(const clang::Decl &d, mlir::Operation *op); + + void emitError(const llvm::Twine &message) { theModule.emitError(message); } + + /// ------- + /// Visibility and Linkage + /// ------- + + static void setInitializer(mlir::cir::GlobalOp &op, mlir::Attribute value); + static mlir::SymbolTable::Visibility + getMLIRVisibilityFromCIRLinkage(mlir::cir::GlobalLinkageKind GLK); + static mlir::cir::VisibilityKind getGlobalVisibilityKindFromClangVisibility( + clang::VisibilityAttr::VisibilityType visibility); + mlir::cir::VisibilityAttr getGlobalVisibilityAttrFromDecl(const Decl *decl); + static mlir::SymbolTable::Visibility + getMLIRVisibility(mlir::cir::GlobalOp op); + mlir::cir::GlobalLinkageKind getFunctionLinkage(GlobalDecl GD); + mlir::cir::GlobalLinkageKind + getCIRLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage, + bool IsConstantVariable); + void setFunctionLinkage(GlobalDecl GD, mlir::cir::FuncOp f) { + auto L = getFunctionLinkage(GD); + f.setLinkageAttr( + mlir::cir::GlobalLinkageKindAttr::get(builder.getContext(), L)); + mlir::SymbolTable::setSymbolVisibility(f, + getMLIRVisibilityFromCIRLinkage(L)); + } + + mlir::cir::GlobalLinkageKind getCIRLinkageVarDefinition(const VarDecl *VD, + bool IsConstant); + + void addReplacement(StringRef Name, mlir::Operation *Op); + + mlir::Location getLocForFunction(const clang::FunctionDecl *FD); + + void ReplaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *Old, + mlir::cir::FuncOp NewFn); + + // TODO: CodeGen also passes an AttributeList here. We'll have to match that + // in CIR + mlir::cir::FuncOp + GetOrCreateCIRFunction(llvm::StringRef MangledName, mlir::Type Ty, + clang::GlobalDecl D, bool ForVTable, + bool DontDefer = false, bool IsThunk = false, + ForDefinition_t IsForDefinition = NotForDefinition, + mlir::ArrayAttr ExtraAttrs = {}); + // Effectively create the CIR instruction, properly handling insertion + // points. + mlir::cir::FuncOp createCIRFunction(mlir::Location loc, StringRef name, + mlir::cir::FuncType Ty, + const clang::FunctionDecl *FD); + + mlir::cir::FuncOp createRuntimeFunction(mlir::cir::FuncType Ty, + StringRef Name, mlir::ArrayAttr = {}, + bool Local = false, + bool AssumeConvergent = false); + + /// Emit type info if type of an expression is a variably modified + /// type. Also emit proper debug info for cast types. + void buildExplicitCastExprType(const ExplicitCastExpr *E, + CIRGenFunction *CGF = nullptr); + + static constexpr const char *builtinCoroId = "__builtin_coro_id"; + static constexpr const char *builtinCoroAlloc = "__builtin_coro_alloc"; + static constexpr const char *builtinCoroBegin = "__builtin_coro_begin"; + static constexpr const char *builtinCoroEnd = "__builtin_coro_end"; + + /// Given a builtin id for a function like "__builtin_fabsf", return a + /// Function* for "fabsf". + mlir::cir::FuncOp getBuiltinLibFunction(const FunctionDecl *FD, + unsigned BuiltinID); + + /// Emit a general error that something can't be done. + void Error(SourceLocation loc, StringRef error); + + /// Print out an error that codegen doesn't support the specified stmt yet. + void ErrorUnsupported(const Stmt *S, const char *Type); + + /// Print out an error that codegen doesn't support the specified decl yet. 
+ void ErrorUnsupported(const Decl *D, const char *Type); + + /// Return a reference to the configured OpenMP runtime. + CIRGenOpenCLRuntime &getOpenCLRuntime() { + assert(openCLRuntime != nullptr); + return *openCLRuntime; + } + + void createOpenCLRuntime() { + openCLRuntime.reset(new CIRGenOpenCLRuntime(*this)); + } + + /// Return a reference to the configured OpenMP runtime. + CIRGenOpenMPRuntime &getOpenMPRuntime() { + assert(openMPRuntime != nullptr); + return *openMPRuntime; + } + + /// OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument + /// information in the program executable. The argument information stored + /// includes the argument name, its type, the address and access qualifiers + /// used. This helper can be used to generate metadata for source code kernel + /// function as well as generated implicitly kernels. If a kernel is generated + /// implicitly null value has to be passed to the last two parameters, + /// otherwise all parameters must have valid non-null values. + /// \param FN is a pointer to IR function being generated. + /// \param FD is a pointer to function declaration if any. + /// \param CGF is a pointer to CIRGenFunction that generates this function. + void genKernelArgMetadata(mlir::cir::FuncOp FN, + const FunctionDecl *FD = nullptr, + CIRGenFunction *CGF = nullptr); + + /// Emits OpenCL specific Metadata e.g. OpenCL version. + void buildOpenCLMetadata(); + +private: + // An ordered map of canonical GlobalDecls to their mangled names. + llvm::MapVector MangledDeclNames; + llvm::StringMap Manglings; + + // FIXME: should we use llvm::TrackingVH here? + typedef llvm::StringMap ReplacementsTy; + ReplacementsTy Replacements; + /// Call replaceAllUsesWith on all pairs in Replacements. + void applyReplacements(); + + void setNonAliasAttributes(GlobalDecl GD, mlir::Operation *GV); + /// Map source language used to a CIR attribute. + mlir::cir::SourceLanguage getCIRSourceLanguage(); + + /// Emit all the global annotations. + /// This actually only emits annotations for deffered declarations of + /// functions, because global variables need no deffred emission. + void buildGlobalAnnotations(); + + /// Emit additional args of the annotation. + mlir::ArrayAttr buildAnnotationArgs(clang::AnnotateAttr *attr); + + /// Create cir::AnnotationAttr which contains the annotation + /// information for a given GlobalValue. Notice that a GlobalValue could + /// have multiple annotations, and this function creates attribute for + /// one of them. + mlir::cir::AnnotationAttr buildAnnotateAttr(clang::AnnotateAttr *aa); + + /// Add global annotations for a global value. + /// Those annotations are emitted during lowering to the LLVM code. + void addGlobalAnnotations(const ValueDecl *d, mlir::Operation *gv); +}; +} // namespace cir + +#endif // LLVM_CLANG_LIB_CODEGEN_CIRGENMODULE_H diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenCL.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenCL.cpp new file mode 100644 index 000000000000..6c2e7542fbbb --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenCL.cpp @@ -0,0 +1,265 @@ +//===- CIRGenOpenCL.cpp - OpenCL-specific logic for CIR generation --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with OpenCL-specific logic of CIR generation. 
+// +//===----------------------------------------------------------------------===// + +#include "CIRGenFunction.h" +#include "CIRGenModule.h" + +using namespace cir; +using namespace clang; + +// Returns the address space id that should be produced to the +// kernel_arg_addr_space metadata. This is always fixed to the ids +// as specified in the SPIR 2.0 specification in order to differentiate +// for example in clGetKernelArgInfo() implementation between the address +// spaces with targets without unique mapping to the OpenCL address spaces +// (basically all single AS CPUs). +static unsigned ArgInfoAddressSpace(LangAS AS) { + switch (AS) { + case LangAS::opencl_global: + return 1; + case LangAS::opencl_constant: + return 2; + case LangAS::opencl_local: + return 3; + case LangAS::opencl_generic: + return 4; // Not in SPIR 2.0 specs. + case LangAS::opencl_global_device: + return 5; + case LangAS::opencl_global_host: + return 6; + default: + return 0; // Assume private. + } +} + +void CIRGenModule::genKernelArgMetadata(mlir::cir::FuncOp Fn, + const FunctionDecl *FD, + CIRGenFunction *CGF) { + assert(((FD && CGF) || (!FD && !CGF)) && + "Incorrect use - FD and CGF should either be both null or not!"); + // Create MDNodes that represent the kernel arg metadata. + // Each MDNode is a list in the form of "key", N number of values which is + // the same number of values as their are kernel arguments. + + const PrintingPolicy &Policy = getASTContext().getPrintingPolicy(); + + // Integer values for the kernel argument address space qualifiers. + SmallVector addressQuals; + + // Attrs for the kernel argument access qualifiers (images only). + SmallVector accessQuals; + + // Attrs for the kernel argument type names. + SmallVector argTypeNames; + + // Attrs for the kernel argument base type names. + SmallVector argBaseTypeNames; + + // Attrs for the kernel argument type qualifiers. + SmallVector argTypeQuals; + + // Attrs for the kernel argument names. + SmallVector argNames; + + // OpenCL image and pipe types require special treatments for some metadata + assert(!MissingFeatures::openCLBuiltinTypes()); + + if (FD && CGF) + for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { + const ParmVarDecl *parm = FD->getParamDecl(i); + // Get argument name. + argNames.push_back(builder.getStringAttr(parm->getName())); + + if (!getLangOpts().OpenCL) + continue; + QualType ty = parm->getType(); + std::string typeQuals; + + // Get image and pipe access qualifier: + if (ty->isImageType() || ty->isPipeType()) { + llvm_unreachable("NYI"); + } else + accessQuals.push_back(builder.getStringAttr("none")); + + auto getTypeSpelling = [&](QualType Ty) { + auto typeName = Ty.getUnqualifiedType().getAsString(Policy); + + if (Ty.isCanonical()) { + StringRef typeNameRef = typeName; + // Turn "unsigned type" to "utype" + if (typeNameRef.consume_front("unsigned ")) + return std::string("u") + typeNameRef.str(); + if (typeNameRef.consume_front("signed ")) + return typeNameRef.str(); + } + + return typeName; + }; + + if (ty->isPointerType()) { + QualType pointeeTy = ty->getPointeeType(); + + // Get address qualifier. + addressQuals.push_back( + ArgInfoAddressSpace(pointeeTy.getAddressSpace())); + + // Get argument type name. 
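// The spelling rule getTypeSpelling above applies, shown standalone: for
// canonical types, "unsigned <T>" is printed as "u<T>" and a leading
// "signed " is dropped, matching the names recorded in the kernel argument
// type metadata. Plain std::string is used here in place of llvm::StringRef,
// purely as an illustration.
#include <cassert>
#include <string>

std::string spellKernelArgType(const std::string &name) {
  const std::string u = "unsigned ", s = "signed ";
  if (name.compare(0, u.size(), u) == 0)
    return "u" + name.substr(u.size()); // "unsigned int" -> "uint"
  if (name.compare(0, s.size(), s) == 0)
    return name.substr(s.size());       // "signed char"  -> "char"
  return name;                          // anything else is left as spelled
}

int main() {
  assert(spellKernelArgType("unsigned int") == "uint");
  assert(spellKernelArgType("signed char") == "char");
  assert(spellKernelArgType("float") == "float");
  return 0;
}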
+ std::string typeName = getTypeSpelling(pointeeTy) + "*"; + std::string baseTypeName = + getTypeSpelling(pointeeTy.getCanonicalType()) + "*"; + argTypeNames.push_back(builder.getStringAttr(typeName)); + argBaseTypeNames.push_back(builder.getStringAttr(baseTypeName)); + + // Get argument type qualifiers: + if (ty.isRestrictQualified()) + typeQuals = "restrict"; + if (pointeeTy.isConstQualified() || + (pointeeTy.getAddressSpace() == LangAS::opencl_constant)) + typeQuals += typeQuals.empty() ? "const" : " const"; + if (pointeeTy.isVolatileQualified()) + typeQuals += typeQuals.empty() ? "volatile" : " volatile"; + } else { + uint32_t AddrSpc = 0; + bool isPipe = ty->isPipeType(); + if (ty->isImageType() || isPipe) + llvm_unreachable("NYI"); + + addressQuals.push_back(AddrSpc); + + // Get argument type name. + ty = isPipe ? ty->castAs()->getElementType() : ty; + std::string typeName = getTypeSpelling(ty); + std::string baseTypeName = getTypeSpelling(ty.getCanonicalType()); + + // Remove access qualifiers on images + // (as they are inseparable from type in clang implementation, + // but OpenCL spec provides a special query to get access qualifier + // via clGetKernelArgInfo with CL_KERNEL_ARG_ACCESS_QUALIFIER): + if (ty->isImageType()) { + llvm_unreachable("NYI"); + } + + argTypeNames.push_back(builder.getStringAttr(typeName)); + argBaseTypeNames.push_back(builder.getStringAttr(baseTypeName)); + + if (isPipe) + llvm_unreachable("NYI"); + } + argTypeQuals.push_back(builder.getStringAttr(typeQuals)); + } + + bool shouldEmitArgName = getCodeGenOpts().EmitOpenCLArgMetadata || + getCodeGenOpts().HIPSaveKernelArgName; + + if (getLangOpts().OpenCL) { + // The kernel arg name is emitted only when `-cl-kernel-arg-info` is on, + // since it is only used to support `clGetKernelArgInfo` which requires + // `-cl-kernel-arg-info` to work. The other metadata are mandatory because + // they are necessary for OpenCL runtime to set kernel argument. + mlir::ArrayAttr resArgNames = {}; + if (shouldEmitArgName) + resArgNames = builder.getArrayAttr(argNames); + + // Update the function's extra attributes with the kernel argument metadata. 
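// The block below follows an update-only-if-changed pattern: the new metadata
// attribute is merged into a copy of the function's extra-attribute dictionary
// and the dictionary is only written back when the stored value actually
// differs. A standalone sketch of that idea, with std::map standing in for the
// attribute dictionary (illustrative names, not MLIR API):
#include <map>
#include <string>

// Returns true when `dict` changed and therefore needs to be written back.
bool setIfChanged(std::map<std::string, std::string> &dict,
                  const std::string &key, const std::string &value) {
  auto [it, inserted] = dict.try_emplace(key, value);
  if (inserted)
    return true;    // key was missing: the dictionary changed
  if (it->second == value)
    return false;   // identical value: skip the write-back
  it->second = value;
  return true;
}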
+ auto value = mlir::cir::OpenCLKernelArgMetadataAttr::get( + Fn.getContext(), builder.getI32ArrayAttr(addressQuals), + builder.getArrayAttr(accessQuals), builder.getArrayAttr(argTypeNames), + builder.getArrayAttr(argBaseTypeNames), + builder.getArrayAttr(argTypeQuals), resArgNames); + mlir::NamedAttrList items{Fn.getExtraAttrs().getElements().getValue()}; + auto oldValue = items.set(value.getMnemonic(), value); + if (oldValue != value) { + Fn.setExtraAttrsAttr(mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), builder.getDictionaryAttr(items))); + } + } else { + if (shouldEmitArgName) + llvm_unreachable("NYI HIPSaveKernelArgName"); + } +} + +void CIRGenFunction::buildKernelMetadata(const FunctionDecl *FD, + mlir::cir::FuncOp Fn) { + if (!FD->hasAttr() && !FD->hasAttr()) + return; + + CGM.genKernelArgMetadata(Fn, FD, this); + + if (!getLangOpts().OpenCL) + return; + + using mlir::cir::OpenCLKernelMetadataAttr; + + mlir::ArrayAttr workGroupSizeHintAttr, reqdWorkGroupSizeAttr; + mlir::TypeAttr vecTypeHintAttr; + std::optional vecTypeHintSignedness; + mlir::IntegerAttr intelReqdSubGroupSizeAttr; + + if (const VecTypeHintAttr *A = FD->getAttr()) { + mlir::Type typeHintValue = getTypes().ConvertType(A->getTypeHint()); + vecTypeHintAttr = mlir::TypeAttr::get(typeHintValue); + vecTypeHintSignedness = + OpenCLKernelMetadataAttr::isSignedHint(typeHintValue); + } + + if (const WorkGroupSizeHintAttr *A = FD->getAttr()) { + workGroupSizeHintAttr = builder.getI32ArrayAttr({ + static_cast(A->getXDim()), + static_cast(A->getYDim()), + static_cast(A->getZDim()), + }); + } + + if (const ReqdWorkGroupSizeAttr *A = FD->getAttr()) { + reqdWorkGroupSizeAttr = builder.getI32ArrayAttr({ + static_cast(A->getXDim()), + static_cast(A->getYDim()), + static_cast(A->getZDim()), + }); + } + + if (const OpenCLIntelReqdSubGroupSizeAttr *A = + FD->getAttr()) { + intelReqdSubGroupSizeAttr = builder.getI32IntegerAttr(A->getSubGroupSize()); + } + + // Skip the metadata attr if no hints are present. + if (!vecTypeHintAttr && !workGroupSizeHintAttr && !reqdWorkGroupSizeAttr && + !intelReqdSubGroupSizeAttr) + return; + + // Append the kernel metadata to the extra attributes dictionary. + mlir::NamedAttrList attrs; + attrs.append(Fn.getExtraAttrs().getElements()); + + auto kernelMetadataAttr = OpenCLKernelMetadataAttr::get( + builder.getContext(), workGroupSizeHintAttr, reqdWorkGroupSizeAttr, + vecTypeHintAttr, vecTypeHintSignedness, intelReqdSubGroupSizeAttr); + attrs.append(kernelMetadataAttr.getMnemonic(), kernelMetadataAttr); + + Fn.setExtraAttrsAttr(mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), attrs.getDictionary(builder.getContext()))); +} + +void CIRGenModule::buildOpenCLMetadata() { + // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the + // opencl.ocl.version named metadata node. + // C++ for OpenCL has a distinct mapping for versions compatibile with OpenCL. 
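// The version returned by getOpenCLCompatibleVersion() is encoded as
// major * 100 + minor * 10 (OpenCL 1.2 -> 120, OpenCL 3.0 -> 300), so the
// split below recovers major = version / 100 and minor = (version % 100) / 10.
// A quick standalone check of that arithmetic:
#include <cassert>

int main() {
  unsigned version = 120;            // OpenCL 1.2
  assert(version / 100 == 1);        // major
  assert((version % 100) / 10 == 2); // minor
  version = 300;                     // OpenCL 3.0
  assert(version / 100 == 3 && (version % 100) / 10 == 0);
  return 0;
}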
+ unsigned version = langOpts.getOpenCLCompatibleVersion(); + unsigned major = version / 100; + unsigned minor = (version % 100) / 10; + + auto clVersionAttr = + mlir::cir::OpenCLVersionAttr::get(builder.getContext(), major, minor); + + theModule->setAttr("cir.cl.version", clVersionAttr); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenCLRuntime.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenCLRuntime.cpp new file mode 100644 index 000000000000..863caf8629d2 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenCLRuntime.cpp @@ -0,0 +1,29 @@ +//===-- CIRGenOpenCLRuntime.cpp - Interface to OpenCL Runtimes ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides an abstract class for OpenCL CIR generation. Concrete +// subclasses of this implement code generation for specific OpenCL +// runtime libraries. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenOpenCLRuntime.h" +#include "CIRGenFunction.h" + +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" + +using namespace clang; +using namespace cir; + +CIRGenOpenCLRuntime::~CIRGenOpenCLRuntime() {} + +void CIRGenOpenCLRuntime::buildWorkGroupLocalVarDecl(CIRGenFunction &CGF, + const VarDecl &D) { + return CGF.buildStaticVarDecl(D, + mlir::cir::GlobalLinkageKind::InternalLinkage); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenCLRuntime.h b/clang/lib/CIR/CodeGen/CIRGenOpenCLRuntime.h new file mode 100644 index 000000000000..891b5bb5fb79 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenCLRuntime.h @@ -0,0 +1,46 @@ +//===-- CIRGenOpenCLRuntime.h - Interface to OpenCL Runtimes -----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides an abstract class for OpenCL CIR generation. Concrete +// subclasses of this implement code generation for specific OpenCL +// runtime libraries. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CIRGENOPENCLRUNTIME_H +#define LLVM_CLANG_LIB_CIR_CIRGENOPENCLRUNTIME_H + +namespace clang { + +class VarDecl; + +} // namespace clang + +namespace cir { + +class CIRGenFunction; +class CIRGenModule; + +class CIRGenOpenCLRuntime { +protected: + CIRGenModule &CGM; + +public: + CIRGenOpenCLRuntime(CIRGenModule &CGM) : CGM(CGM) {} + virtual ~CIRGenOpenCLRuntime(); + + /// Emit the IR required for a work-group-local variable declaration, and add + /// an entry to CGF's LocalDeclMap for D. The base class does this using + /// CIRGenFunction::EmitStaticVarDecl to emit an internal global for D. 
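  /// For example (a sketch of the expected lowering), a kernel-scope
  ///   kernel void k() { local int scratch[64]; ... }
  /// would have `scratch` emitted via buildStaticVarDecl as a cir.global with
  /// internal linkage in the workgroup (local) address space rather than as a
  /// function-local cir.alloca.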
+ virtual void buildWorkGroupLocalVarDecl(CIRGenFunction &CGF, + const clang::VarDecl &D); +}; + +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_CIRGENOPENCLRUNTIME_H diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenMPRuntime.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenMPRuntime.cpp new file mode 100644 index 000000000000..337ecc0820df --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenMPRuntime.cpp @@ -0,0 +1,106 @@ +//===--- CIRGenStmtOpenMP.cpp - Interface to OpenMP Runtimes --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime MLIR code generation. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenOpenMPRuntime.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" + +using namespace cir; +using namespace clang; + +CIRGenOpenMPRuntime::CIRGenOpenMPRuntime(CIRGenModule &CGM) : CGM(CGM) {} + +Address CIRGenOpenMPRuntime::getAddressOfLocalVariable(CIRGenFunction &CGF, + const VarDecl *VD) { + assert(!MissingFeatures::openMPRuntime()); + return Address::invalid(); +} + +void CIRGenOpenMPRuntime::checkAndEmitLastprivateConditional( + CIRGenFunction &CGF, const Expr *LHS) { + assert(!MissingFeatures::openMPRuntime()); + return; +} + +void CIRGenOpenMPRuntime::registerTargetGlobalVariable( + const clang::VarDecl *VD, mlir::cir::GlobalOp globalOp) { + assert(!MissingFeatures::openMPRuntime()); + return; +} + +void CIRGenOpenMPRuntime::emitDeferredTargetDecls() const { + assert(!MissingFeatures::openMPRuntime()); + return; +} + +void CIRGenOpenMPRuntime::emitFunctionProlog(CIRGenFunction &CGF, + const clang::Decl *D) { + assert(!MissingFeatures::openMPRuntime()); + return; +} + +bool CIRGenOpenMPRuntime::emitTargetGlobal(clang::GlobalDecl &GD) { + assert(!MissingFeatures::openMPRuntime()); + return false; +} + +void CIRGenOpenMPRuntime::emitTaskWaitCall(CIRGenBuilderTy &builder, + CIRGenFunction &CGF, + mlir::Location Loc, + const OMPTaskDataTy &Data) { + + if (!CGF.HaveInsertPoint()) + return; + + if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { + // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
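    // The op created below is presumably mlir::omp::TaskwaitOp, i.e.
    // builder.create<mlir::omp::TaskwaitOp>(Loc), which emits a bare
    // `omp.taskwait` operation for a dependence-free `#pragma omp taskwait`.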
+ // TODO(cir): This could change in the near future when OpenMP 5.0 gets + // supported by MLIR + builder.create(Loc); + } else { + llvm_unreachable("NYI"); + } + assert(!MissingFeatures::openMPRegionInfo()); +} + +void CIRGenOpenMPRuntime::emitBarrierCall(CIRGenBuilderTy &builder, + CIRGenFunction &CGF, + mlir::Location Loc) { + + assert(!MissingFeatures::openMPRegionInfo()); + + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + builder.create(Loc); + return; + } + + if (!CGF.HaveInsertPoint()) + return; + + llvm_unreachable("NYI"); +} + +void CIRGenOpenMPRuntime::emitTaskyieldCall(CIRGenBuilderTy &builder, + CIRGenFunction &CGF, + mlir::Location Loc) { + + if (!CGF.HaveInsertPoint()) + return; + + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + builder.create(Loc); + } else { + llvm_unreachable("NYI"); + } + + assert(!MissingFeatures::openMPRegionInfo()); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenMPRuntime.h b/clang/lib/CIR/CodeGen/CIRGenOpenMPRuntime.h new file mode 100644 index 000000000000..15a47eddd58c --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenOpenMPRuntime.h @@ -0,0 +1,113 @@ +//===--- CIRGenOpenMPRuntime.h - Interface to OpenMP Runtimes -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime MLIR code generation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENOPENMPRUNTIME_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENOPENMPRUNTIME_H + +#include "CIRGenBuilder.h" +#include "CIRGenValue.h" + +#include "clang/AST/Redeclarable.h" +#include "clang/Basic/OpenMPKinds.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +#include "llvm/Support/ErrorHandling.h" + +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/Location.h" + +#include "clang/CIR/MissingFeatures.h" + +namespace clang { +class Decl; +class Expr; +class GlobalDecl; +class VarDecl; +} // namespace clang + +namespace cir { +class CIRGenModule; +class CIRGenFunction; + +struct OMPTaskDataTy final { + struct DependData { + clang::OpenMPDependClauseKind DepKind = clang::OMPC_DEPEND_unknown; + const clang::Expr *IteratorExpr = nullptr; + llvm::SmallVector DepExprs; + explicit DependData() = default; + DependData(clang::OpenMPDependClauseKind DepKind, + const clang::Expr *IteratorExpr) + : DepKind(DepKind), IteratorExpr(IteratorExpr) {} + }; + llvm::SmallVector Dependences; + bool HasNowaitClause = false; +}; + +class CIRGenOpenMPRuntime { +public: + explicit CIRGenOpenMPRuntime(CIRGenModule &CGM); + virtual ~CIRGenOpenMPRuntime() {} + + /// Gets the OpenMP-specific address of the local variable. + virtual Address getAddressOfLocalVariable(CIRGenFunction &CGF, + const clang::VarDecl *VD); + + /// Checks if the provided \p LVal is lastprivate conditional and emits the + /// code to update the value of the original variable. + /// \code + /// lastprivate(conditional: a) + /// ... 
+ /// a; + /// lp_a = ...; + /// #pragma omp critical(a) + /// if (last_iv_a <= iv) { + /// last_iv_a = iv; + /// global_a = lp_a; + /// } + /// \endcode + virtual void checkAndEmitLastprivateConditional(CIRGenFunction &CGF, + const clang::Expr *LHS); + + /// Checks if the provided global decl \a GD is a declare target variable and + /// registers it when emitting code for the host. + virtual void registerTargetGlobalVariable(const clang::VarDecl *VD, + mlir::cir::GlobalOp globalOp); + + /// Emit deferred declare target variables marked for deferred emission. + void emitDeferredTargetDecls() const; + + /// Emits OpenMP-specific function prolog. + /// Required for device constructs. + virtual void emitFunctionProlog(CIRGenFunction &CGF, const clang::Decl *D); + + /// Emit the global \a GD if it is meaningful for the target. Returns + /// if it was emitted successfully. + /// \param GD Global to scan. + virtual bool emitTargetGlobal(clang::GlobalDecl &D); + + /// Emit code for 'taskwait' directive + virtual void emitTaskWaitCall(CIRGenBuilderTy &builder, CIRGenFunction &CGF, + mlir::Location Loc, const OMPTaskDataTy &Data); + + virtual void emitBarrierCall(CIRGenBuilderTy &builder, CIRGenFunction &CGF, + mlir::Location Loc); + + virtual void emitTaskyieldCall(CIRGenBuilderTy &builder, CIRGenFunction &CGF, + mlir::Location Loc); + +protected: + CIRGenModule &CGM; +}; +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENOPENMPRUNTIME_H diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h new file mode 100644 index 000000000000..16a8a1e2894e --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h @@ -0,0 +1,210 @@ +//===--- CIRGenRecordLayout.h - CIR Record Layout Information ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CIRGENRECORDLAYOUT_H +#define LLVM_CLANG_LIB_CIR_CIRGENRECORDLAYOUT_H + +#include "clang/AST/Decl.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "llvm/Support/raw_ostream.h" + +namespace cir { + +/// Structure with information about how a bitfield should be accessed. This is +/// very similar to what LLVM codegen does, once CIR evolves it's possible we +/// can use a more higher level representation. +/// TODO(cir): the comment below is extracted from LLVM, build a CIR version of +/// this. +/// +/// Often we layout a sequence of bitfields as a contiguous sequence of bits. +/// When the AST record layout does this, we represent it in the LLVM IR's type +/// as either a sequence of i8 members or a byte array to reserve the number of +/// bytes touched without forcing any particular alignment beyond the basic +/// character alignment. +/// +/// Then accessing a particular bitfield involves converting this byte array +/// into a single integer of that size (i24 or i40 -- may not be power-of-two +/// size), loading it, and shifting and masking to extract the particular +/// subsequence of bits which make up that particular bitfield. This structure +/// encodes the information used to construct the extraction code sequences. +/// The CIRGenRecordLayout also has a field index which encodes which +/// byte-sequence this bitfield falls within. 
Let's assume the following C +/// struct: +/// +/// struct S { +/// char a, b, c; +/// unsigned bits : 3; +/// unsigned more_bits : 4; +/// unsigned still_more_bits : 7; +/// }; +/// +/// This will end up as the following LLVM type. The first array is the +/// bitfield, and the second is the padding out to a 4-byte alignment. +/// +/// %t = type { i8, i8, i8, i8, i8, [3 x i8] } +/// +/// When generating code to access more_bits, we'll generate something +/// essentially like this: +/// +/// define i32 @foo(%t* %base) { +/// %0 = gep %t* %base, i32 0, i32 3 +/// %2 = load i8* %1 +/// %3 = lshr i8 %2, 3 +/// %4 = and i8 %3, 15 +/// %5 = zext i8 %4 to i32 +/// ret i32 %i +/// } +/// +struct CIRGenBitFieldInfo { + /// The offset within a contiguous run of bitfields that are represented as + /// a single "field" within the LLVM struct type. This offset is in bits. + unsigned Offset : 16; + + /// The total size of the bit-field, in bits. + unsigned Size : 15; + + /// Whether the bit-field is signed. + unsigned IsSigned : 1; + + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned StorageSize; + + /// The offset of the bitfield storage from the start of the struct. + clang::CharUnits StorageOffset; + + /// The offset within a contiguous run of bitfields that are represented as a + /// single "field" within the LLVM struct type, taking into account the AAPCS + /// rules for volatile bitfields. This offset is in bits. + unsigned VolatileOffset : 16; + + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned VolatileStorageSize; + + /// The offset of the bitfield storage from the start of the struct. + clang::CharUnits VolatileStorageOffset; + + /// The name of a bitfield + llvm::StringRef Name; + + // The actual storage type for the bitfield + mlir::Type StorageType; + + CIRGenBitFieldInfo() + : Offset(), Size(), IsSigned(), StorageSize(), VolatileOffset(), + VolatileStorageSize() {} + + CIRGenBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, + unsigned StorageSize, clang::CharUnits StorageOffset) + : Offset(Offset), Size(Size), IsSigned(IsSigned), + StorageSize(StorageSize), StorageOffset(StorageOffset) {} + + void print(llvm::raw_ostream &OS) const; + void dump() const; + + /// Given a bit-field decl, build an appropriate helper object for + /// accessing that field (which is expected to have the given offset and + /// size). + static CIRGenBitFieldInfo MakeInfo(class CIRGenTypes &Types, + const clang::FieldDecl *FD, + uint64_t Offset, uint64_t Size, + uint64_t StorageSize, + clang::CharUnits StorageOffset); +}; + +/// This class handles struct and union layout info while lowering AST types +/// to CIR types. +/// +/// These layout objects are only created on demand as CIR generation requires. +class CIRGenRecordLayout { + friend class CIRGenTypes; + + CIRGenRecordLayout(const CIRGenRecordLayout &) = delete; + void operator=(const CIRGenRecordLayout &) = delete; + +private: + /// The CIR type corresponding to this record layout; used when laying it out + /// as a complete object. + mlir::cir::StructType CompleteObjectType; + + /// The CIR type for the non-virtual part of this record layout; used when + /// laying it out as a base subobject. + mlir::cir::StructType BaseSubobjectType; + + /// Map from (non-bit-field) struct field to the corresponding cir struct type + /// field no. This info is populated by the record builder. 
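  /// For example (hypothetical), given
  ///   struct P { int a; double b; };
  /// lowered to a two-element cir.struct, this map would hold `a` -> 0 and
  /// `b` -> 1, which is what getCIRFieldNo() returns below.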
+ llvm::DenseMap FieldInfo; + + /// Map from (bit-field) struct field to the corresponding CIR struct type + /// field no. This info is populated by record builder. + /// TODO(CIR): value is an int for now, fix when we support bitfields + llvm::DenseMap BitFields; + + // FIXME: Maybe we could use CXXBaseSpecifier as the key and use a single map + // for both virtual and non-virtual bases. + llvm::DenseMap NonVirtualBases; + + /// Map from virtual bases to their field index in the complete object. + llvm::DenseMap + CompleteObjectVirtualBases; + + /// False if any direct or indirect subobject of this class, when considered + /// as a complete object, requires a non-zero bitpattern when + /// zero-initialized. + bool IsZeroInitializable : 1; + + /// False if any direct or indirect subobject of this class, when considered + /// as a base subobject, requires a non-zero bitpattern when zero-initialized. + bool IsZeroInitializableAsBase : 1; + +public: + CIRGenRecordLayout(mlir::cir::StructType CompleteObjectType, + mlir::cir::StructType BaseSubobjectType, + bool IsZeroInitializable, bool IsZeroInitializableAsBase) + : CompleteObjectType(CompleteObjectType), + BaseSubobjectType(BaseSubobjectType), + IsZeroInitializable(IsZeroInitializable), + IsZeroInitializableAsBase(IsZeroInitializableAsBase) {} + + /// Return the "complete object" LLVM type associated with + /// this record. + mlir::cir::StructType getCIRType() const { return CompleteObjectType; } + + /// Return the "base subobject" LLVM type associated with + /// this record. + mlir::cir::StructType getBaseSubobjectCIRType() const { + return BaseSubobjectType; + } + + /// Return cir::StructType element number that corresponds to the field FD. + unsigned getCIRFieldNo(const clang::FieldDecl *FD) const { + FD = FD->getCanonicalDecl(); + assert(FieldInfo.count(FD) && "Invalid field for record!"); + return FieldInfo.lookup(FD); + } + + /// Check whether this struct can be C++ zero-initialized with a + /// zeroinitializer. + bool isZeroInitializable() const { return IsZeroInitializable; } + + /// Return the BitFieldInfo that corresponds to the field FD. + const CIRGenBitFieldInfo &getBitFieldInfo(const clang::FieldDecl *FD) const { + FD = FD->getCanonicalDecl(); + assert(FD->isBitField() && "Invalid call for non-bit-field decl!"); + llvm::DenseMap::const_iterator + it = BitFields.find(FD); + assert(it != BitFields.end() && "Unable to find bitfield info"); + return it->second; + } +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp new file mode 100644 index 000000000000..1b0829c8e8bb --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -0,0 +1,1128 @@ +//===--- CIRGenStmt.cpp - Emit CIR Code from Statements -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Stmt nodes as CIR code. 
+// +//===----------------------------------------------------------------------===// + +#include "Address.h" +#include "CIRGenBuilder.h" +#include "CIRGenFunction.h" +#include "mlir/IR/Value.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/Stmt.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace cir; +using namespace clang; +using namespace mlir::cir; + +Address CIRGenFunction::buildCompoundStmtWithoutScope(const CompoundStmt &S, + bool getLast, + AggValueSlot slot) { + const Stmt *ExprResult = S.getStmtExprResult(); + assert((!getLast || (getLast && ExprResult)) && + "If getLast is true then the CompoundStmt must have a StmtExprResult"); + + Address retAlloca = Address::invalid(); + + for (auto *CurStmt : S.body()) { + if (getLast && ExprResult == CurStmt) { + while (!isa(ExprResult)) { + if (const auto *LS = dyn_cast(ExprResult)) + llvm_unreachable("labels are NYI"); + else if (const auto *AS = dyn_cast(ExprResult)) + llvm_unreachable("statement attributes are NYI"); + else + llvm_unreachable("Unknown value statement"); + } + + const Expr *E = cast(ExprResult); + QualType exprTy = E->getType(); + if (hasAggregateEvaluationKind(exprTy)) { + buildAggExpr(E, slot); + } else { + // We can't return an RValue here because there might be cleanups at + // the end of the StmtExpr. Because of that, we have to emit the result + // here into a temporary alloca. + retAlloca = CreateMemTemp(exprTy, getLoc(E->getSourceRange())); + buildAnyExprToMem(E, retAlloca, Qualifiers(), + /*IsInit*/ false); + } + } else { + if (buildStmt(CurStmt, /*useCurrentScope=*/false).failed()) + llvm_unreachable("failed to build statement"); + } + } + + return retAlloca; +} + +Address CIRGenFunction::buildCompoundStmt(const CompoundStmt &S, bool getLast, + AggValueSlot slot) { + Address retAlloca = Address::invalid(); + + // Add local scope to track new declared variables. + SymTableScopeTy varScope(symbolTable); + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Type &type, mlir::Location loc) { + LexicalScope lexScope{*this, loc, builder.getInsertionBlock()}; + retAlloca = buildCompoundStmtWithoutScope(S, getLast, slot); + }); + + return retAlloca; +} + +void CIRGenFunction::buildStopPoint(const Stmt *S) { + assert(!MissingFeatures::generateDebugInfo()); +} + +// Build CIR for a statement. useCurrentScope should be true if no +// new scopes need be created when finding a compound statement. 
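// For example, loop bodies are typically emitted with useCurrentScope = true
// because the enclosing cir.for/cir.while already lives inside its own
// cir.scope (see buildWhileStmt and friends below), whereas a free-standing
// `{ ... }` block statement goes through buildCompoundStmt and is wrapped in
// a fresh cir.scope.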
+mlir::LogicalResult CIRGenFunction::buildStmt(const Stmt *S, + bool useCurrentScope, + ArrayRef Attrs) { + if (mlir::succeeded(buildSimpleStmt(S, useCurrentScope))) + return mlir::success(); + + if (getContext().getLangOpts().OpenMP && + getContext().getLangOpts().OpenMPSimd) + assert(0 && "not implemented"); + + switch (S->getStmtClass()) { + case Stmt::OMPScopeDirectiveClass: + llvm_unreachable("NYI"); + case Stmt::OpenACCComputeConstructClass: + case Stmt::OpenACCLoopConstructClass: + case Stmt::OMPErrorDirectiveClass: + case Stmt::NoStmtClass: + case Stmt::CXXCatchStmtClass: + case Stmt::SEHExceptStmtClass: + case Stmt::SEHFinallyStmtClass: + case Stmt::MSDependentExistsStmtClass: + llvm_unreachable("invalid statement class to emit generically"); + case Stmt::NullStmtClass: + case Stmt::CompoundStmtClass: + case Stmt::DeclStmtClass: + case Stmt::LabelStmtClass: + case Stmt::AttributedStmtClass: + case Stmt::GotoStmtClass: + case Stmt::BreakStmtClass: + case Stmt::ContinueStmtClass: + case Stmt::DefaultStmtClass: + case Stmt::CaseStmtClass: + case Stmt::SEHLeaveStmtClass: + llvm_unreachable("should have emitted these statements as simple"); + +#define STMT(Type, Base) +#define ABSTRACT_STMT(Op) +#define EXPR(Type, Base) case Stmt::Type##Class: +#include "clang/AST/StmtNodes.inc" + { + // Remember the block we came in on. + mlir::Block *incoming = builder.getInsertionBlock(); + assert(incoming && "expression emission must have an insertion point"); + + buildIgnoredExpr(cast(S)); + + mlir::Block *outgoing = builder.getInsertionBlock(); + assert(outgoing && "expression emission cleared block!"); + + break; + } + + case Stmt::IfStmtClass: + if (buildIfStmt(cast(*S)).failed()) + return mlir::failure(); + break; + case Stmt::SwitchStmtClass: + if (buildSwitchStmt(cast(*S)).failed()) + return mlir::failure(); + break; + case Stmt::ForStmtClass: + if (buildForStmt(cast(*S)).failed()) + return mlir::failure(); + break; + case Stmt::WhileStmtClass: + if (buildWhileStmt(cast(*S)).failed()) + return mlir::failure(); + break; + case Stmt::DoStmtClass: + if (buildDoStmt(cast(*S)).failed()) + return mlir::failure(); + break; + + case Stmt::CoroutineBodyStmtClass: + return buildCoroutineBody(cast(*S)); + case Stmt::CoreturnStmtClass: + return buildCoreturnStmt(cast(*S)); + + case Stmt::CXXTryStmtClass: + return buildCXXTryStmt(cast(*S)); + + case Stmt::CXXForRangeStmtClass: + return buildCXXForRangeStmt(cast(*S), Attrs); + + case Stmt::IndirectGotoStmtClass: + case Stmt::ReturnStmtClass: + // When implemented, GCCAsmStmtClass should fall-through to MSAsmStmtClass. 
+ case Stmt::GCCAsmStmtClass: + case Stmt::MSAsmStmtClass: + return buildAsmStmt(cast(*S)); + // OMP directives: + case Stmt::OMPParallelDirectiveClass: + return buildOMPParallelDirective(cast(*S)); + case Stmt::OMPTaskwaitDirectiveClass: + return buildOMPTaskwaitDirective(cast(*S)); + case Stmt::OMPTaskyieldDirectiveClass: + return buildOMPTaskyieldDirective(cast(*S)); + case Stmt::OMPBarrierDirectiveClass: + return buildOMPBarrierDirective(cast(*S)); + // Unsupported AST nodes: + case Stmt::CapturedStmtClass: + case Stmt::ObjCAtTryStmtClass: + case Stmt::ObjCAtThrowStmtClass: + case Stmt::ObjCAtSynchronizedStmtClass: + case Stmt::ObjCForCollectionStmtClass: + case Stmt::ObjCAutoreleasePoolStmtClass: + case Stmt::SEHTryStmtClass: + case Stmt::OMPMetaDirectiveClass: + case Stmt::OMPCanonicalLoopClass: + case Stmt::OMPSimdDirectiveClass: + case Stmt::OMPTileDirectiveClass: + case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPForDirectiveClass: + case Stmt::OMPForSimdDirectiveClass: + case Stmt::OMPSectionsDirectiveClass: + case Stmt::OMPSectionDirectiveClass: + case Stmt::OMPSingleDirectiveClass: + case Stmt::OMPMasterDirectiveClass: + case Stmt::OMPCriticalDirectiveClass: + case Stmt::OMPParallelForDirectiveClass: + case Stmt::OMPParallelForSimdDirectiveClass: + case Stmt::OMPParallelMasterDirectiveClass: + case Stmt::OMPParallelSectionsDirectiveClass: + case Stmt::OMPTaskDirectiveClass: + case Stmt::OMPTaskgroupDirectiveClass: + case Stmt::OMPFlushDirectiveClass: + case Stmt::OMPDepobjDirectiveClass: + case Stmt::OMPScanDirectiveClass: + case Stmt::OMPOrderedDirectiveClass: + case Stmt::OMPAtomicDirectiveClass: + case Stmt::OMPTargetDirectiveClass: + case Stmt::OMPTeamsDirectiveClass: + case Stmt::OMPCancellationPointDirectiveClass: + case Stmt::OMPCancelDirectiveClass: + case Stmt::OMPTargetDataDirectiveClass: + case Stmt::OMPTargetEnterDataDirectiveClass: + case Stmt::OMPTargetExitDataDirectiveClass: + case Stmt::OMPTargetParallelDirectiveClass: + case Stmt::OMPTargetParallelForDirectiveClass: + case Stmt::OMPTaskLoopDirectiveClass: + case Stmt::OMPTaskLoopSimdDirectiveClass: + case Stmt::OMPMaskedTaskLoopDirectiveClass: + case Stmt::OMPMaskedTaskLoopSimdDirectiveClass: + case Stmt::OMPMasterTaskLoopDirectiveClass: + case Stmt::OMPMasterTaskLoopSimdDirectiveClass: + case Stmt::OMPParallelGenericLoopDirectiveClass: + case Stmt::OMPParallelMaskedDirectiveClass: + case Stmt::OMPParallelMaskedTaskLoopDirectiveClass: + case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass: + case Stmt::OMPParallelMasterTaskLoopDirectiveClass: + case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass: + case Stmt::OMPDistributeDirectiveClass: + case Stmt::OMPDistributeParallelForDirectiveClass: + case Stmt::OMPDistributeParallelForSimdDirectiveClass: + case Stmt::OMPDistributeSimdDirectiveClass: + case Stmt::OMPTargetParallelGenericLoopDirectiveClass: + case Stmt::OMPTargetParallelForSimdDirectiveClass: + case Stmt::OMPTargetSimdDirectiveClass: + case Stmt::OMPTargetTeamsGenericLoopDirectiveClass: + case Stmt::OMPTargetUpdateDirectiveClass: + case Stmt::OMPTeamsDistributeDirectiveClass: + case Stmt::OMPTeamsDistributeSimdDirectiveClass: + case Stmt::OMPTeamsDistributeParallelForSimdDirectiveClass: + case Stmt::OMPTeamsDistributeParallelForDirectiveClass: + case Stmt::OMPTeamsGenericLoopDirectiveClass: + case Stmt::OMPTargetTeamsDirectiveClass: + case Stmt::OMPTargetTeamsDistributeDirectiveClass: + case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: + case 
Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: + case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + case Stmt::OMPInteropDirectiveClass: + case Stmt::OMPDispatchDirectiveClass: + case Stmt::OMPGenericLoopDirectiveClass: + case Stmt::OMPMaskedDirectiveClass: { + llvm::errs() << "CIR codegen for '" << S->getStmtClassName() + << "' not implemented\n"; + assert(0 && "not implemented"); + break; + } + case Stmt::ObjCAtCatchStmtClass: + llvm_unreachable( + "@catch statements should be handled by EmitObjCAtTryStmt"); + case Stmt::ObjCAtFinallyStmtClass: + llvm_unreachable( + "@finally statements should be handled by EmitObjCAtTryStmt"); + } + + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildSimpleStmt(const Stmt *S, + bool useCurrentScope) { + switch (S->getStmtClass()) { + default: + return mlir::failure(); + case Stmt::DeclStmtClass: + return buildDeclStmt(cast(*S)); + case Stmt::CompoundStmtClass: + useCurrentScope ? buildCompoundStmtWithoutScope(cast(*S)) + : buildCompoundStmt(cast(*S)); + break; + case Stmt::ReturnStmtClass: + return buildReturnStmt(cast(*S)); + case Stmt::GotoStmtClass: + return buildGotoStmt(cast(*S)); + case Stmt::ContinueStmtClass: + return buildContinueStmt(cast(*S)); + case Stmt::NullStmtClass: + break; + + case Stmt::LabelStmtClass: + return buildLabelStmt(cast(*S)); + + case Stmt::CaseStmtClass: + case Stmt::DefaultStmtClass: + return buildSwitchCase(cast(*S)); + break; + + case Stmt::BreakStmtClass: + return buildBreakStmt(cast(*S)); + + case Stmt::AttributedStmtClass: + return buildAttributedStmt(cast(*S)); + + case Stmt::SEHLeaveStmtClass: + llvm::errs() << "CIR codegen for '" << S->getStmtClassName() + << "' not implemented\n"; + assert(0 && "not implemented"); + } + + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildLabelStmt(const clang::LabelStmt &S) { + if (buildLabel(S.getDecl()).failed()) + return mlir::failure(); + + // IsEHa: not implemented. + assert(!(getContext().getLangOpts().EHAsynch && S.isSideEntry())); + + return buildStmt(S.getSubStmt(), /* useCurrentScope */ true); +} + +mlir::LogicalResult +CIRGenFunction::buildAttributedStmt(const AttributedStmt &S) { + for (const auto *A : S.getAttrs()) { + switch (A->getKind()) { + case attr::NoMerge: + case attr::NoInline: + case attr::AlwaysInline: + case attr::MustTail: + llvm_unreachable("NIY attributes"); + default: + break; + } + } + + return buildStmt(S.getSubStmt(), true, S.getAttrs()); +} + +// Add terminating yield on body regions (loops, ...) in case there are +// not other terminators used. +// FIXME: make terminateCaseRegion use this too. +static void terminateBody(CIRGenBuilderTy &builder, mlir::Region &r, + mlir::Location loc) { + if (r.empty()) + return; + + SmallVector eraseBlocks; + unsigned numBlocks = r.getBlocks().size(); + for (auto &block : r.getBlocks()) { + // Already cleanup after return operations, which might create + // empty blocks if emitted as last stmt. 
+ if (numBlocks != 1 && block.empty() && block.hasNoPredecessors() && + block.hasNoSuccessors()) + eraseBlocks.push_back(&block); + + if (block.empty() || + !block.back().hasTrait()) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPointToEnd(&block); + builder.createYield(loc); + } + } + + for (auto *b : eraseBlocks) + b->erase(); +} + +mlir::LogicalResult CIRGenFunction::buildIfStmt(const IfStmt &S) { + mlir::LogicalResult res = mlir::success(); + // The else branch of a consteval if statement is always the only branch + // that can be runtime evaluated. + const Stmt *ConstevalExecuted; + if (S.isConsteval()) { + ConstevalExecuted = S.isNegatedConsteval() ? S.getThen() : S.getElse(); + if (!ConstevalExecuted) + // No runtime code execution required + return res; + } + + // C99 6.8.4.1: The first substatement is executed if the expression + // compares unequal to 0. The condition must be a scalar type. + auto ifStmtBuilder = [&]() -> mlir::LogicalResult { + if (S.isConsteval()) + return buildStmt(ConstevalExecuted, /*useCurrentScope=*/true); + + if (S.getInit()) + if (buildStmt(S.getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + if (S.getConditionVariable()) + buildDecl(*S.getConditionVariable()); + + // During LLVM codegen, if the condition constant folds and can be elided, + // it tries to avoid emitting the condition and the dead arm of the if/else. + // TODO(cir): we skip this in CIRGen, but should implement this as part of + // SSCP or a specific CIR pass. + bool CondConstant; + if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant, + S.isConstexpr())) { + if (S.isConstexpr()) { + // Handle "if constexpr" explicitly here to avoid generating some + // ill-formed code since in CIR the "if" is no longer simplified + // in this lambda like in Clang but postponed to other MLIR + // passes. + if (const Stmt *Executed = CondConstant ? S.getThen() : S.getElse()) + return buildStmt(Executed, /*useCurrentScope=*/true); + // There is nothing to execute at runtime. + // TODO(cir): there is still an empty cir.scope generated by the caller. + return mlir::success(); + } + assert(!MissingFeatures::constantFoldsToSimpleInteger()); + } + + assert(!MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + assert(!MissingFeatures::incrementProfileCounter()); + return buildIfOnBoolExpr(S.getCond(), S.getThen(), S.getElse()); + }; + + // TODO: Add a new scoped symbol table. + // LexicalScope ConditionScope(*this, S.getCond()->getSourceRange()); + // The if scope contains the full source range for IfStmt. + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, scopeLoc, builder.getInsertionBlock()}; + res = ifStmtBuilder(); + }); + + return res; +} + +mlir::LogicalResult CIRGenFunction::buildDeclStmt(const DeclStmt &S) { + if (!builder.getInsertionBlock()) { + CGM.emitError("Seems like this is unreachable code, what should we do?"); + return mlir::failure(); + } + + for (const auto *I : S.decls()) { + buildDecl(*I); + } + + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildReturnStmt(const ReturnStmt &S) { + assert(!MissingFeatures::requiresReturnValueCheck()); + auto loc = getLoc(S.getSourceRange()); + + // Emit the result value, even if unused, to evaluate the side effects. 
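  // Sketch of the common scalar case: for `return f();` with an int return
  // type, the call result is stored into the return-value slot and the current
  // block is terminated by a branch to the shared return block of the
  // enclosing lexical scope; the actual cir.return is only materialized during
  // scope cleanup.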
+ const Expr *RV = S.getRetValue(); + + // TODO(cir): LLVM codegen uses a RunCleanupsScope cleanupScope here, we + // should model this in face of dtors. + + bool createNewScope = false; + if (const auto *EWC = dyn_cast_or_null(RV)) { + RV = EWC->getSubExpr(); + createNewScope = true; + } + + auto handleReturnVal = [&]() { + if (getContext().getLangOpts().ElideConstructors && S.getNRVOCandidate() && + S.getNRVOCandidate()->isNRVOVariable()) { + assert(!MissingFeatures::openMP()); + // Apply the named return value optimization for this return statement, + // which means doing nothing: the appropriate result has already been + // constructed into the NRVO variable. + + // If there is an NRVO flag for this variable, set it to 1 into indicate + // that the cleanup code should not destroy the variable. + if (auto NRVOFlag = NRVOFlags[S.getNRVOCandidate()]) + getBuilder().createFlagStore(loc, true, NRVOFlag); + } else if (!ReturnValue.isValid() || (RV && RV->getType()->isVoidType())) { + // Make sure not to return anything, but evaluate the expression + // for side effects. + if (RV) { + buildAnyExpr(RV); + } + } else if (!RV) { + // Do nothing (return value is left uninitialized) + } else if (FnRetTy->isReferenceType()) { + // If this function returns a reference, take the address of the + // expression rather than the value. + RValue Result = buildReferenceBindingToExpr(RV); + builder.createStore(loc, Result.getScalarVal(), ReturnValue); + } else { + mlir::Value V = nullptr; + switch (CIRGenFunction::getEvaluationKind(RV->getType())) { + case TEK_Scalar: + V = buildScalarExpr(RV); + builder.CIRBaseBuilderTy::createStore(loc, V, *FnRetAlloca); + break; + case TEK_Complex: + buildComplexExprIntoLValue(RV, + makeAddrLValue(ReturnValue, RV->getType()), + /*isInit*/ true); + break; + case TEK_Aggregate: + buildAggExpr( + RV, AggValueSlot::forAddr( + ReturnValue, Qualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased, getOverlapForReturnValue())); + break; + } + } + }; + + if (!createNewScope) + handleReturnVal(); + else { + mlir::Location scopeLoc = + getLoc(RV ? RV->getSourceRange() : S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{*this, loc, + builder.getInsertionBlock()}; + handleReturnVal(); + }); + } + + // Create a new return block (if not existent) and add a branch to + // it. The actual return instruction is only inserted during current + // scope cleanup handling. + auto *retBlock = currLexScope->getOrCreateRetBlock(*this, loc); + builder.create(loc, retBlock); + + // Insert the new block to continue codegen after branch to ret block. + builder.createBlock(builder.getBlock()->getParent()); + + // TODO(cir): LLVM codegen for a cleanup on cleanupScope here. + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildGotoStmt(const GotoStmt &S) { + // FIXME: LLVM codegen inserts emit stop point here for debug info + // sake when the insertion point is available, but doesn't do + // anything special when there isn't. We haven't implemented debug + // info support just yet, look at this again once we have it. + assert(builder.getInsertionBlock() && "not yet implemented"); + + builder.create(getLoc(S.getSourceRange()), + S.getLabel()->getName()); + + // A goto marks the end of a block, create a new one for codegen after + // buildGotoStmt can resume building in that block. 
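  // Roughly, `goto err;` is emitted as a `cir.goto "err"` terminator here,
  // while the matching `err:` label is emitted by buildLabel below as a block
  // headed by `cir.label "err"`; a later CIR pass is expected to rewire the
  // pair into real branches.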
+ // Insert the new block to continue codegen after goto. + builder.createBlock(builder.getBlock()->getParent()); + + // What here... + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildLabel(const LabelDecl *D) { + // Create a new block to tag with a label and add a branch from + // the current one to it. If the block is empty just call attach it + // to this label. + mlir::Block *currBlock = builder.getBlock(); + mlir::Block *labelBlock = currBlock; + if (!currBlock->empty()) { + { + mlir::OpBuilder::InsertionGuard guard(builder); + labelBlock = builder.createBlock(builder.getBlock()->getParent()); + } + builder.create(getLoc(D->getSourceRange()), labelBlock); + } + + builder.setInsertionPointToEnd(labelBlock); + builder.create(getLoc(D->getSourceRange()), D->getName()); + builder.setInsertionPointToEnd(labelBlock); + + // FIXME: emit debug info for labels, incrementProfileCounter + return mlir::success(); +} + +mlir::LogicalResult +CIRGenFunction::buildContinueStmt(const clang::ContinueStmt &S) { + builder.createContinue(getLoc(S.getContinueLoc())); + + // Insert the new block to continue codegen after the continue statement. + builder.createBlock(builder.getBlock()->getParent()); + + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildBreakStmt(const clang::BreakStmt &S) { + builder.createBreak(getLoc(S.getBreakLoc())); + + // Insert the new block to continue codegen after the break statement. + builder.createBlock(builder.getBlock()->getParent()); + + return mlir::success(); +} + +const CaseStmt * +CIRGenFunction::foldCaseStmt(const clang::CaseStmt &S, mlir::Type condType, + SmallVector &caseAttrs) { + auto *ctxt = builder.getContext(); + + const CaseStmt *caseStmt = &S; + const CaseStmt *lastCase = &S; + SmallVector caseEltValueListAttr; + + int caseAttrCount = 0; + + // Fold cascading cases whenever possible to simplify codegen a bit. + while (caseStmt) { + lastCase = caseStmt; + + auto intVal = caseStmt->getLHS()->EvaluateKnownConstInt(getContext()); + + if (auto *rhs = caseStmt->getRHS()) { + auto endVal = rhs->EvaluateKnownConstInt(getContext()); + SmallVector rangeCaseAttr = { + mlir::cir::IntAttr::get(condType, intVal), + mlir::cir::IntAttr::get(condType, endVal)}; + auto caseAttr = mlir::cir::CaseAttr::get( + ctxt, builder.getArrayAttr(rangeCaseAttr), + CaseOpKindAttr::get(ctxt, mlir::cir::CaseOpKind::Range)); + caseAttrs.push_back(caseAttr); + ++caseAttrCount; + } else { + caseEltValueListAttr.push_back(mlir::cir::IntAttr::get(condType, intVal)); + } + + caseStmt = dyn_cast_or_null(caseStmt->getSubStmt()); + } + + if (!caseEltValueListAttr.empty()) { + auto caseOpKind = caseEltValueListAttr.size() > 1 + ? 
mlir::cir::CaseOpKind::Anyof + : mlir::cir::CaseOpKind::Equal; + auto caseAttr = mlir::cir::CaseAttr::get( + ctxt, builder.getArrayAttr(caseEltValueListAttr), + CaseOpKindAttr::get(ctxt, caseOpKind)); + caseAttrs.push_back(caseAttr); + ++caseAttrCount; + } + + assert(caseAttrCount > 0 && "there should be at least one valid case attr"); + + for (int i = 1; i < caseAttrCount; ++i) { + // If there are multiple case attributes, we need to create a new region + auto *region = currLexScope->createSwitchRegion(); + builder.createBlock(region); + } + + return lastCase; +} + +template +mlir::LogicalResult CIRGenFunction::buildCaseDefaultCascade( + const T *stmt, mlir::Type condType, + SmallVector &caseAttrs) { + + assert((isa(stmt)) && + "only case or default stmt go here"); + + auto res = mlir::success(); + + // Update scope information with the current region we are + // emitting code for. This is useful to allow return blocks to be + // automatically and properly placed during cleanup. + auto *region = currLexScope->createSwitchRegion(); + auto *block = builder.createBlock(region); + builder.setInsertionPointToEnd(block); + + auto *sub = stmt->getSubStmt(); + + if (isa(sub) && isa(stmt)) { + builder.createYield(getLoc(stmt->getBeginLoc())); + res = buildDefaultStmt(*dyn_cast(sub), condType, caseAttrs); + } else if (isa(sub) && isa(stmt)) { + builder.createYield(getLoc(stmt->getBeginLoc())); + res = buildCaseStmt(*dyn_cast(sub), condType, caseAttrs); + } else { + res = buildStmt(sub, /*useCurrentScope=*/!isa(sub)); + } + + return res; +} + +mlir::LogicalResult +CIRGenFunction::buildCaseStmt(const CaseStmt &S, mlir::Type condType, + SmallVector &caseAttrs) { + auto *caseStmt = foldCaseStmt(S, condType, caseAttrs); + return buildCaseDefaultCascade(caseStmt, condType, caseAttrs); +} + +mlir::LogicalResult +CIRGenFunction::buildDefaultStmt(const DefaultStmt &S, mlir::Type condType, + SmallVector &caseAttrs) { + auto ctxt = builder.getContext(); + + auto defAttr = mlir::cir::CaseAttr::get( + ctxt, builder.getArrayAttr({}), + CaseOpKindAttr::get(ctxt, mlir::cir::CaseOpKind::Default)); + + caseAttrs.push_back(defAttr); + return buildCaseDefaultCascade(&S, condType, caseAttrs); +} + +mlir::LogicalResult CIRGenFunction::buildSwitchCase(const SwitchCase &S) { + assert(!caseAttrsStack.empty() && + "build switch case without seeting case attrs"); + assert(!condTypeStack.empty() && + "build switch case without specifying the type of the condition"); + + if (S.getStmtClass() == Stmt::CaseStmtClass) + return buildCaseStmt(cast(S), condTypeStack.back(), + caseAttrsStack.back()); + + if (S.getStmtClass() == Stmt::DefaultStmtClass) + return buildDefaultStmt(cast(S), condTypeStack.back(), + caseAttrsStack.back()); + + llvm_unreachable("expect case or default stmt"); +} + +mlir::LogicalResult +CIRGenFunction::buildCXXForRangeStmt(const CXXForRangeStmt &S, + ArrayRef ForAttrs) { + mlir::cir::ForOp forOp; + + // TODO(cir): pass in array of attributes. + auto forStmtBuilder = [&]() -> mlir::LogicalResult { + auto loopRes = mlir::success(); + // Evaluate the first pieces before the loop. 
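    // For `for (auto x : r) body;` these are the implicitly generated
    // declarations: the optional init-statement, then `__range`, `__begin`
    // and `__end`; the condition region built below evaluates
    // `__begin != __end` and the loop variable `x` is bound at the top of
    // the body.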
+ if (S.getInit()) + if (buildStmt(S.getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + if (buildStmt(S.getRangeStmt(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + if (buildStmt(S.getBeginStmt(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + if (buildStmt(S.getEndStmt(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + assert(!MissingFeatures::loopInfoStack()); + // From LLVM: if there are any cleanups between here and the loop-exit + // scope, create a block to stage a loop exit along. + // We probably already do the right thing because of ScopeOp, but make + // sure we handle all cases. + assert(!MissingFeatures::requiresCleanups()); + + forOp = builder.createFor( + getLoc(S.getSourceRange()), + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + assert(!MissingFeatures::createProfileWeightsForLoop()); + assert(!MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + mlir::Value condVal = evaluateExprAsBool(S.getCond()); + builder.createCondition(condVal); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // https://en.cppreference.com/w/cpp/language/for + // In C++ the scope of the init-statement and the scope of + // statement are one and the same. + bool useCurrentScope = true; + if (buildStmt(S.getLoopVarStmt(), useCurrentScope).failed()) + loopRes = mlir::failure(); + if (buildStmt(S.getBody(), useCurrentScope).failed()) + loopRes = mlir::failure(); + buildStopPoint(&S); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + if (S.getInc()) + if (buildStmt(S.getInc(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + builder.createYield(loc); + }); + return loopRes; + }; + + auto res = mlir::success(); + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // Create a cleanup scope for the condition variable cleanups. + // Logical equivalent from LLVM codegn for + // LexicalScope ConditionScope(*this, S.getSourceRange())... + LexicalScope lexScope{*this, loc, builder.getInsertionBlock()}; + res = forStmtBuilder(); + }); + + if (res.failed()) + return res; + + terminateBody(builder, forOp.getBody(), getLoc(S.getEndLoc())); + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildForStmt(const ForStmt &S) { + mlir::cir::ForOp forOp; + + // TODO: pass in array of attributes. + auto forStmtBuilder = [&]() -> mlir::LogicalResult { + auto loopRes = mlir::success(); + // Evaluate the first part before the loop. + if (S.getInit()) + if (buildStmt(S.getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + assert(!MissingFeatures::loopInfoStack()); + // From LLVM: if there are any cleanups between here and the loop-exit + // scope, create a block to stage a loop exit along. + // We probably already do the right thing because of ScopeOp, but make + // sure we handle all cases. + assert(!MissingFeatures::requiresCleanups()); + + forOp = builder.createFor( + getLoc(S.getSourceRange()), + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + assert(!MissingFeatures::createProfileWeightsForLoop()); + assert(!MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + mlir::Value condVal; + if (S.getCond()) { + // If the for statement has a condition scope, + // emit the local variable declaration. 
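          // e.g. `for (int i = 0; bool ok = check(i); ++i)` declares `ok`
          // here (check() being an arbitrary placeholder) so that it is
          // re-evaluated as the loop condition on every iteration.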
+ if (S.getConditionVariable()) + buildDecl(*S.getConditionVariable()); + // C99 6.8.5p2/p4: The first substatement is executed if the + // expression compares unequal to 0. The condition must be a + // scalar type. + condVal = evaluateExprAsBool(S.getCond()); + } else { + auto boolTy = mlir::cir::BoolType::get(b.getContext()); + condVal = b.create( + loc, boolTy, + mlir::cir::BoolAttr::get(b.getContext(), boolTy, true)); + } + builder.createCondition(condVal); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // https://en.cppreference.com/w/cpp/language/for + // While in C++, the scope of the init-statement and the scope of + // statement are one and the same, in C the scope of statement is + // nested within the scope of init-statement. + bool useCurrentScope = + CGM.getASTContext().getLangOpts().CPlusPlus ? true : false; + if (buildStmt(S.getBody(), useCurrentScope).failed()) + loopRes = mlir::failure(); + buildStopPoint(&S); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + if (S.getInc()) + if (buildStmt(S.getInc(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + builder.createYield(loc); + }); + return loopRes; + }; + + auto res = mlir::success(); + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, loc, builder.getInsertionBlock()}; + res = forStmtBuilder(); + }); + + if (res.failed()) + return res; + + terminateBody(builder, forOp.getBody(), getLoc(S.getEndLoc())); + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildDoStmt(const DoStmt &S) { + mlir::cir::DoWhileOp doWhileOp; + + // TODO: pass in array of attributes. + auto doStmtBuilder = [&]() -> mlir::LogicalResult { + auto loopRes = mlir::success(); + assert(!MissingFeatures::loopInfoStack()); + // From LLVM: if there are any cleanups between here and the loop-exit + // scope, create a block to stage a loop exit along. + // We probably already do the right thing because of ScopeOp, but make + // sure we handle all cases. + assert(!MissingFeatures::requiresCleanups()); + + doWhileOp = builder.createDoWhile( + getLoc(S.getSourceRange()), + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + assert(!MissingFeatures::createProfileWeightsForLoop()); + assert(!MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + // C99 6.8.5p2/p4: The first substatement is executed if the + // expression compares unequal to 0. The condition must be a + // scalar type. + mlir::Value condVal = evaluateExprAsBool(S.getCond()); + builder.createCondition(condVal); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + if (buildStmt(S.getBody(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + buildStopPoint(&S); + }); + return loopRes; + }; + + auto res = mlir::success(); + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, loc, builder.getInsertionBlock()}; + res = doStmtBuilder(); + }); + + if (res.failed()) + return res; + + terminateBody(builder, doWhileOp.getBody(), getLoc(S.getEndLoc())); + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildWhileStmt(const WhileStmt &S) { + mlir::cir::WhileOp whileOp; + + // TODO: pass in array of attributes. 
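  // Roughly, `while (c) body;` is expected to lower to:
  //   cir.scope {
  //     cir.while {
  //       %b = ... evaluate `c` as a cir.bool ...
  //       cir.condition(%b)
  //     } do {
  //       ... body ...
  //       cir.yield
  //     }
  //   }
  // with terminateBody patching in the trailing cir.yield when the body does
  // not already end in a terminator.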
+ auto whileStmtBuilder = [&]() -> mlir::LogicalResult { + auto loopRes = mlir::success(); + assert(!MissingFeatures::loopInfoStack()); + // From LLVM: if there are any cleanups between here and the loop-exit + // scope, create a block to stage a loop exit along. + // We probably already do the right thing because of ScopeOp, but make + // sure we handle all cases. + assert(!MissingFeatures::requiresCleanups()); + + whileOp = builder.createWhile( + getLoc(S.getSourceRange()), + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + assert(!MissingFeatures::createProfileWeightsForLoop()); + assert(!MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + mlir::Value condVal; + // If the for statement has a condition scope, + // emit the local variable declaration. + if (S.getConditionVariable()) + buildDecl(*S.getConditionVariable()); + // C99 6.8.5p2/p4: The first substatement is executed if the + // expression compares unequal to 0. The condition must be a + // scalar type. + condVal = evaluateExprAsBool(S.getCond()); + builder.createCondition(condVal); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + if (buildStmt(S.getBody(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + buildStopPoint(&S); + }); + return loopRes; + }; + + auto res = mlir::success(); + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, loc, builder.getInsertionBlock()}; + res = whileStmtBuilder(); + }); + + if (res.failed()) + return res; + + terminateBody(builder, whileOp.getBody(), getLoc(S.getEndLoc())); + return mlir::success(); +} + +mlir::LogicalResult CIRGenFunction::buildSwitchBody(const Stmt *S) { + if (auto *compoundStmt = dyn_cast(S)) { + mlir::Block *lastCaseBlock = nullptr; + auto res = mlir::success(); + for (auto *c : compoundStmt->body()) { + if (auto *switchCase = dyn_cast(c)) { + res = buildSwitchCase(*switchCase); + lastCaseBlock = builder.getBlock(); + } else if (lastCaseBlock) { + // This means it's a random stmt following up a case, just + // emit it as part of previous known case. + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPointToEnd(lastCaseBlock); + res = buildStmt(c, /*useCurrentScope=*/!isa(c)); + lastCaseBlock = builder.getBlock(); + } else { + llvm_unreachable("statement doesn't belong to any case region, NYI"); + } + + if (res.failed()) + break; + } + return res; + } + + llvm_unreachable("switch body is not CompoundStmt, NYI"); +} + +mlir::LogicalResult CIRGenFunction::buildSwitchStmt(const SwitchStmt &S) { + // TODO: LLVM codegen does some early optimization to fold the condition and + // only emit live cases. CIR should use MLIR to achieve similar things, + // nothing to be done here. + // if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue))... + + auto res = mlir::success(); + SwitchOp swop; + + auto switchStmtBuilder = [&]() -> mlir::LogicalResult { + if (S.getInit()) + if (buildStmt(S.getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + if (S.getConditionVariable()) + buildDecl(*S.getConditionVariable()); + + mlir::Value condV = buildScalarExpr(S.getCond()); + + // TODO: PGO and likelihood (e.g. PGO.haveRegionCounts()) + // TODO: if the switch has a condition wrapped by __builtin_unpredictable? 
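    // Sketch of the result: `switch (x) { case 1: ...; case 2: case 3: ...;
    // default: ...; }` becomes a single cir.switch on the condition value
    // carrying a `cases` array attribute (with equal/anyof/range/default
    // kinds) and one region per entry; blocks that fall off the end of a
    // region are patched with cir.yield further down.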
+ + swop = builder.create( + getLoc(S.getBeginLoc()), condV, + /*switchBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc, mlir::OperationState &os) { + currLexScope->setAsSwitch(); + + caseAttrsStack.push_back({}); + condTypeStack.push_back(condV.getType()); + + res = buildSwitchBody(S.getBody()); + + os.addRegions(currLexScope->getSwitchRegions()); + os.addAttribute("cases", builder.getArrayAttr(caseAttrsStack.back())); + + caseAttrsStack.pop_back(); + condTypeStack.pop_back(); + }); + + if (res.failed()) + return res; + return mlir::success(); + }; + + // The switch scope contains the full source range for SwitchStmt. + auto scopeLoc = getLoc(S.getSourceRange()); + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, loc, builder.getInsertionBlock()}; + res = switchStmtBuilder(); + }); + + if (res.failed()) + return res; + + // Any block in a case region without a terminator is considered a + // fallthrough yield. In practice there shouldn't be more than one + // block without a terminator, we patch any block we see though and + // let mlir's SwitchOp verifier enforce rules. + auto terminateCaseRegion = [&](mlir::Region &r, mlir::Location loc) { + if (r.empty()) + return; + + SmallVector eraseBlocks; + unsigned numBlocks = r.getBlocks().size(); + for (auto &block : r.getBlocks()) { + // Already cleanup after return operations, which might create + // empty blocks if emitted as last stmt. + if (numBlocks != 1 && block.empty() && block.hasNoPredecessors() && + block.hasNoSuccessors()) + eraseBlocks.push_back(&block); + + if (block.empty() || + !block.back().hasTrait()) { + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPointToEnd(&block); + builder.createYield(loc); + } + } + + for (auto *b : eraseBlocks) + b->erase(); + }; + + // Make sure all case regions are terminated by inserting fallthroughs + // when necessary. + // FIXME: find a better way to get accurante with location here. + for (auto &r : swop.getRegions()) + terminateCaseRegion(r, swop.getLoc()); + return mlir::success(); +} + +void CIRGenFunction::buildReturnOfRValue(mlir::Location loc, RValue RV, + QualType Ty) { + if (RV.isScalar()) { + builder.createStore(loc, RV.getScalarVal(), ReturnValue); + } else if (RV.isAggregate()) { + LValue Dest = makeAddrLValue(ReturnValue, Ty); + LValue Src = makeAddrLValue(RV.getAggregateAddress(), Ty); + buildAggregateCopy(Dest, Src, Ty, getOverlapForReturnValue()); + } else { + llvm_unreachable("NYI"); + } + buildBranchThroughCleanup(loc, ReturnBlock()); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp new file mode 100644 index 000000000000..0c996156f71e --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp @@ -0,0 +1,121 @@ +//===--- CIRGenStmtOpenMP.cpp - Emit MLIR Code from OpenMP Statements -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit OpenMP Stmt nodes as MLIR code. 
+// +//===----------------------------------------------------------------------===// +#include "clang/AST/ASTFwd.h" +#include "clang/AST/StmtIterator.h" +#include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/OpenMPKinds.h" + +#include "CIRGenFunction.h" +#include "CIRGenOpenMPRuntime.h" + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LogicalResult.h" + +using namespace cir; +using namespace clang; +using namespace mlir::omp; + +static void buildDependences(const OMPExecutableDirective &S, + OMPTaskDataTy &Data) { + + // First look for 'omp_all_memory' and add this first. + bool OmpAllMemory = false; + if (llvm::any_of( + S.getClausesOfKind(), [](const OMPDependClause *C) { + return C->getDependencyKind() == OMPC_DEPEND_outallmemory || + C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; + })) { + OmpAllMemory = true; + // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are + // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to + // simplify. + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory, + /*IteratorExpr=*/nullptr); + // Add a nullptr Expr to simplify the codegen in emitDependData. + DD.DepExprs.push_back(nullptr); + } + // Add remaining dependences skipping any 'out' or 'inout' if they are + // overridden by 'omp_all_memory'. + for (const auto *C : S.getClausesOfKind()) { + OpenMPDependClauseKind Kind = C->getDependencyKind(); + if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) + continue; + if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) + continue; + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } +} + +mlir::LogicalResult +CIRGenFunction::buildOMPParallelDirective(const OMPParallelDirective &S) { + mlir::LogicalResult res = mlir::success(); + auto scopeLoc = getLoc(S.getSourceRange()); + // Create a `omp.parallel` op. + auto parallelOp = builder.create(scopeLoc); + mlir::Block &block = parallelOp.getRegion().emplaceBlock(); + mlir::OpBuilder::InsertionGuard guardCase(builder); + builder.setInsertionPointToEnd(&block); + // Create a scope for the OpenMP region. + builder.create( + scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, scopeLoc, builder.getInsertionBlock()}; + // Emit the body of the region. + if (buildStmt(S.getCapturedStmt(OpenMPDirectiveKind::OMPD_parallel) + ->getCapturedStmt(), + /*useCurrentScope=*/true) + .failed()) + res = mlir::failure(); + }); + // Add the terminator for `omp.parallel`. 
+ builder.create(getLoc(S.getSourceRange().getEnd())); + return res; +} + +mlir::LogicalResult +CIRGenFunction::buildOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { + mlir::LogicalResult res = mlir::success(); + OMPTaskDataTy Data; + buildDependences(S, Data); + Data.HasNowaitClause = S.hasClausesOfKind(); + CGM.getOpenMPRuntime().emitTaskWaitCall(builder, *this, + getLoc(S.getSourceRange()), Data); + return res; +} +mlir::LogicalResult +CIRGenFunction::buildOMPTaskyieldDirective(const OMPTaskyieldDirective &S) { + mlir::LogicalResult res = mlir::success(); + // Creation of an omp.taskyield operation + CGM.getOpenMPRuntime().emitTaskyieldCall(builder, *this, + getLoc(S.getSourceRange())); + return res; +} + +mlir::LogicalResult +CIRGenFunction::buildOMPBarrierDirective(const OMPBarrierDirective &S) { + mlir::LogicalResult res = mlir::success(); + // Creation of an omp.barrier operation + CGM.getOpenMPRuntime().emitBarrierCall(builder, *this, + getLoc(S.getSourceRange())); + return res; +} \ No newline at end of file diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.h b/clang/lib/CIR/CodeGen/CIRGenTBAA.h new file mode 100644 index 000000000000..ab5ac9b575c0 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.h @@ -0,0 +1,28 @@ +//===--- CIRGenTBAA.h - TBAA information for LLVM CIRGen --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the code that manages TBAA information and defines the TBAA policy +// for the optimizer to use. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H + +namespace cir { + +// TBAAAccessInfo - Describes a memory access in terms of TBAA. +struct TBAAAccessInfo {}; + +/// CIRGenTBAA - This class organizes the cross-module state that is used while +/// lowering AST types to LLVM types. +class CIRGenTBAA {}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h new file mode 100644 index 000000000000..e07e46be68e5 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h @@ -0,0 +1,134 @@ +//===--- CIRGenTypeCache.h - Commonly used LLVM types and info -*- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This structure provides a set of common types useful during CIR emission. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGENTYPECACHE_H +#define LLVM_CLANG_LIB_CIR_CODEGENTYPECACHE_H + +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Types.h" +#include "clang/AST/CharUnits.h" +#include "clang/Basic/AddressSpaces.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/MissingFeatures.h" + +namespace cir { + +/// This structure provides a set of types that are commonly used +/// during IR emission. It's initialized once in CodeGenModule's +/// constructor and then copied around into new CIRGenFunction's. +struct CIRGenTypeCache { + CIRGenTypeCache() {} + + /// void + mlir::cir::VoidType VoidTy; + // char, int, short, long + mlir::cir::IntType SInt8Ty, SInt16Ty, SInt32Ty, SInt64Ty; + // usigned char, unsigned, unsigned short, unsigned long + mlir::cir::IntType UInt8Ty, UInt16Ty, UInt32Ty, UInt64Ty; + /// half, bfloat, float, double, fp80 + mlir::cir::FP16Type FP16Ty; + mlir::cir::BF16Type BFloat16Ty; + mlir::cir::SingleType FloatTy; + mlir::cir::DoubleType DoubleTy; + mlir::cir::FP80Type FP80Ty; + + /// int + mlir::Type UIntTy; + + /// char + mlir::Type UCharTy; + + /// intptr_t, size_t, and ptrdiff_t, which we assume are the same size. + union { + mlir::Type UIntPtrTy; + mlir::Type SizeTy; + }; + + mlir::Type PtrDiffTy; + + /// void* in address space 0 + mlir::cir::PointerType VoidPtrTy; + mlir::cir::PointerType UInt8PtrTy; + + /// void** in address space 0 + union { + mlir::cir::PointerType VoidPtrPtrTy; + mlir::cir::PointerType UInt8PtrPtrTy; + }; + + /// void* in alloca address space + union { + mlir::cir::PointerType AllocaVoidPtrTy; + mlir::cir::PointerType AllocaInt8PtrTy; + }; + + /// void* in default globals address space + // union { + // mlir::cir::PointerType GlobalsVoidPtrTy; + // mlir::cir::PointerType GlobalsInt8PtrTy; + // }; + + /// void* in the address space for constant globals + // mlir::cir::PointerType ConstGlobalsPtrTy; + + /// The size and alignment of the builtin C type 'int'. This comes + /// up enough in various ABI lowering tasks to be worth pre-computing. + // union { + // unsigned char IntSizeInBytes; + // unsigned char IntAlignInBytes; + // }; + // clang::CharUnits getIntSize() const { + // return clang::CharUnits::fromQuantity(IntSizeInBytes); + // } + // clang::CharUnits getIntAlign() const { + // return clang::CharUnits::fromQuantity(IntAlignInBytes); + // } + + /// The width of a pointer into the generic address space. + // unsigned char PointerWidthInBits; + + /// The size and alignment of a pointer into the generic address space. + union { + unsigned char PointerAlignInBytes; + unsigned char PointerSizeInBytes; + }; + + /// The size and alignment of size_t. 
+ // union { + // unsigned char SizeSizeInBytes; // sizeof(size_t) + // unsigned char SizeAlignInBytes; + // }; + + mlir::cir::AddressSpaceAttr CIRAllocaAddressSpace; + + // clang::CharUnits getSizeSize() const { + // return clang::CharUnits::fromQuantity(SizeSizeInBytes); + // } + // clang::CharUnits getSizeAlign() const { + // return clang::CharUnits::fromQuantity(SizeAlignInBytes); + // } + clang::CharUnits getPointerSize() const { + return clang::CharUnits::fromQuantity(PointerSizeInBytes); + } + clang::CharUnits getPointerAlign() const { + return clang::CharUnits::fromQuantity(PointerAlignInBytes); + } + + mlir::cir::AddressSpaceAttr getCIRAllocaAddressSpace() const { + return CIRAllocaAddressSpace; + } +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp new file mode 100644 index 000000000000..826aa234f948 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -0,0 +1,925 @@ +#include "CIRGenTypes.h" +#include "CIRGenCall.h" +#include "CIRGenFunctionInfo.h" +#include "CIRGenModule.h" +#include "TargetInfo.h" + +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinTypes.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/RecordLayout.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/FnInfoOpts.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace clang; +using namespace cir; + +mlir::cir::CallingConv CIRGenTypes::ClangCallConvToCIRCallConv(clang::CallingConv CC) { + switch (CC) { + case CC_C: + return mlir::cir::CallingConv::C; + case CC_OpenCLKernel: + return CGM.getTargetCIRGenInfo().getOpenCLKernelCallingConv(); + case CC_SpirFunction: + return mlir::cir::CallingConv::SpirFunction; + default: + llvm_unreachable("No other calling conventions implemented."); + } +} + +CIRGenTypes::CIRGenTypes(CIRGenModule &cgm) + : Context(cgm.getASTContext()), Builder(cgm.getBuilder()), CGM{cgm}, + Target(cgm.getTarget()), TheCXXABI(cgm.getCXXABI()), + TheABIInfo(cgm.getTargetCIRGenInfo().getABIInfo()) { + SkippedLayout = false; +} + +CIRGenTypes::~CIRGenTypes() { + for (llvm::FoldingSet::iterator I = FunctionInfos.begin(), + E = FunctionInfos.end(); + I != E;) + delete &*I++; +} + +// This is CIR's version of CIRGenTypes::addRecordTypeName +std::string CIRGenTypes::getRecordTypeName(const clang::RecordDecl *recordDecl, + StringRef suffix) { + llvm::SmallString<256> typeName; + llvm::raw_svector_ostream outStream(typeName); + + PrintingPolicy policy = recordDecl->getASTContext().getPrintingPolicy(); + policy.SuppressInlineNamespace = false; + + if (recordDecl->getIdentifier()) { + if (recordDecl->getDeclContext()) + recordDecl->printQualifiedName(outStream, policy); + else + recordDecl->printName(outStream, policy); + + // Ensure each template specialization has a unique name. + if (auto *templateSpecialization = + llvm::dyn_cast(recordDecl)) { + outStream << '<'; + const auto args = templateSpecialization->getTemplateArgs().asArray(); + const auto printer = [&policy, &outStream](const TemplateArgument &arg) { + /// Print this template argument to the given output stream. 
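The argument printer above exists so that distinct specializations receive distinct record names. A purely illustrative example, with the exact spelling left to `getUniqueRecordName`:

```cpp
// Illustrative input only: each specialization needs its own record name,
// so the template arguments are printed into it (roughly "Box<int>" vs.
// "Box<float>"); uniquing of the final name is handled elsewhere.
template <typename T> struct Box { T value; };

Box<int> a;    // record name carries the printed argument "int"
Box<float> b;  // record name carries the printed argument "float"
```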
+ arg.print(policy, outStream, /*IncludeType=*/true); + }; + llvm::interleaveComma(args, outStream, printer); + outStream << '>'; + } + + } else if (auto *typedefNameDecl = recordDecl->getTypedefNameForAnonDecl()) { + if (typedefNameDecl->getDeclContext()) + typedefNameDecl->printQualifiedName(outStream, policy); + else + typedefNameDecl->printName(outStream); + } else { + outStream << Builder.getUniqueAnonRecordName(); + } + + if (!suffix.empty()) + outStream << suffix; + + return Builder.getUniqueRecordName(std::string(typeName)); +} + +/// Return true if the specified type is already completely laid out. +bool CIRGenTypes::isRecordLayoutComplete(const Type *Ty) const { + llvm::DenseMap::const_iterator I = + recordDeclTypes.find(Ty); + return I != recordDeclTypes.end() && I->second.isComplete(); +} + +static bool +isSafeToConvert(QualType T, CIRGenTypes &CGT, + llvm::SmallPtrSet &AlreadyChecked); + +/// Return true if it is safe to convert the specified record decl to IR and lay +/// it out, false if doing so would cause us to get into a recursive compilation +/// mess. +static bool +isSafeToConvert(const RecordDecl *RD, CIRGenTypes &CGT, + llvm::SmallPtrSet &AlreadyChecked) { + // If we have already checked this type (maybe the same type is used by-value + // multiple times in multiple structure fields, don't check again. + if (!AlreadyChecked.insert(RD).second) + return true; + + const Type *Key = CGT.getContext().getTagDeclType(RD).getTypePtr(); + + // If this type is already laid out, converting it is a noop. + if (CGT.isRecordLayoutComplete(Key)) + return true; + + // If this type is currently being laid out, we can't recursively compile it. + if (CGT.isRecordBeingLaidOut(Key)) + return false; + + // If this type would require laying out bases that are currently being laid + // out, don't do it. This includes virtual base classes which get laid out + // when a class is translated, even though they aren't embedded by-value into + // the class. + if (const CXXRecordDecl *CRD = dyn_cast(RD)) { + for (const auto &I : CRD->bases()) + if (!isSafeToConvert(I.getType()->castAs()->getDecl(), CGT, + AlreadyChecked)) + return false; + } + + // If this type would require laying out members that are currently being laid + // out, don't do it. + for (const auto *I : RD->fields()) + if (!isSafeToConvert(I->getType(), CGT, AlreadyChecked)) + return false; + + // If there are no problems, lets do it. + return true; +} + +/// Return true if it is safe to convert this field type, which requires the +/// structure elements contained by-value to all be recursively safe to convert. +static bool +isSafeToConvert(QualType T, CIRGenTypes &CGT, + llvm::SmallPtrSet &AlreadyChecked) { + // Strip off atomic type sugar. + if (const auto *AT = T->getAs()) + T = AT->getValueType(); + + // If this is a record, check it. + if (const auto *RT = T->getAs()) + return isSafeToConvert(RT->getDecl(), CGT, AlreadyChecked); + + // If this is an array, check the elements, which are embedded inline. + if (const auto *AT = CGT.getContext().getAsArrayType(T)) + return isSafeToConvert(AT->getElementType(), CGT, AlreadyChecked); + + // Otherwise, there is no concern about transforming this. We only care about + // things that are contained by-value in a structure that can have another + // structure as a member. + return true; +} + +// Return true if it is safe to convert the specified record decl to CIR and lay +// it out, false if doing so would cause us to get into a recursive compilation +// mess. 
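A source-level illustration of the situation this check guards against; the snippet is illustrative only and is not taken from the patch.

```cpp
// Illustrative input only. While 'Outer' is in RecordsBeingLaidOut, any
// record embedded by value must itself be safe to convert first; records
// that would need 'Outer' finished before they can be laid out are
// deferred rather than converted recursively.
struct Inner { int x; };
struct Outer {
  Inner inner;   // by-value member: Inner is converted (laid out) first
  Outer *next;   // a pointer member does not require Outer to be complete
};
```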
+static bool isSafeToConvert(const RecordDecl *RD, CIRGenTypes &CGT) { + // If no structs are being laid out, we can certainly do this one. + if (CGT.noRecordsBeingLaidOut()) + return true; + + llvm::SmallPtrSet AlreadyChecked; + return isSafeToConvert(RD, CGT, AlreadyChecked); +} + +/// Lay out a tagged decl type like struct or union. +mlir::Type CIRGenTypes::convertRecordDeclType(const clang::RecordDecl *RD) { + // TagDecl's are not necessarily unique, instead use the (clang) type + // connected to the decl. + const auto *key = Context.getTagDeclType(RD).getTypePtr(); + mlir::cir::StructType entry = recordDeclTypes[key]; + + // Handle forward decl / incomplete types. + if (!entry) { + auto name = getRecordTypeName(RD, ""); + entry = Builder.getIncompleteStructTy(name, RD); + recordDeclTypes[key] = entry; + } + + RD = RD->getDefinition(); + if (!RD || !RD->isCompleteDefinition() || entry.isComplete()) + return entry; + + // If converting this type would cause us to infinitely loop, don't do it! + if (!isSafeToConvert(RD, *this)) { + DeferredRecords.push_back(RD); + return entry; + } + + // Okay, this is a definition of a type. Compile the implementation now. + bool InsertResult = RecordsBeingLaidOut.insert(key).second; + (void)InsertResult; + assert(InsertResult && "Recursively compiling a struct?"); + + // Force conversion of non-virtual base classes recursively. + if (const auto *cxxRecordDecl = dyn_cast(RD)) { + for (const auto &I : cxxRecordDecl->bases()) { + if (I.isVirtual()) + continue; + convertRecordDeclType(I.getType()->castAs()->getDecl()); + } + } + + // Layout fields. + std::unique_ptr Layout = computeRecordLayout(RD, &entry); + recordDeclTypes[key] = entry; + CIRGenRecordLayouts[key] = std::move(Layout); + + // We're done laying out this struct. + bool EraseResult = RecordsBeingLaidOut.erase(key); + (void)EraseResult; + assert(EraseResult && "struct not in RecordsBeingLaidOut set?"); + + // If this struct blocked a FunctionType conversion, then recompute whatever + // was derived from that. + // FIXME: This is hugely overconservative. + if (SkippedLayout) + TypeCache.clear(); + + // If we're done converting the outer-most record, then convert any deferred + // structs as well. + if (RecordsBeingLaidOut.empty()) + while (!DeferredRecords.empty()) + convertRecordDeclType(DeferredRecords.pop_back_val()); + + return entry; +} + +mlir::Type CIRGenTypes::convertTypeForMem(clang::QualType qualType, + bool forBitField) { + assert(!qualType->isConstantMatrixType() && "Matrix types NYI"); + + mlir::Type convertedType = ConvertType(qualType); + + assert(!forBitField && "Bit fields NYI"); + + // If this is a bit-precise integer type in a bitfield representation, map + // this integer to the target-specified size. + if (forBitField && qualType->isBitIntType()) + assert(!qualType->isBitIntType() && "Bit field with type _BitInt NYI"); + + return convertedType; +} + +mlir::MLIRContext &CIRGenTypes::getMLIRContext() const { + return *Builder.getContext(); +} + +mlir::Type CIRGenTypes::ConvertFunctionTypeInternal(QualType QFT) { + assert(QFT.isCanonical()); + const Type *Ty = QFT.getTypePtr(); + const FunctionType *FT = cast(QFT.getTypePtr()); + // First, check whether we can build the full fucntion type. If the function + // type depends on an incomplete type (e.g. a struct or enum), we cannot lower + // the function type. 
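The comment above notes that a function type depending on an incomplete record or enum cannot be lowered yet. An illustrative source-level example of the distinction:

```cpp
// Illustrative input only. A signature that mentions an incomplete record
// by value cannot be converted until the record is completed; mentioning
// it through a pointer is fine, since only the pointer representation is
// needed.
struct Pending;                        // forward declaration only
typedef void TakesByValue(Pending);    // conversion must wait for Pending
typedef void TakesPointer(Pending *);  // convertible right away
struct Pending { int payload; };       // now both signatures can be lowered
```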
+ assert(isFuncTypeConvertible(FT) && "NYI"); + + // The function type can be built; call the appropriate routines to build it + const CIRGenFunctionInfo *FI; + if (const auto *FPT = dyn_cast(FT)) { + FI = &arrangeFreeFunctionType( + CanQual::CreateUnsafe(QualType(FPT, 0))); + } else { + const FunctionNoProtoType *FNPT = cast(FT); + FI = &arrangeFreeFunctionType( + CanQual::CreateUnsafe(QualType(FNPT, 0))); + } + + mlir::Type ResultType = nullptr; + // If there is something higher level prodding our CIRGenFunctionInfo, then + // don't recurse into it again. + assert(!FunctionsBeingProcessed.count(FI) && "NYI"); + + // Otherwise, we're good to go, go ahead and convert it. + ResultType = GetFunctionType(*FI); + + RecordsBeingLaidOut.erase(Ty); + + assert(!SkippedLayout && "Shouldn't have skipped anything yet"); + + if (RecordsBeingLaidOut.empty()) + while (!DeferredRecords.empty()) + convertRecordDeclType(DeferredRecords.pop_back_val()); + + return ResultType; +} + +/// Return true if the specified type in a function parameter or result position +/// can be converted to a CIR type at this point. This boils down to being +/// whether it is complete, as well as whether we've temporarily deferred +/// expanding the type because we're in a recursive context. +bool CIRGenTypes::isFuncParamTypeConvertible(clang::QualType Ty) { + // Some ABIs cannot have their member pointers represented in LLVM IR unless + // certain circumstances have been reached. + assert(!Ty->getAs() && "NYI"); + + // If this isn't a tagged type, we can convert it! + const TagType *TT = Ty->getAs(); + if (!TT) + return true; + + // Incomplete types cannot be converted. + if (TT->isIncompleteType()) + return false; + + // If this is an enum, then it is always safe to convert. + const RecordType *RT = dyn_cast(TT); + if (!RT) + return true; + + // Otherwise, we have to be careful. If it is a struct that we're in the + // process of expanding, then we can't convert the function type. That's ok + // though because we must be in a pointer context under the struct, so we can + // just convert it to a dummy type. + // + // We decide this by checking whether ConvertRecordDeclType returns us an + // opaque type for a struct that we know is defined. + return isSafeToConvert(RT->getDecl(), *this); +} + +/// Code to verify a given function type is complete, i.e. the return type and +/// all of the parameter types are complete. Also check to see if we are in a +/// RS_StructPointer context, and if so whether any struct types have been +/// pended. If so, we don't want to ask the ABI lowering code to handle a type +/// that cannot be converted to a CIR type. +bool CIRGenTypes::isFuncTypeConvertible(const FunctionType *FT) { + if (!isFuncParamTypeConvertible(FT->getReturnType())) + return false; + + if (const auto *FPT = dyn_cast(FT)) + for (unsigned i = 0, e = FPT->getNumParams(); i != e; i++) + if (!isFuncParamTypeConvertible(FPT->getParamType(i))) + return false; + + return true; +} + +/// ConvertType - Convert the specified type to its MLIR form. +mlir::Type CIRGenTypes::ConvertType(QualType T) { + T = Context.getCanonicalType(T); + const Type *Ty = T.getTypePtr(); + + // For the device-side compilation, CUDA device builtin surface/texture types + // may be represented in different types. + assert(!Context.getLangOpts().CUDAIsDevice && "not implemented"); + + if (const auto *recordType = dyn_cast(T)) + return convertRecordDeclType(recordType->getDecl()); + + // See if type is already cached. 
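Before the cache lookup and the large switch that follows, a rough summary of the builtin mappings may help; the CIR types named in the comments are paraphrased from the cases below, not verbatim CIR syntax.

```cpp
// Illustrative C declarations and the CIR types the switch below would
// roughly produce for them (paraphrased, not exact printed forms).
bool b;            // -> cir BoolType
int i;             // -> signed cir IntType, width = Context.getTypeSize(int)
unsigned long ul;  // -> unsigned cir IntType at the target's 'long' width
float f;           // -> CGM.FloatTy (cir SingleType)
double d;          // -> CGM.DoubleTy (cir DoubleType)
int *p;            // -> cir PointerType wrapping the converted pointee
```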
+ TypeCacheTy::iterator TCI = TypeCache.find(Ty); + // If type is found in map then use it. Otherwise, convert type T. + if (TCI != TypeCache.end()) + return TCI->second; + + // If we don't have it in the cache, convert it now. + mlir::Type ResultType = nullptr; + switch (Ty->getTypeClass()) { + case Type::Record: // Handled above. +#define TYPE(Class, Base) +#define ABSTRACT_TYPE(Class, Base) +#define NON_CANONICAL_TYPE(Class, Base) case Type::Class: +#define DEPENDENT_TYPE(Class, Base) case Type::Class: +#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: +#include "clang/AST/TypeNodes.inc" + llvm_unreachable("Non-canonical or dependent types aren't possible."); + + case Type::ArrayParameter: + llvm_unreachable("NYI"); + + case Type::Builtin: { + switch (cast(Ty)->getKind()) { + case BuiltinType::SveBoolx2: + case BuiltinType::SveBoolx4: + case BuiltinType::SveCount: + llvm_unreachable("NYI"); + case BuiltinType::Void: + // TODO(cir): how should we model this? + ResultType = CGM.VoidTy; + break; + + case BuiltinType::ObjCId: + case BuiltinType::ObjCClass: + case BuiltinType::ObjCSel: + // TODO(cir): probably same as BuiltinType::Void + assert(0 && "not implemented"); + break; + + case BuiltinType::Bool: + ResultType = ::mlir::cir::BoolType::get(Builder.getContext()); + break; + + // Signed types. + case BuiltinType::Accum: + case BuiltinType::Char_S: + case BuiltinType::Fract: + case BuiltinType::Int: + case BuiltinType::Long: + case BuiltinType::LongAccum: + case BuiltinType::LongFract: + case BuiltinType::LongLong: + case BuiltinType::SChar: + case BuiltinType::Short: + case BuiltinType::ShortAccum: + case BuiltinType::ShortFract: + case BuiltinType::WChar_S: + // Saturated signed types. + case BuiltinType::SatAccum: + case BuiltinType::SatFract: + case BuiltinType::SatLongAccum: + case BuiltinType::SatLongFract: + case BuiltinType::SatShortAccum: + case BuiltinType::SatShortFract: + ResultType = + mlir::cir::IntType::get(Builder.getContext(), Context.getTypeSize(T), + /*isSigned=*/true); + break; + // Unsigned types. + case BuiltinType::Char16: + case BuiltinType::Char32: + case BuiltinType::Char8: + case BuiltinType::Char_U: + case BuiltinType::UAccum: + case BuiltinType::UChar: + case BuiltinType::UFract: + case BuiltinType::UInt: + case BuiltinType::ULong: + case BuiltinType::ULongAccum: + case BuiltinType::ULongFract: + case BuiltinType::ULongLong: + case BuiltinType::UShort: + case BuiltinType::UShortAccum: + case BuiltinType::UShortFract: + case BuiltinType::WChar_U: + // Saturated unsigned types. + case BuiltinType::SatUAccum: + case BuiltinType::SatUFract: + case BuiltinType::SatULongAccum: + case BuiltinType::SatULongFract: + case BuiltinType::SatUShortAccum: + case BuiltinType::SatUShortFract: + ResultType = + mlir::cir::IntType::get(Builder.getContext(), Context.getTypeSize(T), + /*isSigned=*/false); + break; + + case BuiltinType::Float16: + ResultType = CGM.FP16Ty; + break; + case BuiltinType::Half: + // Should be the same as above? + assert(0 && "not implemented"); + break; + case BuiltinType::BFloat16: + ResultType = CGM.BFloat16Ty; + break; + case BuiltinType::Float: + ResultType = CGM.FloatTy; + break; + case BuiltinType::Double: + ResultType = CGM.DoubleTy; + break; + case BuiltinType::LongDouble: + ResultType = Builder.getLongDoubleTy(Context.getFloatTypeSemantics(T)); + break; + case BuiltinType::Float128: + case BuiltinType::Ibm128: + // FIXME: look at Context.getFloatTypeSemantics(T) and getTypeForFormat + // on LLVM codegen. 
+ assert(0 && "not implemented"); + break; + + case BuiltinType::NullPtr: + // Add proper CIR type for it? this looks mostly useful for sema related + // things (like for overloads accepting void), for now, given that + // `sizeof(std::nullptr_t)` is equal to `sizeof(void *)`, model + // std::nullptr_t as !cir.ptr + ResultType = Builder.getVoidPtrTy(); + break; + + case BuiltinType::UInt128: + case BuiltinType::Int128: + assert(0 && "not implemented"); + // FIXME: ResultType = Builder.getIntegerType(128); + break; + +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLImageTypes.def" +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) case BuiltinType::Id: +#include "clang/Basic/OpenCLExtensionTypes.def" + case BuiltinType::OCLSampler: + case BuiltinType::OCLEvent: + case BuiltinType::OCLClkEvent: + case BuiltinType::OCLQueue: + case BuiltinType::OCLReserveID: + assert(0 && "not implemented"); + break; + case BuiltinType::SveInt8: + case BuiltinType::SveUint8: + case BuiltinType::SveInt8x2: + case BuiltinType::SveUint8x2: + case BuiltinType::SveInt8x3: + case BuiltinType::SveUint8x3: + case BuiltinType::SveInt8x4: + case BuiltinType::SveUint8x4: + case BuiltinType::SveInt16: + case BuiltinType::SveUint16: + case BuiltinType::SveInt16x2: + case BuiltinType::SveUint16x2: + case BuiltinType::SveInt16x3: + case BuiltinType::SveUint16x3: + case BuiltinType::SveInt16x4: + case BuiltinType::SveUint16x4: + case BuiltinType::SveInt32: + case BuiltinType::SveUint32: + case BuiltinType::SveInt32x2: + case BuiltinType::SveUint32x2: + case BuiltinType::SveInt32x3: + case BuiltinType::SveUint32x3: + case BuiltinType::SveInt32x4: + case BuiltinType::SveUint32x4: + case BuiltinType::SveInt64: + case BuiltinType::SveUint64: + case BuiltinType::SveInt64x2: + case BuiltinType::SveUint64x2: + case BuiltinType::SveInt64x3: + case BuiltinType::SveUint64x3: + case BuiltinType::SveInt64x4: + case BuiltinType::SveUint64x4: + case BuiltinType::SveBool: + case BuiltinType::SveFloat16: + case BuiltinType::SveFloat16x2: + case BuiltinType::SveFloat16x3: + case BuiltinType::SveFloat16x4: + case BuiltinType::SveFloat32: + case BuiltinType::SveFloat32x2: + case BuiltinType::SveFloat32x3: + case BuiltinType::SveFloat32x4: + case BuiltinType::SveFloat64: + case BuiltinType::SveFloat64x2: + case BuiltinType::SveFloat64x3: + case BuiltinType::SveFloat64x4: + case BuiltinType::SveBFloat16: + case BuiltinType::SveBFloat16x2: + case BuiltinType::SveBFloat16x3: + case BuiltinType::SveBFloat16x4: { + assert(0 && "not implemented"); + break; + } +#define PPC_VECTOR_TYPE(Name, Id, Size) \ + case BuiltinType::Id: \ + assert(0 && "not implemented"); \ + break; +#include "clang/Basic/PPCTypes.def" +#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/RISCVVTypes.def" + { + assert(0 && "not implemented"); + break; + } +#define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS) \ + case BuiltinType::Id: { \ + llvm_unreachable("NYI"); \ + } break; +#include "clang/Basic/WebAssemblyReferenceTypes.def" +#define AMDGPU_OPAQUE_PTR_TYPE(Name, MangledName, AS, Width, Align, Id, \ + SingletonId) \ + case BuiltinType::Id: \ + llvm_unreachable("NYI"); +#include "clang/Basic/AMDGPUTypes.def" + case BuiltinType::Dependent: +#define BUILTIN_TYPE(Id, SingletonId) +#define PLACEHOLDER_TYPE(Id, SingletonId) case BuiltinType::Id: +#include "clang/AST/BuiltinTypes.def" + llvm_unreachable("Unexpected placeholder builtin type!"); + } + break; + } + case Type::Auto: + 
case Type::DeducedTemplateSpecialization: + llvm_unreachable("Unexpected undeduced type!"); + case Type::Complex: { + const ComplexType *CT = cast(Ty); + auto ElementTy = ConvertType(CT->getElementType()); + ResultType = ::mlir::cir::ComplexType::get(Builder.getContext(), ElementTy); + break; + } + case Type::LValueReference: + case Type::RValueReference: { + const ReferenceType *RTy = cast(Ty); + QualType ETy = RTy->getPointeeType(); + auto PointeeType = convertTypeForMem(ETy); + ResultType = Builder.getPointerTo(PointeeType, ETy.getAddressSpace()); + assert(ResultType && "Cannot get pointer type?"); + break; + } + case Type::Pointer: { + const PointerType *PTy = cast(Ty); + QualType ETy = PTy->getPointeeType(); + assert(!ETy->isConstantMatrixType() && "not implemented"); + + mlir::Type PointeeType = ConvertType(ETy); + + // Treat effectively as a *i8. + // if (PointeeType->isVoidTy()) + // PointeeType = Builder.getI8Type(); + + ResultType = Builder.getPointerTo(PointeeType, ETy.getAddressSpace()); + assert(ResultType && "Cannot get pointer type?"); + break; + } + + case Type::VariableArray: { + const VariableArrayType *A = cast(Ty); + assert(A->getIndexTypeCVRQualifiers() == 0 && + "FIXME: We only handle trivial array types so far!"); + // VLAs resolve to the innermost element type; this matches + // the return of alloca, and there isn't any obviously better choice. + ResultType = convertTypeForMem(A->getElementType()); + break; + } + case Type::IncompleteArray: { + const IncompleteArrayType *A = cast(Ty); + assert(A->getIndexTypeCVRQualifiers() == 0 && + "FIXME: We only handle trivial array types so far!"); + // int X[] -> [0 x int], unless the element type is not sized. If it is + // unsized (e.g. an incomplete struct) just use [0 x i8]. + ResultType = convertTypeForMem(A->getElementType()); + if (!Builder.isSized(ResultType)) { + SkippedLayout = true; + ResultType = Builder.getUInt8Ty(); + } + ResultType = Builder.getArrayType(ResultType, 0); + break; + } + case Type::ConstantArray: { + const ConstantArrayType *A = cast(Ty); + auto EltTy = convertTypeForMem(A->getElementType()); + + // FIXME: In LLVM, "lower arrays of undefined struct type to arrays of + // i8 just to have a concrete type". Not sure this makes sense in CIR yet. + assert(Builder.isSized(EltTy) && "not implemented"); + ResultType = ::mlir::cir::ArrayType::get(Builder.getContext(), EltTy, + A->getSize().getZExtValue()); + break; + } + case Type::ExtVector: + case Type::Vector: { + const VectorType *V = cast(Ty); + auto ElementType = convertTypeForMem(V->getElementType()); + ResultType = ::mlir::cir::VectorType::get(Builder.getContext(), ElementType, + V->getNumElements()); + break; + } + case Type::ConstantMatrix: { + assert(0 && "not implemented"); + break; + } + case Type::FunctionNoProto: + case Type::FunctionProto: + ResultType = ConvertFunctionTypeInternal(T); + break; + case Type::ObjCObject: + assert(0 && "not implemented"); + break; + + case Type::ObjCInterface: { + assert(0 && "not implemented"); + break; + } + + case Type::ObjCObjectPointer: { + assert(0 && "not implemented"); + break; + } + + case Type::Enum: { + const EnumDecl *ED = cast(Ty)->getDecl(); + if (ED->isCompleteDefinition() || ED->isFixed()) + return ConvertType(ED->getIntegerType()); + // Return a placeholder 'i32' type. This can be changed later when the + // type is defined (see UpdateCompletedType), but is likely to be the + // "right" answer. 
+ ResultType = CGM.UInt32Ty; + break; + } + + case Type::BlockPointer: { + assert(0 && "not implemented"); + break; + } + + case Type::MemberPointer: { + const auto *MPT = cast(Ty); + + auto memberTy = ConvertType(MPT->getPointeeType()); + auto clsTy = mlir::cast( + ConvertType(QualType(MPT->getClass(), 0))); + if (MPT->isMemberDataPointer()) + ResultType = + mlir::cir::DataMemberType::get(Builder.getContext(), memberTy, clsTy); + else { + auto memberFuncTy = mlir::cast(memberTy); + ResultType = + mlir::cir::MethodType::get(Builder.getContext(), memberFuncTy, clsTy); + } + break; + } + + case Type::Atomic: { + QualType valueType = cast(Ty)->getValueType(); + ResultType = convertTypeForMem(valueType); + + // Pad out to the inflated size if necessary. + uint64_t valueSize = Context.getTypeSize(valueType); + uint64_t atomicSize = Context.getTypeSize(Ty); + if (valueSize != atomicSize) { + llvm_unreachable("NYI"); + } + break; + } + case Type::Pipe: { + assert(0 && "not implemented"); + break; + } + case Type::BitInt: { + const auto *bitIntTy = cast(Ty); + ResultType = mlir::cir::IntType::get( + Builder.getContext(), bitIntTy->getNumBits(), bitIntTy->isSigned()); + break; + } + } + + assert(ResultType && "Didn't convert a type?"); + + TypeCache[Ty] = ResultType; + return ResultType; +} + +const CIRGenFunctionInfo &CIRGenTypes::arrangeCIRFunctionInfo( + CanQualType resultType, FnInfoOpts opts, + llvm::ArrayRef argTypes, FunctionType::ExtInfo info, + llvm::ArrayRef paramInfos, + RequiredArgs required) { + assert(llvm::all_of(argTypes, + [](CanQualType T) { return T.isCanonicalAsParam(); })); + bool instanceMethod = opts == FnInfoOpts::IsInstanceMethod; + bool chainCall = opts == FnInfoOpts::IsChainCall; + + // Lookup or create unique function info. + llvm::FoldingSetNodeID ID; + CIRGenFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos, + required, resultType, argTypes); + + void *insertPos = nullptr; + CIRGenFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); + if (FI) + return *FI; + + mlir::cir::CallingConv CC = ClangCallConvToCIRCallConv(info.getCC()); + + // Construction the function info. We co-allocate the ArgInfos. + FI = CIRGenFunctionInfo::create(CC, instanceMethod, chainCall, info, + paramInfos, resultType, argTypes, required); + FunctionInfos.InsertNode(FI, insertPos); + + bool inserted = FunctionsBeingProcessed.insert(FI).second; + (void)inserted; + assert(inserted && "Recursively being processed?"); + + // Compute ABI information. + if (CC == mlir::cir::CallingConv::SpirKernel) { + // Force target independent argument handling for the host visible + // kernel functions. + computeSPIRKernelABIInfo(CGM, *FI); + } else if (info.getCC() == CC_Swift || info.getCC() == CC_SwiftAsync) { + llvm_unreachable("Swift NYI"); + } else { + getABIInfo().computeInfo(*FI); + } + + // Loop over all of the computed argument and return value info. If any of + // them are direct or extend without a specified coerce type, specify the + // default now. 
+ ABIArgInfo &retInfo = FI->getReturnInfo(); + if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr) + retInfo.setCoerceToType(ConvertType(FI->getReturnType())); + + for (auto &I : FI->arguments()) + if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr) + I.info.setCoerceToType(ConvertType(I.type)); + + bool erased = FunctionsBeingProcessed.erase(FI); + (void)erased; + assert(erased && "Not in set?"); + + return *FI; +} + +const CIRGenFunctionInfo &CIRGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { + assert(!dyn_cast(GD.getDecl()) && + "This is reported as a FIXME in LLVM codegen"); + const auto *FD = cast(GD.getDecl()); + + if (isa(GD.getDecl()) || + isa(GD.getDecl())) + return arrangeCXXStructorDeclaration(GD); + + return arrangeFunctionDeclaration(FD); +} + +// When we find the full definition for a TagDecl, replace the 'opaque' type we +// previously made for it if applicable. +void CIRGenTypes::UpdateCompletedType(const TagDecl *TD) { + // If this is an enum being completed, then we flush all non-struct types + // from the cache. This allows function types and other things that may be + // derived from the enum to be recomputed. + if (const auto *ED = dyn_cast(TD)) { + // Only flush the cache if we've actually already converted this type. + if (TypeCache.count(ED->getTypeForDecl())) { + // Okay, we formed some types based on this. We speculated that the enum + // would be lowered to i32, so we only need to flush the cache if this + // didn't happen. + if (!ConvertType(ED->getIntegerType()).isInteger(32)) + TypeCache.clear(); + } + // If necessary, provide the full definition of a type only used with a + // declaration so far. + assert(!MissingFeatures::generateDebugInfo()); + return; + } + + // If we completed a RecordDecl that we previously used and converted to an + // anonymous type, then go ahead and complete it now. + const auto *RD = cast(TD); + if (RD->isDependentType()) + return; + + // Only complete if we converted it already. If we haven't converted it yet, + // we'll just do it lazily. + if (recordDeclTypes.count(Context.getTagDeclType(RD).getTypePtr())) + convertRecordDeclType(RD); + + // If necessary, provide the full definition of a type only used with a + // declaration so far. + if (CGM.getModuleDebugInfo()) + llvm_unreachable("NYI"); +} + +/// Return record layout info for the given record decl. +const CIRGenRecordLayout & +CIRGenTypes::getCIRGenRecordLayout(const RecordDecl *RD) { + const auto *Key = Context.getTagDeclType(RD).getTypePtr(); + + auto I = CIRGenRecordLayouts.find(Key); + if (I != CIRGenRecordLayouts.end()) + return *I->second; + + // Compute the type information. + convertRecordDeclType(RD); + + // Now try again. + I = CIRGenRecordLayouts.find(Key); + + assert(I != CIRGenRecordLayouts.end() && + "Unable to find record layout information for type"); + return *I->second; +} + +bool CIRGenTypes::isPointerZeroInitializable(clang::QualType T) { + assert((T->isAnyPointerType() || T->isBlockPointerType()) && "Invalid type"); + return isZeroInitializable(T); +} + +bool CIRGenTypes::isZeroInitializable(QualType T) { + if (T->getAs()) + return Context.getTargetNullPointerValue(T) == 0; + + if (const auto *AT = Context.getAsArrayType(T)) { + if (isa(AT)) + return true; + if (const auto *CAT = dyn_cast(AT)) + if (Context.getConstantArrayElementCount(CAT) == 0) + return true; + T = Context.getBaseElementType(T); + } + + // Records are non-zero-initializable if they contain any + // non-zero-initializable subobjects. 
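The pointer check above and the record and member-pointer cases below can be illustrated with a few source types; the snippet is illustrative, and the Itanium detail is background knowledge rather than something spelled out in this patch.

```cpp
// Illustrative input only: whether an all-zero bit pattern is a valid
// "zero-initialized" value depends on the type, the target, and the ABI.
int plain = 0;            // scalar: zero bits are the value 0
int *ptr = nullptr;       // fine when the target's null pointer value is 0
struct S { int x; };      // record of zero-initializable fields: fine
int S::*member = nullptr; // member data pointers need the C++ ABI's answer;
                          // under the Itanium ABI the null value is -1, so
                          // all-zero bits would not be a null member pointer
                          // (hence the member-pointer case below is NYI).
```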
+ if (const RecordType *RT = T->getAs()) { + const RecordDecl *RD = RT->getDecl(); + return isZeroInitializable(RD); + } + + // We have to ask the ABI about member pointers. + if (const MemberPointerType *MPT = T->getAs()) + llvm_unreachable("NYI"); + + // Everything else is okay. + return true; +} + +bool CIRGenTypes::isZeroInitializable(const RecordDecl *RD) { + return getCIRGenRecordLayout(RD).isZeroInitializable(); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h new file mode 100644 index 000000000000..3bb5bafb194d --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h @@ -0,0 +1,281 @@ +//===--- CIRGenTypes.h - Type translation for CIR CodeGen -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the code that handles AST -> CIR type lowering. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENTYPES_H +#define LLVM_CLANG_LIB_CODEGEN_CODEGENTYPES_H + +#include "ABIInfo.h" +#include "CIRGenCall.h" +#include "CIRGenFunctionInfo.h" +#include "CIRGenRecordLayout.h" + +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/Type.h" +#include "clang/Basic/ABI.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/FnInfoOpts.h" + +#include "llvm/ADT/SmallPtrSet.h" + +#include "mlir/IR/MLIRContext.h" + +#include + +namespace llvm { +class FunctionType; +class DataLayout; +class Type; +class LLVMContext; +class StructType; +} // namespace llvm + +namespace clang { +class ASTContext; +template class CanQual; +class CXXConstructorDecl; +class CXXDestructorDecl; +class CXXMethodDecl; +class CodeGenOptions; +class FieldDecl; +class FunctionProtoType; +class ObjCInterfaceDecl; +class ObjCIvarDecl; +class PointerType; +class QualType; +class RecordDecl; +class TagDecl; +class TargetInfo; +class Type; +typedef CanQual CanQualType; +class GlobalDecl; + +} // end namespace clang + +namespace mlir { +class Type; +namespace cir { +class StructType; +} // namespace cir +} // namespace mlir + +namespace cir { +class CallArgList; +class CIRGenCXXABI; +class CIRGenModule; +class CIRGenFunctionInfo; +class CIRGenBuilderTy; + +/// This class organizes the cross-module state that is used while lowering +/// AST types to CIR types. +class CIRGenTypes { + clang::ASTContext &Context; + cir::CIRGenBuilderTy &Builder; + CIRGenModule &CGM; + const clang::TargetInfo &Target; + CIRGenCXXABI &TheCXXABI; + + // This should not be moved earlier, since its initialization depends on some + // of the previous reference members being already initialized + const ABIInfo &TheABIInfo; + + /// Contains the CIR type for any converted RecordDecl. + llvm::DenseMap> + CIRGenRecordLayouts; + + /// Contains the CIR type for any converted RecordDecl + llvm::DenseMap recordDeclTypes; + + /// Hold memoized CIRGenFunctionInfo results + llvm::FoldingSet FunctionInfos; + + /// This set keeps track of records that we're currently converting to a CIR + /// type. For example, when converting: + /// struct A { struct B { int x; } } when processing 'x', the 'A' and 'B' + /// types will be in this set. 
+ llvm::SmallPtrSet RecordsBeingLaidOut; + + llvm::SmallPtrSet FunctionsBeingProcessed; + + /// True if we didn't layout a function due to being inside a recursive struct + /// conversion, set this to true. + bool SkippedLayout; + + llvm::SmallVector DeferredRecords; + + /// Heper for ConvertType. + mlir::Type ConvertFunctionTypeInternal(clang::QualType FT); + +public: + CIRGenTypes(CIRGenModule &cgm); + ~CIRGenTypes(); + + cir::CIRGenBuilderTy &getBuilder() const { return Builder; } + CIRGenModule &getModule() const { return CGM; } + + /// Utility to check whether a function type can be converted to a CIR type + /// (i.e. doesn't depend on an incomplete tag type). + bool isFuncTypeConvertible(const clang::FunctionType *FT); + bool isFuncParamTypeConvertible(clang::QualType Ty); + + /// Convert clang calling convention to CIR calling convention. + mlir::cir::CallingConv ClangCallConvToCIRCallConv(clang::CallingConv CC); + + /// Derives the 'this' type for CIRGen purposes, i.e. ignoring method CVR + /// qualification. + clang::CanQualType DeriveThisType(const clang::CXXRecordDecl *RD, + const clang::CXXMethodDecl *MD); + + /// This map keeps cache of llvm::Types and maps clang::Type to + /// corresponding llvm::Type. + using TypeCacheTy = llvm::DenseMap; + TypeCacheTy TypeCache; + + clang::ASTContext &getContext() const { return Context; } + mlir::MLIRContext &getMLIRContext() const; + + bool isRecordLayoutComplete(const clang::Type *Ty) const; + bool noRecordsBeingLaidOut() const { return RecordsBeingLaidOut.empty(); } + bool isRecordBeingLaidOut(const clang::Type *Ty) const { + return RecordsBeingLaidOut.count(Ty); + } + + /// Return whether a type can be zero-initialized (in the C++ sense) with an + /// LLVM zeroinitializer. + bool isZeroInitializable(clang::QualType T); + + /// Check if the pointer type can be zero-initialized (in the C++ sense) + /// with an LLVM zeroinitializer. + bool isPointerZeroInitializable(clang::QualType T); + + /// Return whether a record type can be zero-initialized (in the C++ sense) + /// with an LLVM zeroinitializer. + bool isZeroInitializable(const clang::RecordDecl *RD); + + const ABIInfo &getABIInfo() const { return TheABIInfo; } + CIRGenCXXABI &getCXXABI() const { return TheCXXABI; } + + /// Convert type T into a mlir::Type. + mlir::Type ConvertType(clang::QualType T); + + mlir::Type convertRecordDeclType(const clang::RecordDecl *recordDecl); + + std::unique_ptr + computeRecordLayout(const clang::RecordDecl *D, mlir::cir::StructType *Ty); + + std::string getRecordTypeName(const clang::RecordDecl *, + llvm::StringRef suffix); + + /// Determine if a C++ inheriting constructor should have parameters matching + /// those of its inherited constructor. + bool inheritingCtorHasParams(const clang::InheritedConstructor &Inherited, + clang::CXXCtorType Type); + + const CIRGenRecordLayout &getCIRGenRecordLayout(const clang::RecordDecl *RD); + + /// Convert type T into an mlir::Type. This differs from + /// convertType in that it is used to convert to the memory representation + /// for a type. For example, the scalar representation for _Bool is i1, but + /// the memory representation is usually i8 or i32, depending on the target. + // TODO: convert this comment to account for MLIR's equivalence + mlir::Type convertTypeForMem(clang::QualType, bool forBitField = false); + + /// Get the CIR function type for \arg Info. 
+ mlir::cir::FuncType GetFunctionType(const CIRGenFunctionInfo &Info); + + mlir::cir::FuncType GetFunctionType(clang::GlobalDecl GD); + + /// Get the LLVM function type for use in a vtable, given a CXXMethodDecl. If + /// the method to has an incomplete return type, and/or incomplete argument + /// types, this will return the opaque type. + mlir::cir::FuncType GetFunctionTypeForVTable(clang::GlobalDecl GD); + + // The arrangement methods are split into three families: + // - those meant to drive the signature and prologue/epilogue + // of a function declaration or definition, + // - those meant for the computation of the CIR type for an abstract + // appearance of a function, and + // - those meant for performing the CIR-generation of a call. + // They differ mainly in how they deal with optional (i.e. variadic) + // arguments, as well as unprototyped functions. + // + // Key points: + // - The CIRGenFunctionInfo for emitting a specific call site must include + // entries for the optional arguments. + // - The function type used at the call site must reflect the formal + // signature + // of the declaration being called, or else the call will go away. + // - For the most part, unprototyped functions are called by casting to a + // formal signature inferred from the specific argument types used at the + // call-site. However, some targets (e.g. x86-64) screw with this for + // compatability reasons. + + const CIRGenFunctionInfo &arrangeGlobalDeclaration(clang::GlobalDecl GD); + + /// UpdateCompletedType - when we find the full definition for a TagDecl, + /// replace the 'opaque' type we previously made for it if applicable. + void UpdateCompletedType(const clang::TagDecl *TD); + + /// Free functions are functions that are compatible with an ordinary C + /// function pointer type. + const CIRGenFunctionInfo & + arrangeFunctionDeclaration(const clang::FunctionDecl *FD); + + const CIRGenFunctionInfo & + arrangeBuiltinFunctionCall(clang::QualType resultType, + const CallArgList &args); + + const CIRGenFunctionInfo &arrangeCXXConstructorCall( + const CallArgList &Args, const clang::CXXConstructorDecl *D, + clang::CXXCtorType CtorKind, unsigned ExtraPrefixArgs, + unsigned ExtraSuffixArgs, bool PassProtoArgs = true); + + const CIRGenFunctionInfo & + arrangeCXXMethodCall(const CallArgList &args, + const clang::FunctionProtoType *type, + RequiredArgs required, unsigned numPrefixArgs); + + /// C++ methods have some special rules and also have implicit parameters. + const CIRGenFunctionInfo & + arrangeCXXMethodDeclaration(const clang::CXXMethodDecl *MD); + const CIRGenFunctionInfo &arrangeCXXStructorDeclaration(clang::GlobalDecl GD); + + const CIRGenFunctionInfo & + arrangeCXXMethodType(const clang::CXXRecordDecl *RD, + const clang::FunctionProtoType *FTP, + const clang::CXXMethodDecl *MD); + + const CIRGenFunctionInfo & + arrangeFreeFunctionCall(const CallArgList &Args, + const clang::FunctionType *Ty, bool ChainCall); + + const CIRGenFunctionInfo & + arrangeFreeFunctionType(clang::CanQual Ty); + + const CIRGenFunctionInfo & + arrangeFreeFunctionType(clang::CanQual FTNP); + + /// "Arrange" the LLVM information for a call or type with the given + /// signature. This is largely an internal method; other clients + /// should use one of the above routines, which ultimately defer to + /// this. 
+ /// + /// \param argTypes - must all actually be canonical as params + const CIRGenFunctionInfo &arrangeCIRFunctionInfo( + clang::CanQualType returnType, FnInfoOpts opts, + llvm::ArrayRef argTypes, + clang::FunctionType::ExtInfo info, + llvm::ArrayRef paramInfos, + RequiredArgs args); +}; +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenVTables.cpp b/clang/lib/CIR/CodeGen/CIRGenVTables.cpp new file mode 100644 index 000000000000..3c2af8fbbfdf --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenVTables.cpp @@ -0,0 +1,578 @@ +//===--- CIRGenVTables.cpp - Emit CIR Code for C++ vtables ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ code generation of virtual tables. +// +//===----------------------------------------------------------------------===// + +#include "CIRGenCXXABI.h" +#include "CIRGenFunction.h" +#include "CIRGenModule.h" +#include "mlir/IR/Attributes.h" +#include "clang/AST/Attr.h" +#include "clang/AST/CXXInheritance.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/VTTBuilder.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include +#include + +using namespace clang; +using namespace cir; + +CIRGenVTables::CIRGenVTables(CIRGenModule &CGM) + : CGM(CGM), VTContext(CGM.getASTContext().getVTableContext()) {} + +static bool UseRelativeLayout(const CIRGenModule &CGM) { + return CGM.getTarget().getCXXABI().isItaniumFamily() && + CGM.getItaniumVTableContext().isRelativeLayout(); +} + +bool CIRGenVTables::useRelativeLayout() const { return UseRelativeLayout(CGM); } + +mlir::Type CIRGenModule::getVTableComponentType() { + mlir::Type ptrTy = builder.getUInt8PtrTy(); + if (UseRelativeLayout(*this)) + ptrTy = builder.getUInt32PtrTy(); + return ptrTy; +} + +mlir::Type CIRGenVTables::getVTableComponentType() { + return CGM.getVTableComponentType(); +} + +mlir::Type CIRGenVTables::getVTableType(const VTableLayout &layout) { + SmallVector tys; + auto ctx = CGM.getBuilder().getContext(); + auto componentType = getVTableComponentType(); + for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) + tys.push_back( + mlir::cir::ArrayType::get(ctx, componentType, layout.getVTableSize(i))); + + // FIXME(cir): should VTableLayout be encoded like we do for some + // AST nodes? + return CGM.getBuilder().getAnonStructTy(tys, /*incomplete=*/false); +} + +/// At this point in the translation unit, does it appear that can we +/// rely on the vtable being defined elsewhere in the program? +/// +/// The response is really only definitive when called at the end of +/// the translation unit. +/// +/// The only semantic restriction here is that the object file should +/// not contain a vtable definition when that vtable is defined +/// strongly elsewhere. Otherwise, we'd just like to avoid emitting +/// vtables when unnecessary. +/// TODO(cir): this should be merged into common AST helper for codegen. 
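A source-level sketch of the key-function rule that the function below implements; illustrative only.

```cpp
// Illustrative input only. Under the Itanium C++ ABI, the vtable is emitted
// in the translation unit that defines the class's key function (the first
// non-inline, non-pure virtual member function).
struct Widget {
  virtual void draw();     // key function: declared here, defined elsewhere
  virtual ~Widget() {}     // defined inline, so not the key function
};
// In a TU that merely uses Widget, the key function has no body here, so
// isVTableExternal() returns true and no vtable is emitted. The TU that
// contains `void Widget::draw() {}` is the one that defines the vtable.
```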
+bool CIRGenVTables::isVTableExternal(const CXXRecordDecl *RD) { + assert(RD->isDynamicClass() && "Non-dynamic classes have no VTable."); + + // We always synthesize vtables if they are needed in the MS ABI. MSVC doesn't + // emit them even if there is an explicit template instantiation. + if (CGM.getTarget().getCXXABI().isMicrosoft()) + return false; + + // If we have an explicit instantiation declaration (and not a + // definition), the vtable is defined elsewhere. + TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind(); + if (TSK == TSK_ExplicitInstantiationDeclaration) + return true; + + // Otherwise, if the class is an instantiated template, the + // vtable must be defined here. + if (TSK == TSK_ImplicitInstantiation || + TSK == TSK_ExplicitInstantiationDefinition) + return false; + + // Otherwise, if the class doesn't have a key function (possibly + // anymore), the vtable must be defined here. + const CXXMethodDecl *keyFunction = + CGM.getASTContext().getCurrentKeyFunction(RD); + if (!keyFunction) + return false; + + // Otherwise, if we don't have a definition of the key function, the + // vtable must be defined somewhere else. + return !keyFunction->hasBody(); +} + +static bool shouldEmitAvailableExternallyVTable(const CIRGenModule &CGM, + const CXXRecordDecl *RD) { + return CGM.getCodeGenOpts().OptimizationLevel > 0 && + CGM.getCXXABI().canSpeculativelyEmitVTable(RD); +} + +/// Given that we're currently at the end of the translation unit, and +/// we've emitted a reference to the vtable for this class, should +/// we define that vtable? +static bool shouldEmitVTableAtEndOfTranslationUnit(CIRGenModule &CGM, + const CXXRecordDecl *RD) { + // If vtable is internal then it has to be done. + if (!CGM.getVTables().isVTableExternal(RD)) + return true; + + // If it's external then maybe we will need it as available_externally. + return shouldEmitAvailableExternallyVTable(CGM, RD); +} + +/// Given that at some point we emitted a reference to one or more +/// vtables, and that we are now at the end of the translation unit, +/// decide whether we should emit them. +void CIRGenModule::buildDeferredVTables() { +#ifndef NDEBUG + // Remember the size of DeferredVTables, because we're going to assume + // that this entire operation doesn't modify it. 
+ size_t savedSize = DeferredVTables.size(); +#endif + + for (const CXXRecordDecl *RD : DeferredVTables) + if (shouldEmitVTableAtEndOfTranslationUnit(*this, RD)) { + VTables.GenerateClassData(RD); + } else if (shouldOpportunisticallyEmitVTables()) { + llvm_unreachable("NYI"); + } + + assert(savedSize == DeferredVTables.size() && + "deferred extra vtables during vtable emission?"); + DeferredVTables.clear(); +} + +void CIRGenVTables::GenerateClassData(const CXXRecordDecl *RD) { + assert(!MissingFeatures::generateDebugInfo()); + + if (RD->getNumVBases()) + CGM.getCXXABI().emitVirtualInheritanceTables(RD); + + CGM.getCXXABI().emitVTableDefinitions(*this, RD); +} + +static void AddPointerLayoutOffset(CIRGenModule &CGM, + ConstantArrayBuilder &builder, + CharUnits offset) { + builder.add(CGM.getBuilder().getConstPtrAttr(CGM.getBuilder().getUInt8PtrTy(), + offset.getQuantity())); +} + +static void AddRelativeLayoutOffset(CIRGenModule &CGM, + ConstantArrayBuilder &builder, + CharUnits offset) { + llvm_unreachable("NYI"); + // builder.add(llvm::ConstantInt::get(CGM.Int32Ty, offset.getQuantity())); +} + +void CIRGenVTables::addVTableComponent(ConstantArrayBuilder &builder, + const VTableLayout &layout, + unsigned componentIndex, + mlir::Attribute rtti, + unsigned &nextVTableThunkIndex, + unsigned vtableAddressPoint, + bool vtableHasLocalLinkage) { + auto &component = layout.vtable_components()[componentIndex]; + + auto addOffsetConstant = + useRelativeLayout() ? AddRelativeLayoutOffset : AddPointerLayoutOffset; + + switch (component.getKind()) { + case VTableComponent::CK_VCallOffset: + return addOffsetConstant(CGM, builder, component.getVCallOffset()); + + case VTableComponent::CK_VBaseOffset: + return addOffsetConstant(CGM, builder, component.getVBaseOffset()); + + case VTableComponent::CK_OffsetToTop: + return addOffsetConstant(CGM, builder, component.getOffsetToTop()); + + case VTableComponent::CK_RTTI: + if (useRelativeLayout()) { + llvm_unreachable("NYI"); + // return addRelativeComponent(builder, rtti, vtableAddressPoint, + // vtableHasLocalLinkage, + // /*isCompleteDtor=*/false); + } else { + assert((mlir::isa(rtti) || + mlir::isa(rtti)) && + "expected GlobalViewAttr or ConstPtrAttr"); + return builder.add(rtti); + } + + case VTableComponent::CK_FunctionPointer: + case VTableComponent::CK_CompleteDtorPointer: + case VTableComponent::CK_DeletingDtorPointer: { + GlobalDecl GD = component.getGlobalDecl(); + + if (CGM.getLangOpts().CUDA) { + llvm_unreachable("NYI"); + } + + auto getSpecialVirtualFn = [&](StringRef name) -> mlir::cir::FuncOp { + // FIXME(PR43094): When merging comdat groups, lld can select a local + // symbol as the signature symbol even though it cannot be accessed + // outside that symbol's TU. The relative vtables ABI would make + // __cxa_pure_virtual and __cxa_deleted_virtual local symbols, and + // depending on link order, the comdat groups could resolve to the one + // with the local symbol. As a temporary solution, fill these components + // with zero. We shouldn't be calling these in the first place anyway. + if (useRelativeLayout()) + llvm_unreachable("NYI"); + + // For NVPTX devices in OpenMP emit special functon as null pointers, + // otherwise linking ends up with unresolved references. 
+ if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice && + CGM.getTriple().isNVPTX()) + llvm_unreachable("NYI"); + + mlir::cir::FuncType fnTy = + CGM.getBuilder().getFuncType({}, CGM.getBuilder().getVoidTy()); + mlir::cir::FuncOp fnPtr = CGM.createRuntimeFunction(fnTy, name); + // LLVM codegen handles unnamedAddr + assert(!MissingFeatures::unnamedAddr()); + return fnPtr; + }; + + mlir::cir::FuncOp fnPtr; + if (cast(GD.getDecl())->isPureVirtual()) { + // Pure virtual member functions. + if (!PureVirtualFn) + PureVirtualFn = + getSpecialVirtualFn(CGM.getCXXABI().getPureVirtualCallName()); + fnPtr = PureVirtualFn; + + } else if (cast(GD.getDecl())->isDeleted()) { + // Deleted virtual member functions. + if (!DeletedVirtualFn) + DeletedVirtualFn = + getSpecialVirtualFn(CGM.getCXXABI().getDeletedVirtualCallName()); + fnPtr = DeletedVirtualFn; + + } else if (nextVTableThunkIndex < layout.vtable_thunks().size() && + layout.vtable_thunks()[nextVTableThunkIndex].first == + componentIndex) { + // Thunks. + llvm_unreachable("NYI"); + // auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second; + + // nextVTableThunkIndex++; + // fnPtr = maybeEmitThunk(GD, thunkInfo, /*ForVTable=*/true); + + } else { + // Otherwise we can use the method definition directly. + auto fnTy = CGM.getTypes().GetFunctionTypeForVTable(GD); + fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true); + } + + if (useRelativeLayout()) { + llvm_unreachable("NYI"); + } else { + return builder.add(mlir::cir::GlobalViewAttr::get( + CGM.getBuilder().getUInt8PtrTy(), + mlir::FlatSymbolRefAttr::get(fnPtr.getSymNameAttr()))); + } + } + + case VTableComponent::CK_UnusedFunctionPointer: + if (useRelativeLayout()) + llvm_unreachable("NYI"); + else { + llvm_unreachable("NYI"); + // return builder.addNullPointer(CGM.Int8PtrTy); + } + } + + llvm_unreachable("Unexpected vtable component kind"); +} + +void CIRGenVTables::createVTableInitializer(ConstantStructBuilder &builder, + const VTableLayout &layout, + mlir::Attribute rtti, + bool vtableHasLocalLinkage) { + auto componentType = getVTableComponentType(); + + const auto &addressPoints = layout.getAddressPointIndices(); + unsigned nextVTableThunkIndex = 0; + for (unsigned vtableIndex = 0, endIndex = layout.getNumVTables(); + vtableIndex != endIndex; ++vtableIndex) { + auto vtableElem = builder.beginArray(componentType); + + size_t vtableStart = layout.getVTableOffset(vtableIndex); + size_t vtableEnd = vtableStart + layout.getVTableSize(vtableIndex); + for (size_t componentIndex = vtableStart; componentIndex < vtableEnd; + ++componentIndex) { + addVTableComponent(vtableElem, layout, componentIndex, rtti, + nextVTableThunkIndex, addressPoints[vtableIndex], + vtableHasLocalLinkage); + } + vtableElem.finishAndAddTo(rtti.getContext(), builder); + } +} + +/// Compute the required linkage of the vtable for the given class. +/// +/// Note that we only call this at the end of the translation unit. +mlir::cir::GlobalLinkageKind +CIRGenModule::getVTableLinkage(const CXXRecordDecl *RD) { + if (!RD->isExternallyVisible()) + return mlir::cir::GlobalLinkageKind::InternalLinkage; + + // We're at the end of the translation unit, so the current key + // function is fully correct. + const CXXMethodDecl *keyFunction = astCtx.getCurrentKeyFunction(RD); + if (keyFunction && !RD->hasAttr()) { + // If this class has a key function, use that to determine the + // linkage of the vtable. 
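How the cases handled in this function play out for a few class shapes, as a rough illustration; dllimport/dllexport and AppleKext are left aside.

```cpp
// Illustrative input only: typical linkage outcomes for the vtable.
struct OutOfLineKey {
  virtual void f();        // key function defined in exactly one TU
};                         // -> external linkage, emitted only in that TU

struct AllInline {
  virtual void g() {}      // every virtual is inline: no key function
};                         // -> falls through to linkonce_odr (discardable)

template <typename T> struct Tpl { virtual void h() {} };
Tpl<int> implicitUse;      // implicit instantiation      -> linkonce_odr
template struct Tpl<long>; // explicit instantiation def. -> weak_odr
```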
+ const FunctionDecl *def = nullptr; + if (keyFunction->hasBody(def)) + keyFunction = cast(def); + + switch (keyFunction->getTemplateSpecializationKind()) { + case TSK_Undeclared: + case TSK_ExplicitSpecialization: + assert( + (def || codeGenOpts.OptimizationLevel > 0 || + codeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo) && + "Shouldn't query vtable linkage without key function, " + "optimizations, or debug info"); + if (!def && codeGenOpts.OptimizationLevel > 0) + return mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage; + + if (keyFunction->isInlined()) + return !astCtx.getLangOpts().AppleKext + ? mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage + : mlir::cir::GlobalLinkageKind::InternalLinkage; + + return mlir::cir::GlobalLinkageKind::ExternalLinkage; + + case TSK_ImplicitInstantiation: + return !astCtx.getLangOpts().AppleKext + ? mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage + : mlir::cir::GlobalLinkageKind::InternalLinkage; + + case TSK_ExplicitInstantiationDefinition: + return !astCtx.getLangOpts().AppleKext + ? mlir::cir::GlobalLinkageKind::WeakODRLinkage + : mlir::cir::GlobalLinkageKind::InternalLinkage; + + case TSK_ExplicitInstantiationDeclaration: + llvm_unreachable("Should not have been asked to emit this"); + } + } + + // -fapple-kext mode does not support weak linkage, so we must use + // internal linkage. + if (astCtx.getLangOpts().AppleKext) + return mlir::cir::GlobalLinkageKind::InternalLinkage; + + auto DiscardableODRLinkage = mlir::cir::GlobalLinkageKind::LinkOnceODRLinkage; + auto NonDiscardableODRLinkage = mlir::cir::GlobalLinkageKind::WeakODRLinkage; + if (RD->hasAttr()) { + // Cannot discard exported vtables. + DiscardableODRLinkage = NonDiscardableODRLinkage; + } else if (RD->hasAttr()) { + // Imported vtables are available externally. + DiscardableODRLinkage = + mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage; + NonDiscardableODRLinkage = + mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage; + } + + switch (RD->getTemplateSpecializationKind()) { + case TSK_Undeclared: + case TSK_ExplicitSpecialization: + case TSK_ImplicitInstantiation: + return DiscardableODRLinkage; + + case TSK_ExplicitInstantiationDeclaration: { + // Explicit instantiations in MSVC do not provide vtables, so we must emit + // our own. + if (getTarget().getCXXABI().isMicrosoft()) + return DiscardableODRLinkage; + auto r = shouldEmitAvailableExternallyVTable(*this, RD) + ? mlir::cir::GlobalLinkageKind::AvailableExternallyLinkage + : mlir::cir::GlobalLinkageKind::ExternalLinkage; + assert(r == mlir::cir::GlobalLinkageKind::ExternalLinkage && + "available external NYI"); + return r; + } + + case TSK_ExplicitInstantiationDefinition: + return NonDiscardableODRLinkage; + } + + llvm_unreachable("Invalid TemplateSpecializationKind!"); +} + +mlir::cir::GlobalOp +getAddrOfVTTVTable(CIRGenVTables &CGVT, CIRGenModule &CGM, + const CXXRecordDecl *MostDerivedClass, + const VTTVTable &vtable, + mlir::cir::GlobalLinkageKind linkage, + VTableLayout::AddressPointsMapTy &addressPoints) { + if (vtable.getBase() == MostDerivedClass) { + assert(vtable.getBaseOffset().isZero() && + "Most derived class vtable must have a zero offset!"); + // This is a regular vtable. 
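+    // (Construction vtables for bases other than the most derived class are
+    // not implemented yet; that path hits the llvm_unreachable below.)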
+ return CGM.getCXXABI().getAddrOfVTable(MostDerivedClass, CharUnits()); + } + + llvm_unreachable("generateConstructionVTable NYI"); +} + +mlir::cir::GlobalOp CIRGenVTables::getAddrOfVTT(const CXXRecordDecl *RD) { + assert(RD->getNumVBases() && "Only classes with virtual bases need a VTT"); + + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + cast(CGM.getCXXABI().getMangleContext()) + .mangleCXXVTT(RD, Out); + StringRef Name = OutName.str(); + + // This will also defer the definition of the VTT. + (void)CGM.getCXXABI().getAddrOfVTable(RD, CharUnits()); + + VTTBuilder Builder(CGM.getASTContext(), RD, /*GenerateDefinition=*/false); + + auto ArrayType = mlir::cir::ArrayType::get(CGM.getBuilder().getContext(), + CGM.getBuilder().getUInt8PtrTy(), + Builder.getVTTComponents().size()); + auto Align = + CGM.getDataLayout().getABITypeAlign(CGM.getBuilder().getUInt8PtrTy()); + auto VTT = CGM.createOrReplaceCXXRuntimeVariable( + CGM.getLoc(RD->getSourceRange()), Name, ArrayType, + mlir::cir::GlobalLinkageKind::ExternalLinkage, + CharUnits::fromQuantity(Align)); + CGM.setGVProperties(VTT, RD); + return VTT; +} + +/// Emit the definition of the given vtable. +void CIRGenVTables::buildVTTDefinition(mlir::cir::GlobalOp VTT, + mlir::cir::GlobalLinkageKind Linkage, + const CXXRecordDecl *RD) { + VTTBuilder Builder(CGM.getASTContext(), RD, /*GenerateDefinition=*/true); + + auto ArrayType = mlir::cir::ArrayType::get(CGM.getBuilder().getContext(), + CGM.getBuilder().getUInt8PtrTy(), + Builder.getVTTComponents().size()); + + SmallVector VTables; + SmallVector VTableAddressPoints; + for (const VTTVTable *i = Builder.getVTTVTables().begin(), + *e = Builder.getVTTVTables().end(); + i != e; ++i) { + VTableAddressPoints.push_back(VTableAddressPointsMapTy()); + VTables.push_back(getAddrOfVTTVTable(*this, CGM, RD, *i, Linkage, + VTableAddressPoints.back())); + } + + SmallVector VTTComponents; + for (const VTTComponent *i = Builder.getVTTComponents().begin(), + *e = Builder.getVTTComponents().end(); + i != e; ++i) { + const VTTVTable &VTTVT = Builder.getVTTVTables()[i->VTableIndex]; + mlir::cir::GlobalOp VTable = VTables[i->VTableIndex]; + VTableLayout::AddressPointLocation AddressPoint; + if (VTTVT.getBase() == RD) { + // Just get the address point for the regular vtable. + AddressPoint = + getItaniumVTableContext().getVTableLayout(RD).getAddressPoint( + i->VTableBase); + } else { + AddressPoint = VTableAddressPoints[i->VTableIndex].lookup(i->VTableBase); + assert(AddressPoint.AddressPointIndex != 0 && + "Did not find ctor vtable address point!"); + } + + mlir::Attribute Idxs[3] = { + CGM.getBuilder().getI32IntegerAttr(0), + CGM.getBuilder().getI32IntegerAttr(AddressPoint.VTableIndex), + CGM.getBuilder().getI32IntegerAttr(AddressPoint.AddressPointIndex), + }; + + auto Indices = mlir::ArrayAttr::get(CGM.getBuilder().getContext(), Idxs); + auto Init = CGM.getBuilder().getGlobalViewAttr( + CGM.getBuilder().getUInt8PtrTy(), VTable, Indices); + + VTTComponents.push_back(Init); + } + + auto Init = CGM.getBuilder().getConstArray( + mlir::ArrayAttr::get(CGM.getBuilder().getContext(), VTTComponents), + ArrayType); + + VTT.setInitialValueAttr(Init); + + // Set the correct linkage. 
+ VTT.setLinkage(Linkage); + mlir::SymbolTable::setSymbolVisibility(VTT, + CIRGenModule::getMLIRVisibility(VTT)); + + if (CGM.supportsCOMDAT() && VTT.isWeakForLinker()) { + assert(!MissingFeatures::setComdat()); + } +} + +void CIRGenVTables::buildThunks(GlobalDecl GD) { + const CXXMethodDecl *MD = + cast(GD.getDecl())->getCanonicalDecl(); + + // We don't need to generate thunks for the base destructor. + if (isa(MD) && GD.getDtorType() == Dtor_Base) + return; + + const VTableContextBase::ThunkInfoVectorTy *ThunkInfoVector = + VTContext->getThunkInfo(GD); + + if (!ThunkInfoVector) + return; + + for ([[maybe_unused]] const ThunkInfo &Thunk : *ThunkInfoVector) + llvm_unreachable("NYI"); +} + +bool CIRGenModule::AlwaysHasLTOVisibilityPublic(const CXXRecordDecl *RD) { + if (RD->hasAttr() || RD->hasAttr() || + RD->hasAttr() || RD->hasAttr()) + return true; + + if (!getCodeGenOpts().LTOVisibilityPublicStd) + return false; + + const DeclContext *DC = RD; + while (true) { + auto *D = cast(DC); + DC = DC->getParent(); + if (isa(DC->getRedeclContext())) { + if (auto *ND = dyn_cast(D)) + if (const IdentifierInfo *II = ND->getIdentifier()) + if (II->isStr("std") || II->isStr("stdext")) + return true; + break; + } + } + + return false; +} + +bool CIRGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + if (!isExternallyVisible(LV.getLinkage())) + return true; + + if (!getTriple().isOSBinFormatCOFF() && + LV.getVisibility() != HiddenVisibility) + return false; + + return !AlwaysHasLTOVisibilityPublic(RD); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenVTables.h b/clang/lib/CIR/CodeGen/CIRGenVTables.h new file mode 100644 index 000000000000..2def67ab1bc6 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenVTables.h @@ -0,0 +1,179 @@ +//===--- CIRGenVTables.h - Emit LLVM Code for C++ vtables -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with C++ code generation of virtual tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENVTABLES_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENVTABLES_H + +#include "ConstantInitBuilder.h" +#include "clang/AST/BaseSubobject.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/GlobalDecl.h" +#include "clang/AST/VTableBuilder.h" +#include "clang/Basic/ABI.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/ADT/DenseMap.h" + +namespace clang { +class CXXRecordDecl; +} + +namespace cir { +class CIRGenModule; +// class ConstantArrayBuilder; +// class ConstantStructBuilder; + +class CIRGenVTables { + CIRGenModule &CGM; + + clang::VTableContextBase *VTContext; + + /// VTableAddressPointsMapTy - Address points for a single vtable. + typedef clang::VTableLayout::AddressPointsMapTy VTableAddressPointsMapTy; + + typedef std::pair + BaseSubobjectPairTy; + typedef llvm::DenseMap SubVTTIndiciesMapTy; + + /// SubVTTIndicies - Contains indices into the various sub-VTTs. + SubVTTIndiciesMapTy SubVTTIndicies; + + typedef llvm::DenseMap + SecondaryVirtualPointerIndicesMapTy; + + /// SecondaryVirtualPointerIndices - Contains the secondary virtual pointer + /// indices. 
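+  /// Keyed on (record, base subobject) pairs; the value is the index in the
+  /// VTT where the virtual pointer for that subobject is located.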
+ SecondaryVirtualPointerIndicesMapTy SecondaryVirtualPointerIndices; + + /// Cache for the pure virtual member call function. + mlir::cir::FuncOp PureVirtualFn = nullptr; + + /// Cache for the deleted virtual member call function. + mlir::cir::FuncOp DeletedVirtualFn = nullptr; + + // /// Get the address of a thunk and emit it if necessary. + // llvm::Constant *maybeEmitThunk(GlobalDecl GD, + // const ThunkInfo &ThunkAdjustments, + // bool ForVTable); + + void addVTableComponent(ConstantArrayBuilder &builder, + const VTableLayout &layout, unsigned componentIndex, + mlir::Attribute rtti, unsigned &nextVTableThunkIndex, + unsigned vtableAddressPoint, + bool vtableHasLocalLinkage); + + // /// Add a 32-bit offset to a component relative to the vtable when using + // the + // /// relative vtables ABI. The array builder points to the start of the + // vtable. void addRelativeComponent(ConstantArrayBuilder &builder, + // llvm::Constant *component, + // unsigned vtableAddressPoint, + // bool vtableHasLocalLinkage, + // bool isCompleteDtor) const; + + // /// Create a dso_local stub that will be used for a relative reference in + // the + // /// relative vtable layout. This stub will just be a tail call to the + // original + // /// function and propagate any function attributes from the original. If + // the + // /// original function is already dso_local, the original is returned + // instead + // /// and a stub is not created. + // llvm::Function * + // getOrCreateRelativeStub(llvm::Function *func, + // llvm::GlobalValue::LinkageTypes stubLinkage, + // bool isCompleteDtor) const; + + bool useRelativeLayout() const; + + mlir::Type getVTableComponentType(); + +public: + /// Add vtable components for the given vtable layout to the given + /// global initializer. + void createVTableInitializer(ConstantStructBuilder &builder, + const VTableLayout &layout, mlir::Attribute rtti, + bool vtableHasLocalLinkage); + + CIRGenVTables(CIRGenModule &CGM); + + clang::ItaniumVTableContext &getItaniumVTableContext() { + return *llvm::cast(VTContext); + } + + const clang::ItaniumVTableContext &getItaniumVTableContext() const { + return *llvm::cast(VTContext); + } + + // MicrosoftVTableContext &getMicrosoftVTableContext() { + // return *cast(VTContext); + // } + + // /// getSubVTTIndex - Return the index of the sub-VTT for the base class + // of the + // /// given record decl. + // uint64_t getSubVTTIndex(const CXXRecordDecl *RD, BaseSubobject Base); + + // /// getSecondaryVirtualPointerIndex - Return the index in the VTT where + // the + // /// virtual pointer for the given subobject is located. + // uint64_t getSecondaryVirtualPointerIndex(const CXXRecordDecl *RD, + // BaseSubobject Base); + + // /// GenerateConstructionVTable - Generate a construction vtable for the + // given + // /// base subobject. + // llvm::GlobalVariable * + // GenerateConstructionVTable(const CXXRecordDecl *RD, const BaseSubobject + // &Base, + // bool BaseIsVirtual, + // llvm::GlobalVariable::LinkageTypes Linkage, + // VTableAddressPointsMapTy &AddressPoints); + + /// Get the address of the VTT for the given record decl. + mlir::cir::GlobalOp getAddrOfVTT(const CXXRecordDecl *RD); + + /// Emit the definition of the given vtable. + void buildVTTDefinition(mlir::cir::GlobalOp VTT, + mlir::cir::GlobalLinkageKind Linkage, + const CXXRecordDecl *RD); + + /// Emit the associated thunks for the given global decl. 
+ void buildThunks(GlobalDecl GD); + + /// Generate all the class data required to be generated upon definition of a + /// KeyFunction. This includes the vtable, the RTTI data structure (if RTTI + /// is enabled) and the VTT (if the class has virtual bases). + void GenerateClassData(const clang::CXXRecordDecl *RD); + + bool isVTableExternal(const clang::CXXRecordDecl *RD); + + /// Returns the type of a vtable with the given layout. Normally a struct of + /// arrays of pointers, with one struct element for each vtable in the vtable + /// group. + mlir::Type getVTableType(const clang::VTableLayout &layout); + + // /// Generate a public facing alias for the vtable and make the vtable + // either + // /// hidden or private. The alias will have the original linkage and + // visibility + // /// of the vtable. This is used for cases under the relative vtables ABI + // /// when a vtable may not be dso_local. + // void GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable, + // llvm::StringRef AliasNameRef); + + // /// Specify a global should not be instrumented with hwasan. + // void RemoveHwasanMetadata(llvm::GlobalValue *GV) const; +}; + +} // end namespace cir +#endif diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h new file mode 100644 index 000000000000..50a925eabdbd --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -0,0 +1,552 @@ +//===-- CIRGenValue.h - CIRGen wrappers for mlir::Value ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes implement wrappers around mlir::Value in order to fully +// represent the range of values for C L- and R- values. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CIRGENVALUE_H +#define LLVM_CLANG_LIB_CIR_CIRGENVALUE_H + +#include "Address.h" +#include "CIRGenRecordLayout.h" +#include "CIRGenTBAA.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/Type.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "llvm/ADT/PointerIntPair.h" + +#include "mlir/IR/Value.h" + +namespace cir { + +/// This trivial value class is used to represent the result of an +/// expression that is evaluated. It can be one of three things: either a +/// simple MLIR SSA value, a pair of SSA values for complex numbers, or the +/// address of an aggregate value in memory. +class RValue { + enum Flavor { Scalar, Complex, Aggregate }; + + // The shift to make to an aggregate's alignment to make it look + // like a pointer. + enum { AggAlignShift = 4 }; + + // Stores first value and flavor. + llvm::PointerIntPair V1; + // Stores second value and volatility. + llvm::PointerIntPair, 1, bool> V2; + // Stores element type for aggregate values. + mlir::Type ElementType; + +public: + bool isScalar() const { return V1.getInt() == Scalar; } + bool isComplex() const { return V1.getInt() == Complex; } + bool isAggregate() const { return V1.getInt() == Aggregate; } + bool isIgnored() const { return isScalar() && !getScalarVal(); } + + bool isVolatileQualified() const { return V2.getInt(); } + + /// Return the mlir::Value of this scalar value. 
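+  /// For example (illustrative only, with `value` standing for some
+  /// previously-emitted mlir::Value):
+  /// \code
+  ///   RValue rv = RValue::get(value);
+  ///   if (rv.isScalar())
+  ///     mlir::Value v = rv.getScalarVal();
+  /// \endcode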
+ mlir::Value getScalarVal() const { + assert(isScalar() && "Not a scalar!"); + return V1.getPointer(); + } + + /// Return the real/imag components of this complex value. + mlir::Value getComplexVal() const { + assert(isComplex() && "Not a complex!"); + return V1.getPointer(); + } + + /// Return the mlir::Value of the address of the aggregate. + Address getAggregateAddress() const { + assert(isAggregate() && "Not an aggregate!"); + auto align = reinterpret_cast(V2.getPointer().get()) >> + AggAlignShift; + return Address(V1.getPointer(), ElementType, + clang::CharUnits::fromQuantity(align)); + } + + mlir::Value getAggregatePointer() const { + assert(isAggregate() && "Not an aggregate!"); + return V1.getPointer(); + } + + static RValue getIgnored() { + // FIXME: should we make this a more explicit state? + return get(nullptr); + } + + static RValue get(mlir::Value V) { + RValue ER; + ER.V1.setPointer(V); + ER.V1.setInt(Scalar); + ER.V2.setInt(false); + return ER; + } + static RValue getComplex(mlir::Value V) { + RValue ER; + ER.V1.setPointer(V); + ER.V1.setInt(Complex); + ER.V2.setInt(false); + return ER; + } + // FIXME: Aggregate rvalues need to retain information about whether they are + // volatile or not. Remove default to find all places that probably get this + // wrong. + static RValue getAggregate(Address addr, bool isVolatile = false) { + RValue ER; + ER.V1.setPointer(addr.getPointer()); + ER.V1.setInt(Aggregate); + ER.ElementType = addr.getElementType(); + + auto align = static_cast(addr.getAlignment().getQuantity()); + ER.V2.setPointer(reinterpret_cast(align << AggAlignShift)); + ER.V2.setInt(isVolatile); + return ER; + } +}; + +/// The source of the alignment of an l-value; an expression of +/// confidence in the alignment actually matching the estimate. +enum class AlignmentSource { + /// The l-value was an access to a declared entity or something + /// equivalently strong, like the address of an array allocated by a + /// language runtime. + Decl, + + /// The l-value was considered opaque, so the alignment was + /// determined from a type, but that type was an explicitly-aligned + /// typedef. + AttributedType, + + /// The l-value was considered opaque, so the alignment was + /// determined from a type. + Type +}; + +/// Given that the base address has the given alignment source, what's +/// our confidence in the alignment of the field? +static inline AlignmentSource getFieldAlignmentSource(AlignmentSource Source) { + // For now, we don't distinguish fields of opaque pointers from + // top-level declarations, but maybe we should. + return AlignmentSource::Decl; +} + +class LValueBaseInfo { + AlignmentSource AlignSource; + +public: + explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type) + : AlignSource(Source) {} + AlignmentSource getAlignmentSource() const { return AlignSource; } + void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; } + + void mergeForCast(const LValueBaseInfo &Info) { + setAlignmentSource(Info.getAlignmentSource()); + } +}; + +class LValue { + enum { + Simple, // This is a normal l-value, use getAddress(). + VectorElt, // This is a vector element l-value (V[i]), use getVector* + BitField, // This is a bitfield l-value, use getBitfield*. 
+ ExtVectorElt, // This is an extended vector subset, use getExtVectorComp + GlobalReg, // This is a register l-value, use getGlobalReg() + MatrixElt // This is a matrix element, use getVector* + } LVType; + clang::QualType Type; + clang::Qualifiers Quals; + + // LValue is non-gc'able for any reason, including being a parameter or local + // variable. + bool NonGC : 1; + + // This flag shows if a nontemporal load/stores should be used when accessing + // this lvalue. + bool Nontemporal : 1; + + TBAAAccessInfo tbaaInfo; + +private: + void Initialize(clang::QualType Type, clang::Qualifiers Quals, + clang::CharUnits Alignment, LValueBaseInfo BaseInfo, + TBAAAccessInfo tbaaInfo) { + assert((!Alignment.isZero() || Type->isIncompleteType()) && + "initializing l-value with zero alignment!"); + if (isGlobalReg()) + assert(ElementType == nullptr && "Global reg does not store elem type"); + + this->Type = Type; + this->Quals = Quals; + // This flag shows if a nontemporal load/stores should be used when + // accessing this lvalue. + const unsigned MaxAlign = 1U << 31; + this->Alignment = Alignment.getQuantity() <= MaxAlign + ? Alignment.getQuantity() + : MaxAlign; + assert(this->Alignment == Alignment.getQuantity() && + "Alignment exceeds allowed max!"); + this->BaseInfo = BaseInfo; + this->tbaaInfo = tbaaInfo; + + // TODO: ObjC flags + // Initialize Objective-C flags. + this->NonGC = false; + this->Nontemporal = false; + } + + // The alignment to use when accessing this lvalue. (For vector elements, + // this is the alignment of the whole vector) + unsigned Alignment; + mlir::Value V; + mlir::Type ElementType; + mlir::Value VectorIdx; // Index for vector subscript + mlir::Attribute VectorElts; // ExtVector element subset: V.xyx + LValueBaseInfo BaseInfo; + const CIRGenBitFieldInfo *BitFieldInfo{0}; + +public: + bool isSimple() const { return LVType == Simple; } + bool isVectorElt() const { return LVType == VectorElt; } + bool isBitField() const { return LVType == BitField; } + bool isExtVectorElt() const { return LVType == ExtVectorElt; } + bool isGlobalReg() const { return LVType == GlobalReg; } + bool isMatrixElt() const { return LVType == MatrixElt; } + + bool isVolatileQualified() const { return Quals.hasVolatile(); } + + unsigned getVRQualifiers() const { + return Quals.getCVRQualifiers() & ~clang::Qualifiers::Const; + } + + bool isNonGC() const { return NonGC; } + void setNonGC(bool Value) { NonGC = Value; } + + bool isNontemporal() const { return Nontemporal; } + + bool isObjCWeak() const { + return Quals.getObjCGCAttr() == clang::Qualifiers::Weak; + } + bool isObjCStrong() const { + return Quals.getObjCGCAttr() == clang::Qualifiers::Strong; + } + + bool isVolatile() const { return Quals.hasVolatile(); } + + clang::QualType getType() const { return Type; } + + mlir::Value getPointer() const { return V; } + + clang::CharUnits getAlignment() const { + return clang::CharUnits::fromQuantity(Alignment); + } + void setAlignment(clang::CharUnits A) { Alignment = A.getQuantity(); } + + Address getAddress() const { + return Address(getPointer(), ElementType, getAlignment()); + } + + void setAddress(Address address) { + assert(isSimple()); + V = address.getPointer(); + ElementType = address.getElementType(); + Alignment = address.getAlignment().getQuantity(); + // TODO(cir): IsKnownNonNull = address.isKnownNonNull(); + } + + LValueBaseInfo getBaseInfo() const { return BaseInfo; } + void setBaseInfo(LValueBaseInfo Info) { BaseInfo = Info; } + + static LValue makeAddr(Address address, clang::QualType T, 
+ AlignmentSource Source = AlignmentSource::Type) { + LValue R; + R.LVType = Simple; + R.V = address.getPointer(); + R.ElementType = address.getElementType(); + R.Initialize(T, T.getQualifiers(), address.getAlignment(), + LValueBaseInfo(Source), TBAAAccessInfo()); + return R; + } + + // FIXME: only have one of these static methods. + static LValue makeAddr(Address address, clang::QualType T, + LValueBaseInfo LBI) { + LValue R; + R.LVType = Simple; + R.V = address.getPointer(); + R.ElementType = address.getElementType(); + R.Initialize(T, T.getQualifiers(), address.getAlignment(), LBI, + TBAAAccessInfo()); + return R; + } + + static LValue makeAddr(Address address, clang::QualType type, + clang::ASTContext &context, LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo) { + clang::Qualifiers qs = type.getQualifiers(); + qs.setObjCGCAttr(context.getObjCGCAttrKind(type)); + + LValue R; + R.LVType = Simple; + assert(mlir::cast(address.getPointer().getType())); + R.V = address.getPointer(); + R.ElementType = address.getElementType(); + R.Initialize(type, qs, address.getAlignment(), baseInfo, tbaaInfo); + return R; + } + + TBAAAccessInfo getTBAAInfo() const { return tbaaInfo; } + + const clang::Qualifiers &getQuals() const { return Quals; } + clang::Qualifiers &getQuals() { return Quals; } + + // vector element lvalue + Address getVectorAddress() const { + return Address(getVectorPointer(), ElementType, getAlignment()); + } + mlir::Value getVectorPointer() const { + assert(isVectorElt()); + return V; + } + mlir::Value getVectorIdx() const { + assert(isVectorElt()); + return VectorIdx; + } + + // extended vector elements. + Address getExtVectorAddress() const { + assert(isExtVectorElt()); + return Address(getExtVectorPointer(), ElementType, getAlignment()); + } + mlir::Value getExtVectorPointer() const { + assert(isExtVectorElt()); + return V; + } + mlir::ArrayAttr getExtVectorElts() const { + assert(isExtVectorElt()); + return mlir::cast(VectorElts); + } + + static LValue MakeVectorElt(Address vecAddress, mlir::Value index, + clang::QualType type, LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo) { + LValue R; + R.LVType = VectorElt; + R.V = vecAddress.getPointer(); + R.ElementType = vecAddress.getElementType(); + R.VectorIdx = index; + R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), + baseInfo, tbaaInfo); + return R; + } + + static LValue MakeExtVectorElt(Address vecAddress, mlir::ArrayAttr elts, + clang::QualType type, LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo) { + LValue R; + R.LVType = ExtVectorElt; + R.V = vecAddress.getPointer(); + R.ElementType = vecAddress.getElementType(); + R.VectorElts = elts; + R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), + baseInfo, tbaaInfo); + return R; + } + + // bitfield lvalue + Address getBitFieldAddress() const { + return Address(getBitFieldPointer(), ElementType, getAlignment()); + } + + mlir::Value getBitFieldPointer() const { + assert(isBitField()); + return V; + } + + const CIRGenBitFieldInfo &getBitFieldInfo() const { + assert(isBitField()); + return *BitFieldInfo; + } + + /// Create a new object to represent a bit-field access. + /// + /// \param Addr - The base address of the bit-field sequence this + /// bit-field refers to. + /// \param Info - The information describing how to perform the bit-field + /// access. 
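+  /// \param type - The declared type of the bit-field access.
+  /// \param baseInfo - The alignment-source information for the base address.
+  /// \param tbaaInfo - The TBAA information for the access.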
+ static LValue MakeBitfield(Address addr, const CIRGenBitFieldInfo &info, + clang::QualType type, LValueBaseInfo baseInfo, + TBAAAccessInfo tbaaInfo) { + LValue R; + R.LVType = BitField; + R.V = addr.getPointer(); + R.ElementType = addr.getElementType(); + R.BitFieldInfo = &info; + R.Initialize(type, type.getQualifiers(), addr.getAlignment(), baseInfo, + tbaaInfo); + return R; + } +}; + +/// An aggregate value slot. +class AggValueSlot { + /// The address. + Address Addr; + + // Qualifiers + clang::Qualifiers Quals; + + /// This is set to true if some external code is responsible for setting up a + /// destructor for the slot. Otherwise the code which constructs it should + /// push the appropriate cleanup. + bool DestructedFlag : 1; + + /// This is set to true if writing to the memory in the slot might require + /// calling an appropriate Objective-C GC barrier. The exact interaction here + /// is unnecessarily mysterious. + bool ObjCGCFlag : 1; + + /// This is set to true if the memory in the slot is known to be zero before + /// the assignment into it. This means that zero fields don't need to be set. + bool ZeroedFlag : 1; + + /// This is set to true if the slot might be aliased and it's not undefined + /// behavior to access it through such an alias. Note that it's always + /// undefined behavior to access a C++ object that's under construction + /// through an alias derived from outside the construction process. + /// + /// This flag controls whether calls that produce the aggregate + /// value may be evaluated directly into the slot, or whether they + /// must be evaluated into an unaliased temporary and then memcpy'ed + /// over. Since it's invalid in general to memcpy a non-POD C++ + /// object, it's important that this flag never be set when + /// evaluating an expression which constructs such an object. + bool AliasedFlag : 1; + + /// This is set to true if the tail padding of this slot might overlap + /// another object that may have already been initialized (and whose + /// value must be preserved by this initialization). If so, we may only + /// store up to the dsize of the type. Otherwise we can widen stores to + /// the size of the type. + bool OverlapFlag : 1; + + /// If is set to true, sanitizer checks are already generated for this address + /// or not required. For instance, if this address represents an object + /// created in 'new' expression, sanitizer checks for memory is made as a part + /// of 'operator new' emission and object constructor should not generate + /// them. + bool SanitizerCheckedFlag : 1; + + AggValueSlot(Address Addr, clang::Qualifiers Quals, bool DestructedFlag, + bool ObjCGCFlag, bool ZeroedFlag, bool AliasedFlag, + bool OverlapFlag, bool SanitizerCheckedFlag) + : Addr(Addr), Quals(Quals), DestructedFlag(DestructedFlag), + ObjCGCFlag(ObjCGCFlag), ZeroedFlag(ZeroedFlag), + AliasedFlag(AliasedFlag), OverlapFlag(OverlapFlag), + SanitizerCheckedFlag(SanitizerCheckedFlag) {} + +public: + enum IsAliased_t { IsNotAliased, IsAliased }; + enum IsDestructed_t { IsNotDestructed, IsDestructed }; + enum IsZeroed_t { IsNotZeroed, IsZeroed }; + enum Overlap_t { DoesNotOverlap, MayOverlap }; + enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers }; + enum IsSanitizerChecked_t { IsNotSanitizerChecked, IsSanitizerChecked }; + + /// ignored - Returns an aggregate value slot indicating that the aggregate + /// value is being ignored. 
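+  /// The returned slot carries an invalid address, so isIgnored() is true
+  /// and asRValue() produces RValue::getIgnored().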
+  static AggValueSlot ignored() {
+    return forAddr(Address::invalid(), clang::Qualifiers(), IsNotDestructed,
+                   DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap);
+  }
+
+  /// forAddr - Make a slot for an aggregate value.
+  ///
+  /// \param quals - The qualifiers that dictate how the slot should be
+  /// initialized. Only 'volatile' and the Objective-C lifetime qualifiers
+  /// matter.
+  ///
+  /// \param isDestructed - true if something else is responsible for calling
+  /// destructors on this object
+  /// \param needsGC - true if the slot is potentially located somewhere that
+  /// ObjC GC calls should be emitted for
+  static AggValueSlot
+  forAddr(Address addr, clang::Qualifiers quals, IsDestructed_t isDestructed,
+          NeedsGCBarriers_t needsGC, IsAliased_t isAliased,
+          Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed,
+          IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) {
+    return AggValueSlot(addr, quals, isDestructed, needsGC, isZeroed, isAliased,
+                        mayOverlap, isChecked);
+  }
+
+  static AggValueSlot
+  forLValue(const LValue &LV, IsDestructed_t isDestructed,
+            NeedsGCBarriers_t needsGC, IsAliased_t isAliased,
+            Overlap_t mayOverlap, IsZeroed_t isZeroed = IsNotZeroed,
+            IsSanitizerChecked_t isChecked = IsNotSanitizerChecked) {
+    return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC,
+                   isAliased, mayOverlap, isZeroed, isChecked);
+  }
+
+  IsDestructed_t isExternallyDestructed() const {
+    return IsDestructed_t(DestructedFlag);
+  }
+  void setExternallyDestructed(bool destructed = true) {
+    DestructedFlag = destructed;
+  }
+
+  clang::Qualifiers getQualifiers() const { return Quals; }
+
+  bool isVolatile() const { return Quals.hasVolatile(); }
+
+  Address getAddress() const { return Addr; }
+
+  bool isIgnored() const { return !Addr.isValid(); }
+
+  mlir::Value getPointer() const { return Addr.getPointer(); }
+
+  Overlap_t mayOverlap() const { return Overlap_t(OverlapFlag); }
+
+  bool isSanitizerChecked() const { return SanitizerCheckedFlag; }
+
+  IsZeroed_t isZeroed() const { return IsZeroed_t(ZeroedFlag); }
+  void setZeroed(bool V = true) { ZeroedFlag = V; }
+
+  NeedsGCBarriers_t requiresGCollection() const {
+    return NeedsGCBarriers_t(ObjCGCFlag);
+  }
+
+  IsAliased_t isPotentiallyAliased() const { return IsAliased_t(AliasedFlag); }
+
+  RValue asRValue() const {
+    if (isIgnored()) {
+      return RValue::getIgnored();
+    } else {
+      return RValue::getAggregate(getAddress(), isVolatile());
+    }
+  }
+
+  /// Get the preferred size to use when storing a value to this slot. This
+  /// is the type size unless that might overlap another object, in which
+  /// case it's the dsize.
+  clang::CharUnits getPreferredSize(clang::ASTContext &Ctx,
+                                    clang::QualType Type) {
+    return mayOverlap() ? Ctx.getTypeInfoDataSizeInChars(Type).Width
+                        : Ctx.getTypeSizeInChars(Type);
+  }
+};
+
+} // namespace cir
+
+#endif
diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp b/clang/lib/CIR/CodeGen/CIRGenerator.cpp
new file mode 100644
index 000000000000..4d6a6c6c5d84
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp
@@ -0,0 +1,192 @@
+//===--- CIRGenerator.cpp - Emit CIR from ASTs ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This builds an AST and converts it to CIR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenModule.h"
+
+#include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/Target/LLVMIR/Import.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/CIR/CIRGenerator.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+
+using namespace cir;
+using namespace clang;
+
+void CIRGenerator::anchor() {}
+
+CIRGenerator::CIRGenerator(clang::DiagnosticsEngine &diags,
+                           llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> vfs,
+                           const CodeGenOptions &CGO)
+    : Diags(diags), fs(std::move(vfs)), codeGenOpts{CGO},
+      HandlingTopLevelDecls(0) {}
+CIRGenerator::~CIRGenerator() {
+  // There should normally not be any leftover inline method definitions.
+  assert(DeferredInlineMemberFuncDefs.empty() || Diags.hasErrorOccurred());
+}
+
+static void setMLIRDataLayout(mlir::ModuleOp &mod, const llvm::DataLayout &dl) {
+  auto *context = mod.getContext();
+  mod->setAttr(mlir::LLVM::LLVMDialect::getDataLayoutAttrName(),
+               mlir::StringAttr::get(context, dl.getStringRepresentation()));
+  mlir::DataLayoutSpecInterface dlSpec = mlir::translateDataLayout(dl, context);
+  mod->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec);
+}
+
+void CIRGenerator::Initialize(ASTContext &astCtx) {
+  using namespace llvm;
+
+  this->astCtx = &astCtx;
+
+  mlirCtx = std::make_unique<mlir::MLIRContext>();
+  // Dialects loaded here match the includes above.
+  mlirCtx->getOrLoadDialect<mlir::cir::CIRDialect>();
+  mlirCtx->getOrLoadDialect<mlir::DLTIDialect>();
+  mlirCtx->getOrLoadDialect<mlir::func::FuncDialect>();
+  mlirCtx->getOrLoadDialect<mlir::LLVM::LLVMDialect>();
+  mlirCtx->getOrLoadDialect<mlir::memref::MemRefDialect>();
+  mlirCtx->getOrLoadDialect<mlir::omp::OpenMPDialect>();
+  CGM = std::make_unique<CIRGenModule>(*mlirCtx.get(), astCtx, codeGenOpts,
+                                       Diags);
+  auto mod = CGM->getModule();
+  auto layout = llvm::DataLayout(astCtx.getTargetInfo().getDataLayoutString());
+  setMLIRDataLayout(mod, layout);
+}
+
+bool CIRGenerator::verifyModule() { return CGM->verifyModule(); }
+
+bool CIRGenerator::EmitFunction(const FunctionDecl *FD) {
+  llvm_unreachable("NYI");
+}
+
+mlir::ModuleOp CIRGenerator::getModule() { return CGM->getModule(); }
+
+bool CIRGenerator::HandleTopLevelDecl(DeclGroupRef D) {
+  if (Diags.hasErrorOccurred())
+    return true;
+
+  HandlingTopLevelDeclRAII HandlingDecl(*this);
+
+  for (DeclGroupRef::iterator I = D.begin(), E = D.end(); I != E; ++I) {
+    CGM->buildTopLevelDecl(*I);
+  }
+
+  return true;
+}
+
+void CIRGenerator::HandleTranslationUnit(ASTContext &C) {
+  // Release the Builder when there is no error.
+  if (!Diags.hasErrorOccurred() && CGM)
+    CGM->Release();
+
+  // If there are errors before or when releasing the CGM, reset the module to
+  // stop here before invoking the backend.
+  if (Diags.hasErrorOccurred()) {
+    if (CGM)
+      // TODO: CGM->clear();
+      // TODO: M.reset();
+      return;
+  }
+}
+
+void CIRGenerator::HandleInlineFunctionDefinition(FunctionDecl *D) {
+  if (Diags.hasErrorOccurred())
+    return;
+
+  assert(D->doesThisDeclarationHaveABody());
+
+  // We may want to emit this definition. However, that decision might be
+  // based on computing the linkage, and we have to defer that in case we are
+  // inside of something that will change the method's final linkage, e.g.
+  //   typedef struct {
+  //     void bar();
+  //     void foo() { bar(); }
+  //   } A;
+  DeferredInlineMemberFuncDefs.push_back(D);
+
+  // Provide some coverage mapping even for methods that aren't emitted.
+  // Don't do this for templated classes though, as they may not be
+  // instantiable.
+  if (!D->getLexicalDeclContext()->isDependentContext())
+    CGM->AddDeferredUnusedCoverageMapping(D);
+}
+
+void CIRGenerator::buildDefaultMethods() { CGM->buildDefaultMethods(); }
+
+void CIRGenerator::buildDeferredDecls() {
+  if (DeferredInlineMemberFuncDefs.empty())
+    return;
+
+  // Emit any deferred inline method definitions. Note that more deferred
+  // methods may be added during this loop, since ASTConsumer callbacks can be
+  // invoked if AST inspection results in declarations being added.
+  HandlingTopLevelDeclRAII HandlingDecls(*this);
+  for (unsigned I = 0; I != DeferredInlineMemberFuncDefs.size(); ++I)
+    CGM->buildTopLevelDecl(DeferredInlineMemberFuncDefs[I]);
+  DeferredInlineMemberFuncDefs.clear();
+}
+
+/// HandleTagDeclDefinition - This callback is invoked each time a TagDecl
+/// (e.g. struct, union, enum, class) is completed. This allows the client to
+/// hack on the type, which can occur at any point in the file (because these
+/// can be defined in declspecs).
+void CIRGenerator::HandleTagDeclDefinition(TagDecl *D) {
+  if (Diags.hasErrorOccurred())
+    return;
+
+  // Don't allow re-entrant calls to CIRGen triggered by PCH deserialization to
+  // emit deferred decls.
+  HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false);
+
+  CGM->UpdateCompletedType(D);
+
+  // For MSVC compatibility, treat declarations of static data members with
+  // inline initializers as definitions.
+  if (astCtx->getTargetInfo().getCXXABI().isMicrosoft()) {
+    llvm_unreachable("NYI");
+  }
+  // For OpenMP emit declare reduction functions, if required.
+  if (astCtx->getLangOpts().OpenMP) {
+    llvm_unreachable("NYI");
+  }
+}
+
+void CIRGenerator::HandleTagDeclRequiredDefinition(const TagDecl *D) {
+  if (Diags.hasErrorOccurred())
+    return;
+
+  // Don't allow re-entrant calls to CIRGen triggered by PCH deserialization to
+  // emit deferred decls.
+  HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false);
+
+  if (CGM->getModuleDebugInfo())
+    llvm_unreachable("NYI");
+}
+
+void CIRGenerator::HandleCXXStaticMemberVarInstantiation(VarDecl *D) {
+  if (Diags.hasErrorOccurred())
+    return;
+
+  CGM->HandleCXXStaticMemberVarInstantiation(D);
+}
+
+void CIRGenerator::CompleteTentativeDefinition(VarDecl *D) {
+  if (Diags.hasErrorOccurred())
+    return;
+
+  CGM->buildTentativeDefinition(D);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRPasses.cpp b/clang/lib/CIR/CodeGen/CIRPasses.cpp
new file mode 100644
index 000000000000..4f89daa1cee4
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRPasses.cpp
@@ -0,0 +1,105 @@
+//====- CIRPasses.cpp - Lowering from CIR to LLVM -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements machinery for any CIR <-> CIR passes used by clang.
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTContext.h" +#include "clang/CIR/Dialect/Passes.h" + +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/Passes.h" + +namespace cir { +mlir::LogicalResult runCIRToCIRPasses( + mlir::ModuleOp theModule, mlir::MLIRContext *mlirCtx, + clang::ASTContext &astCtx, bool enableVerifier, bool enableLifetime, + llvm::StringRef lifetimeOpts, bool enableIdiomRecognizer, + llvm::StringRef idiomRecognizerOpts, bool enableLibOpt, + llvm::StringRef libOptOpts, std::string &passOptParsingFailure, + bool enableCIRSimplify, bool flattenCIR, bool emitMLIR, + bool enableCallConvLowering, bool enableMem2Reg) { + + mlir::PassManager pm(mlirCtx); + pm.addPass(mlir::createCIRCanonicalizePass()); + + // TODO(CIR): Make this actually propagate errors correctly. This is stubbed + // in to get rebases going. + auto errorHandler = [](const llvm::Twine &) -> mlir::LogicalResult { + return mlir::LogicalResult::failure(); + }; + + if (enableLifetime) { + auto lifetimePass = mlir::createLifetimeCheckPass(&astCtx); + if (lifetimePass->initializeOptions(lifetimeOpts, errorHandler).failed()) { + passOptParsingFailure = lifetimeOpts; + return mlir::failure(); + } + pm.addPass(std::move(lifetimePass)); + } + + if (enableIdiomRecognizer) { + auto idiomPass = mlir::createIdiomRecognizerPass(&astCtx); + if (idiomPass->initializeOptions(idiomRecognizerOpts, errorHandler) + .failed()) { + passOptParsingFailure = idiomRecognizerOpts; + return mlir::failure(); + } + pm.addPass(std::move(idiomPass)); + } + + if (enableLibOpt) { + auto libOpPass = mlir::createLibOptPass(&astCtx); + if (libOpPass->initializeOptions(libOptOpts, errorHandler).failed()) { + passOptParsingFailure = libOptOpts; + return mlir::failure(); + } + pm.addPass(std::move(libOpPass)); + } + + if (enableCIRSimplify) + pm.addPass(mlir::createCIRSimplifyPass()); + + pm.addPass(mlir::createLoweringPreparePass(&astCtx)); + + // FIXME(cir): This pass should run by default, but it is lacking support for + // several code bits. Once it's more mature, we should fix this. + if (enableCallConvLowering) + pm.addPass(mlir::createCallConvLoweringPass()); + + if (flattenCIR || enableMem2Reg) + mlir::populateCIRPreLoweringPasses(pm); + + if (enableMem2Reg) + pm.addPass(mlir::createMem2Reg()); + + if (emitMLIR) + pm.addPass(mlir::createSCFPreparePass()); + + // FIXME: once CIRCodenAction fixes emission other than CIR we + // need to run this right before dialect emission. 
+ pm.addPass(mlir::createDropASTPass()); + pm.enableVerifier(enableVerifier); + (void)mlir::applyPassManagerCLOptions(pm); + return pm.run(theModule); +} + +} // namespace cir + +namespace mlir { + +void populateCIRPreLoweringPasses(OpPassManager &pm) { + pm.addPass(createFlattenCFGPass()); + pm.addPass(createGotoSolverPass()); +} + +} // namespace mlir diff --git a/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp new file mode 100644 index 000000000000..301b1efc6ab5 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp @@ -0,0 +1,754 @@ + +#include "CIRGenBuilder.h" +#include "CIRGenModule.h" +#include "CIRGenTypes.h" + +#include "mlir/IR/BuiltinTypes.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/RecordLayout.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +#include + +using namespace llvm; +using namespace clang; +using namespace cir; + +namespace { +/// The CIRRecordLowering is responsible for lowering an ASTRecordLayout to a +/// mlir::Type. Some of the lowering is straightforward, some is not. TODO: Here +/// we detail some of the complexities and weirdnesses? +struct CIRRecordLowering final { + + // MemberInfo is a helper structure that contains information about a record + // member. In addition to the standard member types, there exists a sentinel + // member type that ensures correct rounding. + struct MemberInfo final { + CharUnits offset; + enum class InfoKind { VFPtr, VBPtr, Field, Base, VBase, Scissor } kind; + mlir::Type data; + union { + const FieldDecl *fieldDecl; + const CXXRecordDecl *cxxRecordDecl; + }; + MemberInfo(CharUnits offset, InfoKind kind, mlir::Type data, + const FieldDecl *fieldDecl = nullptr) + : offset{offset}, kind{kind}, data{data}, fieldDecl{fieldDecl} {}; + MemberInfo(CharUnits offset, InfoKind kind, mlir::Type data, + const CXXRecordDecl *RD) + : offset{offset}, kind{kind}, data{data}, cxxRecordDecl{RD} {} + // MemberInfos are sorted so we define a < operator. + bool operator<(const MemberInfo &other) const { + return offset < other.offset; + } + }; + // The constructor. + CIRRecordLowering(CIRGenTypes &cirGenTypes, const RecordDecl *recordDecl, + bool isPacked); + + /// ---------------------- + /// Short helper routines. + + /// Constructs a MemberInfo instance from an offset and mlir::Type. + MemberInfo StorageInfo(CharUnits Offset, mlir::Type Data) { + return MemberInfo(Offset, MemberInfo::InfoKind::Field, Data); + } + + // Layout routines. + void setBitFieldInfo(const FieldDecl *FD, CharUnits StartOffset, + mlir::Type StorageType); + + void lower(bool nonVirtualBaseType); + void lowerUnion(); + + /// Determines if we need a packed llvm struct. + void determinePacked(bool NVBaseType); + /// Inserts padding everywhere it's needed. + void insertPadding(); + + void computeVolatileBitfields(); + void accumulateBases(); + void accumulateVPtrs(); + void accumulateVBases(); + void accumulateFields(); + void accumulateBitFields(RecordDecl::field_iterator Field, + RecordDecl::field_iterator FieldEnd); + + mlir::Type getVFPtrType(); + + // Helper function to check if we are targeting AAPCS. + bool isAAPCS() const { + return astContext.getTargetInfo().getABI().starts_with("aapcs"); + } + + /// Helper function to check if the target machine is BigEndian. 
+ bool isBE() const { return astContext.getTargetInfo().isBigEndian(); } + + /// The Microsoft bitfield layout rule allocates discrete storage + /// units of the field's formal type and only combines adjacent + /// fields of the same formal type. We want to emit a layout with + /// these discrete storage units instead of combining them into a + /// continuous run. + bool isDiscreteBitFieldABI() { + return astContext.getTargetInfo().getCXXABI().isMicrosoft() || + recordDecl->isMsStruct(astContext); + } + + // The Itanium base layout rule allows virtual bases to overlap + // other bases, which complicates layout in specific ways. + // + // Note specifically that the ms_struct attribute doesn't change this. + bool isOverlappingVBaseABI() { + return !astContext.getTargetInfo().getCXXABI().isMicrosoft(); + } + // Recursively searches all of the bases to find out if a vbase is + // not the primary vbase of some base class. + bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query); + + CharUnits bitsToCharUnits(uint64_t bitOffset) { + return astContext.toCharUnitsFromBits(bitOffset); + } + + void calculateZeroInit(); + + CharUnits getSize(mlir::Type Ty) { + return CharUnits::fromQuantity(dataLayout.layout.getTypeSize(Ty)); + } + CharUnits getSizeInBits(mlir::Type Ty) { + return CharUnits::fromQuantity(dataLayout.layout.getTypeSizeInBits(Ty)); + } + CharUnits getAlignment(mlir::Type Ty) { + return CharUnits::fromQuantity(dataLayout.layout.getTypeABIAlignment(Ty)); + } + bool isZeroInitializable(const FieldDecl *FD) { + return cirGenTypes.isZeroInitializable(FD->getType()); + } + bool isZeroInitializable(const RecordDecl *RD) { + return cirGenTypes.isZeroInitializable(RD); + } + + mlir::Type getCharType() { + return mlir::cir::IntType::get(&cirGenTypes.getMLIRContext(), + astContext.getCharWidth(), + /*isSigned=*/false); + } + + /// Wraps mlir::cir::IntType with some implicit arguments. + mlir::Type getUIntNType(uint64_t NumBits) { + unsigned AlignedBits = llvm::PowerOf2Ceil(NumBits); + AlignedBits = std::max(8u, AlignedBits); + return mlir::cir::IntType::get(&cirGenTypes.getMLIRContext(), AlignedBits, + /*isSigned=*/false); + } + + mlir::Type getByteArrayType(CharUnits numberOfChars) { + assert(!numberOfChars.isZero() && "Empty byte arrays aren't allowed."); + mlir::Type type = getCharType(); + return numberOfChars == CharUnits::One() + ? type + : mlir::cir::ArrayType::get(type.getContext(), type, + numberOfChars.getQuantity()); + } + + // This is different from LLVM traditional codegen because CIRGen uses arrays + // of bytes instead of arbitrary-sized integers. This is important for packed + // structures support. + mlir::Type getBitfieldStorageType(unsigned numBits) { + unsigned alignedBits = llvm::alignTo(numBits, astContext.getCharWidth()); + if (mlir::cir::IntType::isValidPrimitiveIntBitwidth(alignedBits)) { + return builder.getUIntNTy(alignedBits); + } else { + mlir::Type type = getCharType(); + return mlir::cir::ArrayType::get(type.getContext(), type, + alignedBits / astContext.getCharWidth()); + } + } + + // Gets the llvm Basesubobject type from a CXXRecordDecl. 
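+  // (That is, the layout used when RD is embedded as a non-virtual base:
+  // only up to its non-virtual size, with no virtual bases or tail padding.)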
+ mlir::Type getStorageType(const CXXRecordDecl *RD) { + return cirGenTypes.getCIRGenRecordLayout(RD).getBaseSubobjectCIRType(); + } + + mlir::Type getStorageType(const FieldDecl *fieldDecl) { + auto type = cirGenTypes.convertTypeForMem(fieldDecl->getType()); + assert(!fieldDecl->isBitField() && "bit fields NYI"); + if (!fieldDecl->isBitField()) + return type; + + // if (isDiscreteBitFieldABI()) + // return type; + + // return getUIntNType(std::min(fielddecl->getBitWidthValue(astContext), + // static_cast(astContext.toBits(getSize(type))))); + llvm_unreachable("getStorageType only supports nonBitFields at this point"); + } + + uint64_t getFieldBitOffset(const FieldDecl *fieldDecl) { + return astRecordLayout.getFieldOffset(fieldDecl->getFieldIndex()); + } + + /// Fills out the structures that are ultimately consumed. + void fillOutputFields(); + + void appendPaddingBytes(CharUnits Size) { + if (!Size.isZero()) + fieldTypes.push_back(getByteArrayType(Size)); + } + + CIRGenTypes &cirGenTypes; + CIRGenBuilderTy &builder; + const ASTContext &astContext; + const RecordDecl *recordDecl; + const CXXRecordDecl *cxxRecordDecl; + const ASTRecordLayout &astRecordLayout; + // Helpful intermediate data-structures + std::vector members; + // Output fields, consumed by CIRGenTypes::computeRecordLayout + llvm::SmallVector fieldTypes; + llvm::DenseMap fields; + llvm::DenseMap bitFields; + llvm::DenseMap nonVirtualBases; + llvm::DenseMap virtualBases; + CIRDataLayout dataLayout; + bool IsZeroInitializable : 1; + bool IsZeroInitializableAsBase : 1; + bool isPacked : 1; + +private: + CIRRecordLowering(const CIRRecordLowering &) = delete; + void operator=(const CIRRecordLowering &) = delete; +}; +} // namespace + +CIRRecordLowering::CIRRecordLowering(CIRGenTypes &cirGenTypes, + const RecordDecl *recordDecl, + bool isPacked) + : cirGenTypes{cirGenTypes}, builder{cirGenTypes.getBuilder()}, + astContext{cirGenTypes.getContext()}, recordDecl{recordDecl}, + cxxRecordDecl{llvm::dyn_cast(recordDecl)}, + astRecordLayout{cirGenTypes.getContext().getASTRecordLayout(recordDecl)}, + dataLayout{cirGenTypes.getModule().getModule()}, + IsZeroInitializable(true), IsZeroInitializableAsBase(true), + isPacked{isPacked} {} + +void CIRRecordLowering::setBitFieldInfo(const FieldDecl *FD, + CharUnits StartOffset, + mlir::Type StorageType) { + CIRGenBitFieldInfo &Info = bitFields[FD->getCanonicalDecl()]; + Info.IsSigned = FD->getType()->isSignedIntegerOrEnumerationType(); + Info.Offset = + (unsigned)(getFieldBitOffset(FD) - astContext.toBits(StartOffset)); + Info.Size = FD->getBitWidthValue(astContext); + Info.StorageSize = getSizeInBits(StorageType).getQuantity(); + Info.StorageOffset = StartOffset; + Info.StorageType = StorageType; + Info.Name = FD->getName(); + + if (Info.Size > Info.StorageSize) + Info.Size = Info.StorageSize; + // Reverse the bit offsets for big endian machines. Because we represent + // a bitfield as a single large integer load, we can imagine the bits + // counting from the most-significant-bit instead of the + // least-significant-bit. + if (dataLayout.isBigEndian()) + Info.Offset = Info.StorageSize - (Info.Offset + Info.Size); + + Info.VolatileStorageSize = 0; + Info.VolatileOffset = 0; + Info.VolatileStorageOffset = CharUnits::Zero(); +} + +void CIRRecordLowering::lower(bool nonVirtualBaseType) { + if (recordDecl->isUnion()) { + lowerUnion(); + computeVolatileBitfields(); + return; + } + + CharUnits Size = nonVirtualBaseType ? 
astRecordLayout.getNonVirtualSize()
+                                        : astRecordLayout.getSize();
+  accumulateFields();
+
+  // RD implies C++
+  if (cxxRecordDecl) {
+    accumulateVPtrs();
+    accumulateBases();
+    if (members.empty()) {
+      appendPaddingBytes(Size);
+      computeVolatileBitfields();
+      return;
+    }
+    if (!nonVirtualBaseType)
+      accumulateVBases();
+  }
+
+  llvm::stable_sort(members);
+  // TODO: implement clipTailPadding once bitfields are implemented
+  // TODO: implement packed structs
+  // TODO: implement padding
+  // TODO: support zeroInit
+
+  members.push_back(StorageInfo(Size, getUIntNType(8)));
+  determinePacked(nonVirtualBaseType);
+  insertPadding();
+  members.pop_back();
+
+  fillOutputFields();
+  computeVolatileBitfields();
+}
+
+void CIRRecordLowering::lowerUnion() {
+  CharUnits LayoutSize = astRecordLayout.getSize();
+  mlir::Type StorageType = nullptr;
+  bool SeenNamedMember = false;
+  // Iterate through the fields setting bitFieldInfo and the Fields array. Also
+  // locate the "most appropriate" storage type. The heuristic for finding the
+  // storage type isn't necessary: the first (non-0-length-bitfield) field's
+  // type would work fine and be simpler, but would be different than what
+  // we've been doing and cause lit tests to change.
+  for (const auto *Field : recordDecl->fields()) {
+
+    mlir::Type FieldType = nullptr;
+    if (Field->isBitField()) {
+      if (Field->isZeroLengthBitField(astContext))
+        continue;
+
+      FieldType = getBitfieldStorageType(Field->getBitWidthValue(astContext));
+
+      setBitFieldInfo(Field, CharUnits::Zero(), FieldType);
+    } else {
+      FieldType = getStorageType(Field);
+    }
+    fields[Field->getCanonicalDecl()] = 0;
+    // auto FieldType = getStorageType(Field);
+    // Compute zero-initializable status.
+    // This union might not be zero initialized: it may contain a pointer to
+    // data member which might have some exotic initialization sequence.
+    // If this is the case, then we ought not to try and come up with a
+    // "better" type; it might not be very easy to come up with a Constant
+    // which correctly initializes it.
+    if (!SeenNamedMember) {
+      SeenNamedMember = Field->getIdentifier();
+      if (!SeenNamedMember)
+        if (const auto *FieldRD = Field->getType()->getAsRecordDecl())
+          SeenNamedMember = FieldRD->findFirstNamedDataMember();
+      if (SeenNamedMember && !isZeroInitializable(Field)) {
+        IsZeroInitializable = IsZeroInitializableAsBase = false;
+        StorageType = FieldType;
+      }
+    }
+    // Because our union isn't zero initializable, we won't be getting a better
+    // storage type.
+    if (!IsZeroInitializable)
+      continue;
+
+    // Conditionally update our storage type if we've got a new "better" one.
+    if (!StorageType || getAlignment(FieldType) > getAlignment(StorageType) ||
+        (getAlignment(FieldType) == getAlignment(StorageType) &&
+         getSize(FieldType) > getSize(StorageType)))
+      StorageType = FieldType;
+
+    // NOTE(cir): Track all union members' types, not just the largest one. It
+    // allows for proper type-checking and retains more info for analysis.
+    fieldTypes.push_back(FieldType);
+  }
+  // If we have no storage type just pad to the appropriate size and return.
+  if (!StorageType)
+    llvm_unreachable("no-storage union NYI");
+  // If our storage size was bigger than our required size (can happen in the
+  // case of packed bitfields on Itanium) then just use an I8 array.
+  if (LayoutSize < getSize(StorageType))
+    StorageType = getByteArrayType(LayoutSize);
+  // NOTE(cir): Defer padding calculations to the lowering process.
+ // appendPaddingBytes(LayoutSize - getSize(StorageType)); + // Set packed if we need it. + if (LayoutSize % getAlignment(StorageType)) + isPacked = true; +} + +bool CIRRecordLowering::hasOwnStorage(const CXXRecordDecl *Decl, + const CXXRecordDecl *Query) { + const ASTRecordLayout &DeclLayout = astContext.getASTRecordLayout(Decl); + if (DeclLayout.isPrimaryBaseVirtual() && DeclLayout.getPrimaryBase() == Query) + return false; + for (const auto &Base : Decl->bases()) + if (!hasOwnStorage(Base.getType()->getAsCXXRecordDecl(), Query)) + return false; + return true; +} + +/// The AAPCS that defines that, when possible, bit-fields should +/// be accessed using containers of the declared type width: +/// When a volatile bit-field is read, and its container does not overlap with +/// any non-bit-field member or any zero length bit-field member, its container +/// must be read exactly once using the access width appropriate to the type of +/// the container. When a volatile bit-field is written, and its container does +/// not overlap with any non-bit-field member or any zero-length bit-field +/// member, its container must be read exactly once and written exactly once +/// using the access width appropriate to the type of the container. The two +/// accesses are not atomic. +/// +/// Enforcing the width restriction can be disabled using +/// -fno-aapcs-bitfield-width. +void CIRRecordLowering::computeVolatileBitfields() { + if (!isAAPCS() || + !cirGenTypes.getModule().getCodeGenOpts().AAPCSBitfieldWidth) + return; + + for ([[maybe_unused]] auto &I : bitFields) { + assert(!MissingFeatures::armComputeVolatileBitfields()); + } +} + +void CIRRecordLowering::accumulateBases() { + // If we've got a primary virtual base, we need to add it with the bases. + if (astRecordLayout.isPrimaryBaseVirtual()) { + llvm_unreachable("NYI"); + } + + // Accumulate the non-virtual bases. + for ([[maybe_unused]] const auto &Base : cxxRecordDecl->bases()) { + if (Base.isVirtual()) + continue; + // Bases can be zero-sized even if not technically empty if they + // contain only a trailing array member. + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + if (!BaseDecl->isEmpty() && + !astContext.getASTRecordLayout(BaseDecl).getNonVirtualSize().isZero()) { + members.push_back(MemberInfo(astRecordLayout.getBaseClassOffset(BaseDecl), + MemberInfo::InfoKind::Base, + getStorageType(BaseDecl), BaseDecl)); + } + } +} + +void CIRRecordLowering::accumulateVBases() { + CharUnits ScissorOffset = astRecordLayout.getNonVirtualSize(); + // In the itanium ABI, it's possible to place a vbase at a dsize that is + // smaller than the nvsize. Here we check to see if such a base is placed + // before the nvsize and set the scissor offset to that, instead of the + // nvsize. + if (isOverlappingVBaseABI()) + for (const auto &Base : cxxRecordDecl->vbases()) { + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + if (BaseDecl->isEmpty()) + continue; + // If the vbase is a primary virtual base of some base, then it doesn't + // get its own storage location but instead lives inside of that base. 
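+      // (For instance, a nearly-empty virtual base such as an abstract
+      // interface class with only virtual functions may be reused as the
+      // primary base at offset 0 of one of the other bases.)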
+ if (astContext.isNearlyEmpty(BaseDecl) && + !hasOwnStorage(cxxRecordDecl, BaseDecl)) + continue; + ScissorOffset = std::min(ScissorOffset, + astRecordLayout.getVBaseClassOffset(BaseDecl)); + } + members.push_back(MemberInfo(ScissorOffset, MemberInfo::InfoKind::Scissor, + mlir::Type{}, cxxRecordDecl)); + for (const auto &Base : cxxRecordDecl->vbases()) { + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + if (BaseDecl->isEmpty()) + continue; + CharUnits Offset = astRecordLayout.getVBaseClassOffset(BaseDecl); + // If the vbase is a primary virtual base of some base, then it doesn't + // get its own storage location but instead lives inside of that base. + if (isOverlappingVBaseABI() && astContext.isNearlyEmpty(BaseDecl) && + !hasOwnStorage(cxxRecordDecl, BaseDecl)) { + members.push_back( + MemberInfo(Offset, MemberInfo::InfoKind::VBase, nullptr, BaseDecl)); + continue; + } + // If we've got a vtordisp, add it as a storage type. + if (astRecordLayout.getVBaseOffsetsMap() + .find(BaseDecl) + ->second.hasVtorDisp()) + members.push_back( + StorageInfo(Offset - CharUnits::fromQuantity(4), getUIntNType(32))); + members.push_back(MemberInfo(Offset, MemberInfo::InfoKind::VBase, + getStorageType(BaseDecl), BaseDecl)); + } +} + +void CIRRecordLowering::accumulateVPtrs() { + if (astRecordLayout.hasOwnVFPtr()) + members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::InfoKind::VFPtr, + getVFPtrType())); + if (astRecordLayout.hasOwnVBPtr()) + llvm_unreachable("NYI"); +} + +mlir::Type CIRRecordLowering::getVFPtrType() { + // FIXME: replay LLVM codegen for now, perhaps add a vtable ptr special + // type so it's a bit more clear and C++ idiomatic. + return builder.getVirtualFnPtrType(); +} + +void CIRRecordLowering::fillOutputFields() { + for (auto &member : members) { + if (member.data) + fieldTypes.push_back(member.data); + if (member.kind == MemberInfo::InfoKind::Field) { + if (member.fieldDecl) + fields[member.fieldDecl->getCanonicalDecl()] = fieldTypes.size() - 1; + // A field without storage must be a bitfield. + if (!member.data) + setBitFieldInfo(member.fieldDecl, member.offset, fieldTypes.back()); + } else if (member.kind == MemberInfo::InfoKind::Base) { + nonVirtualBases[member.cxxRecordDecl] = fieldTypes.size() - 1; + } else if (member.kind == MemberInfo::InfoKind::VBase) { + virtualBases[member.cxxRecordDecl] = fieldTypes.size() - 1; + } + } +} + +void CIRRecordLowering::accumulateBitFields( + RecordDecl::field_iterator Field, RecordDecl::field_iterator FieldEnd) { + // Run stores the first element of the current run of bitfields. FieldEnd is + // used as a special value to note that we don't have a current run. A + // bitfield run is a contiguous collection of bitfields that can be stored in + // the same storage block. Zero-sized bitfields and bitfields that would + // cross an alignment boundary break a run and start a new one. + RecordDecl::field_iterator Run = FieldEnd; + // Tail is the offset of the first bit off the end of the current run. It's + // used to determine if the ASTRecordLayout is treating these two bitfields as + // contiguous. StartBitOffset is offset of the beginning of the Run. + uint64_t StartBitOffset, Tail = 0; + if (isDiscreteBitFieldABI()) { + llvm_unreachable("NYI"); + } + + // Check if OffsetInRecord (the size in bits of the current run) is better + // as a single field run. 
When OffsetInRecord has legal integer width, and + // its bitfield offset is naturally aligned, it is better to make the + // bitfield a separate storage component so as it can be accessed directly + // with lower cost. + auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord, + uint64_t StartBitOffset, + uint64_t nextTail = 0) { + if (!cirGenTypes.getModule().getCodeGenOpts().FineGrainedBitfieldAccesses) + return false; + llvm_unreachable("NYI"); + // if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || + // !DataLayout.fitsInLegalInteger(OffsetInRecord)) + // return false; + // Make sure StartBitOffset is naturally aligned if it is treated as an + // IType integer. + // if (StartBitOffset % + // astContext.toBits(getAlignment(getUIntNType(OffsetInRecord))) != + // 0) + // return false; + return true; + }; + + // The start field is better as a single field run. + bool StartFieldAsSingleRun = false; + for (;;) { + // Check to see if we need to start a new run. + if (Run == FieldEnd) { + // If we're out of fields, return. + if (Field == FieldEnd) + break; + // Any non-zero-length bitfield can start a new run. + if (!Field->isZeroLengthBitField(astContext)) { + Run = Field; + StartBitOffset = getFieldBitOffset(*Field); + Tail = StartBitOffset + Field->getBitWidthValue(astContext); + StartFieldAsSingleRun = + IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset); + } + ++Field; + continue; + } + + // If the start field of a new run is better as a single run, or if current + // field (or consecutive fields) is better as a single run, or if current + // field has zero width bitfield and either UseZeroLengthBitfieldAlignment + // or UseBitFieldTypeAlignment is set to true, or if the offset of current + // field is inconsistent with the offset of previous field plus its offset, + // skip the block below and go ahead to emit the storage. Otherwise, try to + // add bitfields to the run. + uint64_t nextTail = Tail; + if (Field != FieldEnd) + nextTail += Field->getBitWidthValue(astContext); + + if (!StartFieldAsSingleRun && Field != FieldEnd && + !IsBetterAsSingleFieldRun(Tail - StartBitOffset, StartBitOffset, + nextTail) && + (!Field->isZeroLengthBitField(astContext) || + (!astContext.getTargetInfo().useZeroLengthBitfieldAlignment() && + !astContext.getTargetInfo().useBitFieldTypeAlignment())) && + Tail == getFieldBitOffset(*Field)) { + Tail = nextTail; + ++Field; + continue; + } + + // We've hit a break-point in the run and need to emit a storage field. + auto Type = getBitfieldStorageType(Tail - StartBitOffset); + + // Add the storage member to the record and set the bitfield info for all of + // the bitfields in the run. Bitfields get the offset of their storage but + // come afterward and remain there after a stable sort. + members.push_back(StorageInfo(bitsToCharUnits(StartBitOffset), Type)); + for (; Run != Field; ++Run) + members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), + MemberInfo::InfoKind::Field, nullptr, *Run)); + Run = FieldEnd; + StartFieldAsSingleRun = false; + } +} + +void CIRRecordLowering::accumulateFields() { + for (RecordDecl::field_iterator field = recordDecl->field_begin(), + fieldEnd = recordDecl->field_end(); + field != fieldEnd;) { + if (field->isBitField()) { + RecordDecl::field_iterator start = field; + // Iterate to gather the list of bitfields. 
+      for (++field; field != fieldEnd && field->isBitField(); ++field)
+        ;
+      accumulateBitFields(start, field);
+    } else if (!field->isZeroSize(astContext)) {
+      members.push_back(MemberInfo{bitsToCharUnits(getFieldBitOffset(*field)),
+                                   MemberInfo::InfoKind::Field,
+                                   getStorageType(*field), *field});
+      ++field;
+    } else {
+      // TODO(cir): do we want to do anything special about zero size
+      // members?
+      ++field;
+    }
+  }
+}
+
+void CIRRecordLowering::determinePacked(bool NVBaseType) {
+  if (isPacked)
+    return;
+  CharUnits Alignment = CharUnits::One();
+  CharUnits NVAlignment = CharUnits::One();
+  CharUnits NVSize = !NVBaseType && cxxRecordDecl
+                         ? astRecordLayout.getNonVirtualSize()
+                         : CharUnits::Zero();
+  for (std::vector<MemberInfo>::const_iterator Member = members.begin(),
+                                               MemberEnd = members.end();
+       Member != MemberEnd; ++Member) {
+    if (!Member->data)
+      continue;
+    // If any member falls at an offset that is not a multiple of its alignment,
+    // then the entire record must be packed.
+    if (Member->offset % getAlignment(Member->data))
+      isPacked = true;
+    if (Member->offset < NVSize)
+      NVAlignment = std::max(NVAlignment, getAlignment(Member->data));
+    Alignment = std::max(Alignment, getAlignment(Member->data));
+  }
+  // If the size of the record (the capstone's offset) is not a multiple of the
+  // record's alignment, it must be packed.
+  if (members.back().offset % Alignment)
+    isPacked = true;
+  // If the non-virtual sub-object is not a multiple of the non-virtual
+  // sub-object's alignment, it must be packed. We cannot have a packed
+  // non-virtual sub-object and an unpacked complete object or vice versa.
+  if (NVSize % NVAlignment)
+    isPacked = true;
+  // Update the alignment of the sentinel.
+  if (!isPacked)
+    members.back().data = getUIntNType(astContext.toBits(Alignment));
+}
+
+void CIRRecordLowering::insertPadding() {
+  std::vector<std::pair<CharUnits, CharUnits>> Padding;
+  CharUnits Size = CharUnits::Zero();
+  for (std::vector<MemberInfo>::const_iterator Member = members.begin(),
+                                               MemberEnd = members.end();
+       Member != MemberEnd; ++Member) {
+    if (!Member->data)
+      continue;
+    CharUnits Offset = Member->offset;
+    assert(Offset >= Size);
+    // Insert padding if we need to.
+    if (Offset !=
+        Size.alignTo(isPacked ? CharUnits::One() : getAlignment(Member->data)))
+      Padding.push_back(std::make_pair(Size, Offset - Size));
+    Size = Offset + getSize(Member->data);
+  }
+  if (Padding.empty())
+    return;
+  // Add the padding to the Members list and sort it.
+  for (std::vector<std::pair<CharUnits, CharUnits>>::const_iterator
+           Pad = Padding.begin(),
+           PadEnd = Padding.end();
+       Pad != PadEnd; ++Pad)
+    members.push_back(StorageInfo(Pad->first, getByteArrayType(Pad->second)));
+  llvm::stable_sort(members);
+}
+
+std::unique_ptr<CIRGenRecordLayout>
+CIRGenTypes::computeRecordLayout(const RecordDecl *D,
+                                 mlir::cir::StructType *Ty) {
+  CIRRecordLowering builder(*this, D, /*packed=*/false);
+  assert(Ty->isIncomplete() && "recomputing record layout?");
+  builder.lower(/*nonVirtualBaseType=*/false);
+
+  // If we're in C++, compute the base subobject type.
+ mlir::cir::StructType BaseTy; + if (llvm::isa(D) && !D->isUnion() && + !D->hasAttr()) { + BaseTy = *Ty; + if (builder.astRecordLayout.getNonVirtualSize() != + builder.astRecordLayout.getSize()) { + CIRRecordLowering baseBuilder(*this, D, /*Packed=*/builder.isPacked); + baseBuilder.lower(/*NonVirtualBaseType=*/true); + auto baseIdentifier = getRecordTypeName(D, ".base"); + BaseTy = Builder.getCompleteStructTy( + baseBuilder.fieldTypes, baseIdentifier, baseBuilder.isPacked, D); + // TODO(cir): add something like addRecordTypeName + + // BaseTy and Ty must agree on their packedness for getCIRFieldNo to work + // on both of them with the same index. + assert(builder.isPacked == baseBuilder.isPacked && + "Non-virtual and complete types must agree on packedness"); + } + } + + // Fill in the struct *after* computing the base type. Filling in the body + // signifies that the type is no longer opaque and record layout is complete, + // but we may need to recursively layout D while laying D out as a base type. + auto astAttr = mlir::cir::ASTRecordDeclAttr::get(Ty->getContext(), D); + Ty->complete(builder.fieldTypes, builder.isPacked, astAttr); + + auto RL = std::make_unique( + Ty ? *Ty : mlir::cir::StructType{}, + BaseTy ? BaseTy : mlir::cir::StructType{}, + (bool)builder.IsZeroInitializable, + (bool)builder.IsZeroInitializableAsBase); + + RL->NonVirtualBases.swap(builder.nonVirtualBases); + RL->CompleteObjectVirtualBases.swap(builder.virtualBases); + + // Add all the field numbers. + RL->FieldInfo.swap(builder.fields); + + // Add bitfield info. + RL->BitFields.swap(builder.bitFields); + + // Dump the layout, if requested. + if (getContext().getLangOpts().DumpRecordLayouts) { + llvm_unreachable("NYI"); + } + + // TODO: implement verification + return RL; +} + +CIRGenBitFieldInfo CIRGenBitFieldInfo::MakeInfo(CIRGenTypes &Types, + const FieldDecl *FD, + uint64_t Offset, uint64_t Size, + uint64_t StorageSize, + CharUnits StorageOffset) { + llvm_unreachable("NYI"); +} diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt new file mode 100644 index 000000000000..97a8ad4f5ea8 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CMakeLists.txt @@ -0,0 +1,83 @@ +set( + LLVM_LINK_COMPONENTS + Core + Support +) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_clang_library(clangCIR + CIRAsm.cpp + CIRGenAtomic.cpp + CIRGenBuilder.cpp + CIRGenBuiltin.cpp + CIRGenBuiltinAArch64.cpp + CIRGenBuiltinX86.cpp + CIRGenCXX.cpp + CIRGenCXXABI.cpp + CIRGenCall.cpp + CIRGenClass.cpp + CIRGenCleanup.cpp + CIRGenCoroutine.cpp + CIRGenDecl.cpp + CIRGenDeclCXX.cpp + CIRGenException.cpp + CIRGenExpr.cpp + CIRGenExprComplex.cpp + CIRGenExprConst.cpp + CIRGenExprAgg.cpp + CIRGenExprCXX.cpp + CIRGenExprScalar.cpp + CIRGenFunction.cpp + CIRGenItaniumCXXABI.cpp + CIRGenModule.cpp + CIRGenOpenCLRuntime.cpp + CIRGenOpenCL.cpp + CIRGenOpenMPRuntime.cpp + CIRGenStmt.cpp + CIRGenStmtOpenMP.cpp + CIRGenTBAA.cpp + CIRGenTypes.cpp + CIRGenVTables.cpp + CIRGenerator.cpp + CIRPasses.cpp + CIRRecordLayoutBuilder.cpp + ConstantInitBuilder.cpp + TargetInfo.cpp + + DEPENDS + MLIRCIR + MLIRCIROpsIncGen + MLIRCIRASTAttrInterfacesIncGen + MLIRCIROpInterfacesIncGen + MLIRCIRLoopOpInterfaceIncGen + ${dialect_libs} + + LINK_LIBS + clangAST + clangBasic + clangLex + ${dialect_libs} + MLIRCIR + MLIRCIRTransforms + MLIRCIRInterfaces + MLIRAffineToStandard + MLIRAnalysis + MLIRDLTIDialect + MLIRFuncToLLVM + MLIRIR + MLIRLLVMCommonConversion + MLIRLLVMDialect + MLIROpenMPDialect + MLIRLLVMToLLVMIRTranslation 
+  MLIRMemRefDialect
+  MLIRMemRefToLLVM
+  MLIRParser
+  MLIRPass
+  MLIRSCFToControlFlow
+  MLIRSideEffectInterfaces
+  MLIRSupport
+  MLIRTargetLLVMIRImport
+  MLIRTargetLLVMIRExport
+  MLIRTransforms
+)
diff --git a/clang/lib/CIR/CodeGen/ConstantInitBuilder.cpp b/clang/lib/CIR/CodeGen/ConstantInitBuilder.cpp
new file mode 100644
index 000000000000..522f59adff60
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/ConstantInitBuilder.cpp
@@ -0,0 +1,327 @@
+//===--- ConstantInitBuilder.cpp - Global initializer builder -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines out-of-line routines for building initializers for
+// global variables, in particular the kind of globals that are implicitly
+// introduced by various language ABIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConstantInitBuilder.h"
+#include "CIRGenModule.h"
+
+using namespace clang;
+using namespace cir;
+
+ConstantInitBuilderBase::ConstantInitBuilderBase(CIRGenModule &CGM)
+    : CGM(CGM), builder(CGM.getBuilder()) {}
+
+mlir::Type ConstantInitFuture::getType() const {
+  assert(Data && "dereferencing null future");
+  if (Data.is<mlir::Attribute>()) {
+    auto attr = mlir::dyn_cast<mlir::TypedAttr>(Data.get<mlir::Attribute>());
+    assert(attr && "expected typed attribute");
+    return attr.getType();
+  } else {
+    llvm_unreachable("Only support typed attributes here");
+  }
+}
+
+void ConstantInitFuture::abandon() {
+  assert(Data && "abandoning null future");
+  if (auto builder = mlir::dyn_cast<ConstantInitBuilderBase *>(Data)) {
+    builder->abandon(0);
+  }
+  Data = nullptr;
+}
+
+void ConstantInitFuture::installInGlobal(mlir::cir::GlobalOp GV) {
+  assert(Data && "installing null future");
+  if (Data.is<mlir::Attribute>()) {
+    CIRGenModule::setInitializer(GV, Data.get<mlir::Attribute>());
+  } else {
+    llvm_unreachable("NYI");
+    // auto &builder = *Data.get<ConstantInitBuilderBase *>();
+    // assert(builder.Buffer.size() == 1);
+    // builder.setGlobalInitializer(GV, builder.Buffer[0]);
+    // builder.Buffer.clear();
+    // Data = nullptr;
+  }
+}
+
+ConstantInitFuture
+ConstantInitBuilderBase::createFuture(mlir::Attribute initializer) {
+  assert(Buffer.empty() && "buffer not currently empty");
+  Buffer.push_back(initializer);
+  return ConstantInitFuture(this);
+}
+
+// Only used in this file.
+inline ConstantInitFuture::ConstantInitFuture(ConstantInitBuilderBase *builder) + : Data(builder) { + assert(!builder->Frozen); + assert(builder->Buffer.size() == 1); + assert(builder->Buffer[0] != nullptr); +} + +mlir::cir::GlobalOp ConstantInitBuilderBase::createGlobal( + mlir::Attribute initializer, const llvm::Twine &name, CharUnits alignment, + bool constant, mlir::cir::GlobalLinkageKind linkage, + unsigned addressSpace) { + llvm_unreachable("NYI"); + // auto GV = + // new llvm::GlobalVariable(CGM.getModule(), initializer->getType(), + // constant, linkage, initializer, name, + // /*insert before*/ nullptr, + // llvm::GlobalValue::NotThreadLocal, + // addressSpace); + // GV->setAlignment(alignment.getAsAlign()); + // resolveSelfReferences(GV); + // return GV; +} + +void ConstantInitBuilderBase::setGlobalInitializer( + mlir::cir::GlobalOp GV, mlir::Attribute initializer) { + CIRGenModule::setInitializer(GV, initializer); + + if (!SelfReferences.empty()) + resolveSelfReferences(GV); +} + +void ConstantInitBuilderBase::resolveSelfReferences(mlir::cir::GlobalOp GV) { + llvm_unreachable("NYI"); + // for (auto &entry : SelfReferences) { + // mlir::Attribute resolvedReference = + // llvm::ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, + // entry.Indices); + // auto dummy = entry.Dummy; + // dummy->replaceAllUsesWith(resolvedReference); + // dummy->eraseFromParent(); + // } + // SelfReferences.clear(); +} + +void ConstantInitBuilderBase::abandon(size_t newEnd) { + llvm_unreachable("NYI"); + // // Remove all the entries we've added. + // Buffer.erase(Buffer.begin() + newEnd, Buffer.end()); + + // // If we're abandoning all the way to the beginning, destroy + // // all the self-references, because we might not get another + // // opportunity. + // if (newEnd == 0) { + // for (auto &entry : SelfReferences) { + // auto dummy = entry.Dummy; + // dummy->replaceAllUsesWith(llvm::PoisonValue::get(dummy->getType())); + // dummy->eraseFromParent(); + // } + // SelfReferences.clear(); + // } +} + +void ConstantAggregateBuilderBase::addSize(CharUnits size) { + add(Builder.CGM.getSize(size)); +} + +mlir::Attribute +ConstantAggregateBuilderBase::getRelativeOffset(mlir::cir::IntType offsetType, + mlir::Attribute target) { + return getRelativeOffsetToPosition(offsetType, target, + Builder.Buffer.size() - Begin); +} + +mlir::Attribute ConstantAggregateBuilderBase::getRelativeOffsetToPosition( + mlir::cir::IntType offsetType, mlir::Attribute target, size_t position) { + llvm_unreachable("NYI"); + // // Compute the address of the relative-address slot. + // auto base = getAddrOfPosition(offsetType, position); + + // // Subtract. + // base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy); + // target = llvm::ConstantExpr::getPtrToInt(target, Builder.CGM.IntPtrTy); + // mlir::Attribute offset = llvm::ConstantExpr::getSub(target, base); + + // // Truncate to the relative-address type if necessary. + // if (Builder.CGM.IntPtrTy != offsetType) { + // offset = llvm::ConstantExpr::getTrunc(offset, offsetType); + // } + + // return offset; +} + +mlir::Attribute +ConstantAggregateBuilderBase::getAddrOfPosition(mlir::Type type, + size_t position) { + llvm_unreachable("NYI"); + // // Make a global variable. We will replace this with a GEP to this + // // position after installing the initializer. 
+ // auto dummy = new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, + // llvm::GlobalVariable::PrivateLinkage, + // nullptr, ""); + // Builder.SelfReferences.emplace_back(dummy); + // auto &entry = Builder.SelfReferences.back(); + // (void)getGEPIndicesTo(entry.Indices, position + Begin); + // return dummy; +} + +mlir::Attribute +ConstantAggregateBuilderBase::getAddrOfCurrentPosition(mlir::Type type) { + llvm_unreachable("NYI"); + // // Make a global variable. We will replace this with a GEP to this + // // position after installing the initializer. + // auto dummy = new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, + // llvm::GlobalVariable::PrivateLinkage, + // nullptr, ""); + // Builder.SelfReferences.emplace_back(dummy); + // auto &entry = Builder.SelfReferences.back(); + // (void)getGEPIndicesToCurrentPosition(entry.Indices); + // return dummy; +} + +void ConstantAggregateBuilderBase::getGEPIndicesTo( + llvm::SmallVectorImpl &indices, size_t position) const { + llvm_unreachable("NYI"); + // // Recurse on the parent builder if present. + // if (Parent) { + // Parent->getGEPIndicesTo(indices, Begin); + + // // Otherwise, add an index to drill into the first level of pointer. + // } else { + // assert(indices.empty()); + // indices.push_back(llvm::ConstantInt::get(Builder.CGM.Int32Ty, 0)); + // } + + // assert(position >= Begin); + // // We have to use i32 here because struct GEPs demand i32 indices. + // // It's rather unlikely to matter in practice. + // indices.push_back( + // llvm::ConstantInt::get(Builder.CGM.Int32Ty, position - Begin)); +} + +ConstantAggregateBuilderBase::PlaceholderPosition +ConstantAggregateBuilderBase::addPlaceholderWithSize(mlir::Type type) { + llvm_unreachable("NYI"); + // // Bring the offset up to the last field. + // CharUnits offset = getNextOffsetFromGlobal(); + + // // Create the placeholder. + // auto position = addPlaceholder(); + + // // Advance the offset past that field. + // auto &layout = Builder.CGM.getDataLayout(); + // if (!Packed) + // offset = + // offset.alignTo(CharUnits::fromQuantity(layout.getABITypeAlign(type))); + // offset += CharUnits::fromQuantity(layout.getTypeStoreSize(type)); + + // CachedOffsetEnd = Builder.Buffer.size(); + // CachedOffsetFromGlobal = offset; + + // return position; +} + +CharUnits +ConstantAggregateBuilderBase::getOffsetFromGlobalTo(size_t end) const { + size_t cacheEnd = CachedOffsetEnd; + assert(cacheEnd <= end); + + // Fast path: if the cache is valid, just use it. + if (cacheEnd == end) { + return CachedOffsetFromGlobal; + } + + // If the cached range ends before the index at which the current + // aggregate starts, recurse for the parent. + CharUnits offset; + if (cacheEnd < Begin) { + assert(cacheEnd == 0); + assert(Parent && "Begin != 0 for root builder"); + cacheEnd = Begin; + offset = Parent->getOffsetFromGlobalTo(Begin); + } else { + offset = CachedOffsetFromGlobal; + } + + // Perform simple layout on the elements in cacheEnd..getType(); + // if (!Packed) + // offset = offset.alignTo( + // CharUnits::fromQuantity(layout.getABITypeAlign(elementType))); + // offset += + // CharUnits::fromQuantity(layout.getTypeStoreSize(elementType)); + // } while (++cacheEnd != end); + } + + // Cache and return. + CachedOffsetEnd = cacheEnd; + CachedOffsetFromGlobal = offset; + return offset; +} + +// FIXME(cir): ideally we should use CIRGenBuilder for both static function +// bellow by threading ConstantAggregateBuilderBase through +// ConstantAggregateBuilderBase. 
+static mlir::cir::ConstArrayAttr getConstArray(mlir::Attribute attrs, + mlir::cir::ArrayType arrayTy) { + return mlir::cir::ConstArrayAttr::get(arrayTy, attrs); +} + +mlir::Attribute ConstantAggregateBuilderBase::finishArray(mlir::Type eltTy) { + markFinished(); + + auto &buffer = getBuffer(); + assert((Begin < buffer.size() || (Begin == buffer.size() && eltTy)) && + "didn't add any array elements without element type"); + auto elts = llvm::ArrayRef(buffer).slice(Begin); + if (!eltTy) { + llvm_unreachable("NYI"); + // Uncomment this once we get a testcase. + // auto tAttr = elts[0].dyn_cast(); + // assert(tAttr && "expected typed attribute"); + // eltTy = tAttr.getType(); + } + + auto constant = getConstArray( + mlir::ArrayAttr::get(eltTy.getContext(), elts), + mlir::cir::ArrayType::get(eltTy.getContext(), eltTy, elts.size())); + buffer.erase(buffer.begin() + Begin, buffer.end()); + return constant; +} + +mlir::Attribute +ConstantAggregateBuilderBase::finishStruct(mlir::MLIRContext *ctx, + mlir::cir::StructType ty) { + markFinished(); + + auto &buffer = getBuffer(); + auto elts = llvm::ArrayRef(buffer).slice(Begin); + + if (ty == nullptr && elts.empty()) { + llvm_unreachable("NYI"); + } + + mlir::Attribute constant; + if (ty) { + llvm_unreachable("NYI"); + // assert(ty->isPacked() == Packed); + // constant = llvm::ConstantStruct::get(ty, elts); + } else { + const auto members = mlir::ArrayAttr::get(ctx, elts); + constant = Builder.CGM.getBuilder().getAnonConstStruct(members, Packed); + } + + buffer.erase(buffer.begin() + Begin, buffer.end()); + return constant; +} diff --git a/clang/lib/CIR/CodeGen/ConstantInitBuilder.h b/clang/lib/CIR/CodeGen/ConstantInitBuilder.h new file mode 100644 index 000000000000..d78584f42e71 --- /dev/null +++ b/clang/lib/CIR/CodeGen/ConstantInitBuilder.h @@ -0,0 +1,591 @@ +//===- ConstantInitBuilder.h - Builder for CIR attributes -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class provides a convenient interface for building complex +// global initializers of the sort that are frequently required for +// language ABIs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CIR_CODEGEN_CONSTANTINITBUILDER_H +#define LLVM_CLANG_CIR_CODEGEN_CONSTANTINITBUILDER_H + +#include "clang/AST/CharUnits.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +#include "CIRGenBuilder.h" +#include "ConstantInitFuture.h" + +#include +#include + +using namespace clang; + +namespace cir { + +class CIRGenModule; + +/// A convenience builder class for complex constant initializers, +/// especially for anonymous global structures used by various language +/// runtimes. 
+/// +/// The basic usage pattern is expected to be something like: +/// ConstantInitBuilder builder(CGM); +/// auto toplevel = builder.beginStruct(); +/// toplevel.addInt(CGM.SizeTy, widgets.size()); +/// auto widgetArray = builder.beginArray(); +/// for (auto &widget : widgets) { +/// auto widgetDesc = widgetArray.beginStruct(); +/// widgetDesc.addInt(CGM.SizeTy, widget.getPower()); +/// widgetDesc.add(CGM.GetAddrOfConstantString(widget.getName())); +/// widgetDesc.add(CGM.GetAddrOfGlobal(widget.getInitializerDecl())); +/// widgetDesc.finishAndAddTo(widgetArray); +/// } +/// widgetArray.finishAndAddTo(toplevel); +/// auto global = toplevel.finishAndCreateGlobal("WIDGET_LIST", Align, +/// /*constant*/ true); +class ConstantInitBuilderBase { + struct SelfReference { + mlir::cir::GlobalOp Dummy; + llvm::SmallVector Indices; + + SelfReference(mlir::cir::GlobalOp dummy) : Dummy(dummy) {} + }; + CIRGenModule &CGM; + CIRGenBuilderTy &builder; + llvm::SmallVector Buffer; + std::vector SelfReferences; + bool Frozen = false; + + friend class ConstantInitFuture; + friend class ConstantAggregateBuilderBase; + template friend class ConstantAggregateBuilderTemplateBase; + +protected: + explicit ConstantInitBuilderBase(CIRGenModule &CGM); + + ~ConstantInitBuilderBase() { + assert(Buffer.empty() && "didn't claim all values out of buffer"); + assert(SelfReferences.empty() && "didn't apply all self-references"); + } + +private: + mlir::cir::GlobalOp + createGlobal(mlir::Attribute initializer, const llvm::Twine &name, + CharUnits alignment, bool constant = false, + mlir::cir::GlobalLinkageKind linkage = + mlir::cir::GlobalLinkageKind::InternalLinkage, + unsigned addressSpace = 0); + + ConstantInitFuture createFuture(mlir::Attribute initializer); + + void setGlobalInitializer(mlir::cir::GlobalOp GV, + mlir::Attribute initializer); + + void resolveSelfReferences(mlir::cir::GlobalOp GV); + + void abandon(size_t newEnd); +}; + +/// A concrete base class for struct and array aggregate +/// initializer builders. +class ConstantAggregateBuilderBase { +protected: + ConstantInitBuilderBase &Builder; + ConstantAggregateBuilderBase *Parent; + size_t Begin; + mutable size_t CachedOffsetEnd = 0; + bool Finished = false; + bool Frozen = false; + bool Packed = false; + mutable CharUnits CachedOffsetFromGlobal; + + llvm::SmallVectorImpl &getBuffer() { return Builder.Buffer; } + + const llvm::SmallVectorImpl &getBuffer() const { + return Builder.Buffer; + } + + ConstantAggregateBuilderBase(ConstantInitBuilderBase &builder, + ConstantAggregateBuilderBase *parent) + : Builder(builder), Parent(parent), Begin(builder.Buffer.size()) { + if (parent) { + assert(!parent->Frozen && "parent already has child builder active"); + parent->Frozen = true; + } else { + assert(!builder.Frozen && "builder already has child builder active"); + builder.Frozen = true; + } + } + + ~ConstantAggregateBuilderBase() { + assert(Finished && "didn't finish aggregate builder"); + } + + void markFinished() { + assert(!Frozen && "child builder still active"); + assert(!Finished && "builder already finished"); + Finished = true; + if (Parent) { + assert(Parent->Frozen && "parent not frozen while child builder active"); + Parent->Frozen = false; + } else { + assert(Builder.Frozen && "builder not frozen while child builder active"); + Builder.Frozen = false; + } + } + +public: + // Not copyable. 
+ ConstantAggregateBuilderBase(const ConstantAggregateBuilderBase &) = delete; + ConstantAggregateBuilderBase & + operator=(const ConstantAggregateBuilderBase &) = delete; + + // Movable, mostly to allow returning. But we have to write this out + // properly to satisfy the assert in the destructor. + ConstantAggregateBuilderBase(ConstantAggregateBuilderBase &&other) + : Builder(other.Builder), Parent(other.Parent), Begin(other.Begin), + CachedOffsetEnd(other.CachedOffsetEnd), Finished(other.Finished), + Frozen(other.Frozen), Packed(other.Packed), + CachedOffsetFromGlobal(other.CachedOffsetFromGlobal) { + other.Finished = true; + } + ConstantAggregateBuilderBase & + operator=(ConstantAggregateBuilderBase &&other) = delete; + + /// Return the number of elements that have been added to + /// this struct or array. + size_t size() const { + assert(!this->Finished && "cannot query after finishing builder"); + assert(!this->Frozen && "cannot query while sub-builder is active"); + assert(this->Begin <= this->getBuffer().size()); + return this->getBuffer().size() - this->Begin; + } + + /// Return true if no elements have yet been added to this struct or array. + bool empty() const { return size() == 0; } + + /// Abandon this builder completely. + void abandon() { + markFinished(); + Builder.abandon(Begin); + } + + /// Add a new value to this initializer. + void add(mlir::Attribute value) { + assert(value && "adding null value to constant initializer"); + assert(!Finished && "cannot add more values after finishing builder"); + assert(!Frozen && "cannot add values while subbuilder is active"); + Builder.Buffer.push_back(value); + } + + /// Add an integer value of type size_t. + void addSize(CharUnits size); + + /// Add an integer value of a specific type. + void addInt(mlir::cir::IntType intTy, uint64_t value, bool isSigned = false) { + add(mlir::IntegerAttr::get(intTy, + llvm::APInt{intTy.getWidth(), value, isSigned})); + } + + /// Add a pointer of a specific type. + void addPointer(mlir::cir::PointerType ptrTy, uint64_t value) { + auto val = mlir::IntegerAttr::get( + mlir::IntegerType::get(ptrTy.getContext(), 64), value); + add(mlir::cir::ConstPtrAttr::get(ptrTy.getContext(), ptrTy, val)); + } + + /// Add a bitcast of a value to a specific type. + void addBitCast(mlir::Attribute value, mlir::Type type) { + llvm_unreachable("NYI"); + // add(llvm::ConstantExpr::getBitCast(value, type)); + } + + /// Add a bunch of new values to this initializer. + void addAll(llvm::ArrayRef values) { + assert(!Finished && "cannot add more values after finishing builder"); + assert(!Frozen && "cannot add values while subbuilder is active"); + Builder.Buffer.append(values.begin(), values.end()); + } + + /// Add a relative offset to the given target address, i.e. the + /// static difference between the target address and the address + /// of the relative offset. The target must be known to be defined + /// in the current linkage unit. The offset will have the given + /// integer type, which must be no wider than intptr_t. Some + /// targets may not fully support this operation. + void addRelativeOffset(mlir::cir::IntType type, mlir::Attribute target) { + llvm_unreachable("NYI"); + // add(getRelativeOffset(type, target)); + } + + /// Same as addRelativeOffset(), but instead relative to an element in this + /// aggregate, identified by its index. 
+ void addRelativeOffsetToPosition(mlir::cir::IntType type, + mlir::Attribute target, size_t position) { + llvm_unreachable("NYI"); + // add(getRelativeOffsetToPosition(type, target, position)); + } + + /// Add a relative offset to the target address, plus a small + /// constant offset. This is primarily useful when the relative + /// offset is known to be a multiple of (say) four and therefore + /// the tag can be used to express an extra two bits of information. + void addTaggedRelativeOffset(mlir::cir::IntType type, mlir::Attribute address, + unsigned tag) { + llvm_unreachable("NYI"); + // mlir::Attribute offset = + // getRelativeOffset(type, address); if + // (tag) { + // offset = + // llvm::ConstantExpr::getAdd(offset, + // llvm::ConstantInt::get(type, tag)); + // } + // add(offset); + } + + /// Return the offset from the start of the initializer to the + /// next position, assuming no padding is required prior to it. + /// + /// This operation will not succeed if any unsized placeholders are + /// currently in place in the initializer. + CharUnits getNextOffsetFromGlobal() const { + assert(!Finished && "cannot add more values after finishing builder"); + assert(!Frozen && "cannot add values while subbuilder is active"); + return getOffsetFromGlobalTo(Builder.Buffer.size()); + } + + /// An opaque class to hold the abstract position of a placeholder. + class PlaceholderPosition { + size_t Index; + friend class ConstantAggregateBuilderBase; + PlaceholderPosition(size_t index) : Index(index) {} + }; + + /// Add a placeholder value to the structure. The returned position + /// can be used to set the value later; it will not be invalidated by + /// any intermediate operations except (1) filling the same position or + /// (2) finishing the entire builder. + /// + /// This is useful for emitting certain kinds of structure which + /// contain some sort of summary field, generally a count, before any + /// of the data. By emitting a placeholder first, the structure can + /// be emitted eagerly. + PlaceholderPosition addPlaceholder() { + assert(!Finished && "cannot add more values after finishing builder"); + assert(!Frozen && "cannot add values while subbuilder is active"); + Builder.Buffer.push_back(nullptr); + return Builder.Buffer.size() - 1; + } + + /// Add a placeholder, giving the expected type that will be filled in. + PlaceholderPosition addPlaceholderWithSize(mlir::Type expectedType); + + /// Fill a previously-added placeholder. + void fillPlaceholderWithInt(PlaceholderPosition position, + mlir::cir::IntType type, uint64_t value, + bool isSigned = false) { + llvm_unreachable("NYI"); + // fillPlaceholder(position, llvm::ConstantInt::get(type, value, isSigned)); + } + + /// Fill a previously-added placeholder. + void fillPlaceholder(PlaceholderPosition position, mlir::Attribute value) { + assert(!Finished && "cannot change values after finishing builder"); + assert(!Frozen && "cannot add values while subbuilder is active"); + mlir::Attribute &slot = Builder.Buffer[position.Index]; + assert(slot == nullptr && "placeholder already filled"); + slot = value; + } + + /// Produce an address which will eventually point to the next + /// position to be filled. This is computed with an indexed + /// getelementptr rather than by computing offsets. + /// + /// The returned pointer will have type T*, where T is the given type. This + /// type can differ from the type of the actual element. 
+ mlir::Attribute getAddrOfCurrentPosition(mlir::Type type); + + /// Produce an address which points to a position in the aggregate being + /// constructed. This is computed with an indexed getelementptr rather than by + /// computing offsets. + /// + /// The returned pointer will have type T*, where T is the given type. This + /// type can differ from the type of the actual element. + mlir::Attribute getAddrOfPosition(mlir::Type type, size_t position); + + llvm::ArrayRef getGEPIndicesToCurrentPosition( + llvm::SmallVectorImpl &indices) { + getGEPIndicesTo(indices, Builder.Buffer.size()); + return indices; + } + +protected: + mlir::Attribute finishArray(mlir::Type eltTy); + mlir::Attribute finishStruct(mlir::MLIRContext *ctx, + mlir::cir::StructType structTy); + +private: + void getGEPIndicesTo(llvm::SmallVectorImpl &indices, + size_t position) const; + + mlir::Attribute getRelativeOffset(mlir::cir::IntType offsetType, + mlir::Attribute target); + + mlir::Attribute getRelativeOffsetToPosition(mlir::cir::IntType offsetType, + mlir::Attribute target, + size_t position); + + CharUnits getOffsetFromGlobalTo(size_t index) const; +}; + +template +class ConstantAggregateBuilderTemplateBase + : public Traits::AggregateBuilderBase { + using super = typename Traits::AggregateBuilderBase; + +public: + using InitBuilder = typename Traits::InitBuilder; + using ArrayBuilder = typename Traits::ArrayBuilder; + using StructBuilder = typename Traits::StructBuilder; + using AggregateBuilderBase = typename Traits::AggregateBuilderBase; + +protected: + ConstantAggregateBuilderTemplateBase(InitBuilder &builder, + AggregateBuilderBase *parent) + : super(builder, parent) {} + + Impl &asImpl() { return *static_cast(this); } + +public: + ArrayBuilder beginArray(mlir::Type eltTy = nullptr) { + return ArrayBuilder(static_cast(this->Builder), this, eltTy); + } + + StructBuilder beginStruct(mlir::cir::StructType ty = nullptr) { + return StructBuilder(static_cast(this->Builder), this, ty); + } + + /// Given that this builder was created by beginning an array or struct + /// component on the given parent builder, finish the array/struct + /// component and add it to the parent. + /// + /// It is an intentional choice that the parent is passed in explicitly + /// despite it being redundant with information already kept in the + /// builder. This aids in readability by making it easier to find the + /// places that add components to a builder, as well as "bookending" + /// the sub-builder more explicitly. + void finishAndAddTo(mlir::MLIRContext *ctx, AggregateBuilderBase &parent) { + assert(this->Parent == &parent && "adding to non-parent builder"); + parent.add(asImpl().finishImpl(ctx)); + } + + /// Given that this builder was created by beginning an array or struct + /// directly on a ConstantInitBuilder, finish the array/struct and + /// create a global variable with it as the initializer. + template + mlir::cir::GlobalOp finishAndCreateGlobal(mlir::MLIRContext *ctx, + As &&...args) { + assert(!this->Parent && "finishing non-root builder"); + return this->Builder.createGlobal(asImpl().finishImpl(ctx), + std::forward(args)...); + } + + /// Given that this builder was created by beginning an array or struct + /// directly on a ConstantInitBuilder, finish the array/struct and + /// set it as the initializer of the given global variable. 
+ void finishAndSetAsInitializer(mlir::cir::GlobalOp global, + bool forVTable = false) { + assert(!this->Parent && "finishing non-root builder"); + mlir::Attribute init = asImpl().finishImpl(global.getContext()); + auto initCSA = mlir::dyn_cast(init); + assert(initCSA && + "expected #cir.const_struct attribute to represent vtable data"); + return this->Builder.setGlobalInitializer( + global, forVTable ? mlir::cir::VTableAttr::get(initCSA.getType(), + initCSA.getMembers()) + : init); + } + + /// Given that this builder was created by beginning an array or struct + /// directly on a ConstantInitBuilder, finish the array/struct and + /// return a future which can be used to install the initializer in + /// a global later. + /// + /// This is useful for allowing a finished initializer to passed to + /// an API which will build the global. However, the "future" preserves + /// a dependency on the original builder; it is an error to pass it aside. + ConstantInitFuture finishAndCreateFuture(mlir::MLIRContext *ctx) { + assert(!this->Parent && "finishing non-root builder"); + return this->Builder.createFuture(asImpl().finishImpl(ctx)); + } +}; + +template +class ConstantArrayBuilderTemplateBase + : public ConstantAggregateBuilderTemplateBase { + using super = + ConstantAggregateBuilderTemplateBase; + +public: + using InitBuilder = typename Traits::InitBuilder; + using AggregateBuilderBase = typename Traits::AggregateBuilderBase; + +private: + mlir::Type EltTy; + + template friend class ConstantAggregateBuilderTemplateBase; + +protected: + ConstantArrayBuilderTemplateBase(InitBuilder &builder, + AggregateBuilderBase *parent, + mlir::Type eltTy) + : super(builder, parent), EltTy(eltTy) {} + +private: + /// Form an array constant from the values that have been added to this + /// builder. + mlir::Attribute finishImpl([[maybe_unused]] mlir::MLIRContext *ctx) { + return AggregateBuilderBase::finishArray(EltTy); + } +}; + +/// A template class designed to allow other frontends to +/// easily customize the builder classes used by ConstantInitBuilder, +/// and thus to extend the API to work with the abstractions they +/// prefer. This would probably not be necessary if C++ just +/// supported extension methods. +template +class ConstantStructBuilderTemplateBase + : public ConstantAggregateBuilderTemplateBase< + typename Traits::StructBuilder, Traits> { + using super = + ConstantAggregateBuilderTemplateBase; + +public: + using InitBuilder = typename Traits::InitBuilder; + using AggregateBuilderBase = typename Traits::AggregateBuilderBase; + +private: + mlir::cir::StructType StructTy; + + template friend class ConstantAggregateBuilderTemplateBase; + +protected: + ConstantStructBuilderTemplateBase(InitBuilder &builder, + AggregateBuilderBase *parent, + mlir::cir::StructType structTy) + : super(builder, parent), StructTy(structTy) { + if (structTy) { + llvm_unreachable("NYI"); + // this->Packed = structTy->isPacked(); + } + } + +public: + void setPacked(bool packed) { this->Packed = packed; } + + /// Use the given type for the struct if its element count is correct. + /// Don't add more elements after calling this. + void suggestType(mlir::cir::StructType structTy) { + if (this->size() == structTy.getNumElements()) { + StructTy = structTy; + } + } + +private: + /// Form an array constant from the values that have been added to this + /// builder. 
+ mlir::Attribute finishImpl(mlir::MLIRContext *ctx) { + return AggregateBuilderBase::finishStruct(ctx, StructTy); + } +}; + +/// A template class designed to allow other frontends to +/// easily customize the builder classes used by ConstantInitBuilder, +/// and thus to extend the API to work with the abstractions they +/// prefer. This would probably not be necessary if C++ just +/// supported extension methods. +template +class ConstantInitBuilderTemplateBase : public ConstantInitBuilderBase { +protected: + ConstantInitBuilderTemplateBase(CIRGenModule &CGM) + : ConstantInitBuilderBase(CGM) {} + +public: + using InitBuilder = typename Traits::InitBuilder; + using ArrayBuilder = typename Traits::ArrayBuilder; + using StructBuilder = typename Traits::StructBuilder; + + ArrayBuilder beginArray(mlir::Type eltTy = nullptr) { + return ArrayBuilder(static_cast(*this), nullptr, eltTy); + } + + StructBuilder beginStruct(mlir::cir::StructType structTy = nullptr) { + return StructBuilder(static_cast(*this), nullptr, structTy); + } +}; + +class ConstantInitBuilder; +class ConstantStructBuilder; +class ConstantArrayBuilder; + +struct ConstantInitBuilderTraits { + using InitBuilder = ConstantInitBuilder; + using AggregateBuilderBase = ConstantAggregateBuilderBase; + using ArrayBuilder = ConstantArrayBuilder; + using StructBuilder = ConstantStructBuilder; +}; + +/// The standard implementation of ConstantInitBuilder used in Clang. +class ConstantInitBuilder + : public ConstantInitBuilderTemplateBase { +public: + explicit ConstantInitBuilder(CIRGenModule &CGM) + : ConstantInitBuilderTemplateBase(CGM) {} +}; + +/// A helper class of ConstantInitBuilder, used for building constant +/// array initializers. +class ConstantArrayBuilder + : public ConstantArrayBuilderTemplateBase { + template friend class ConstantInitBuilderTemplateBase; + + // The use of explicit qualification is a GCC workaround. + template + friend class cir::ConstantAggregateBuilderTemplateBase; + + ConstantArrayBuilder(ConstantInitBuilder &builder, + ConstantAggregateBuilderBase *parent, mlir::Type eltTy) + : ConstantArrayBuilderTemplateBase(builder, parent, eltTy) {} +}; + +/// A helper class of ConstantInitBuilder, used for building constant +/// struct initializers. +class ConstantStructBuilder + : public ConstantStructBuilderTemplateBase { + template friend class ConstantInitBuilderTemplateBase; + + // The use of explicit qualification is a GCC workaround. + template + friend class cir::ConstantAggregateBuilderTemplateBase; + + ConstantStructBuilder(ConstantInitBuilder &builder, + ConstantAggregateBuilderBase *parent, + mlir::cir::StructType structTy) + : ConstantStructBuilderTemplateBase(builder, parent, structTy) {} +}; + +} // end namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/ConstantInitFuture.h b/clang/lib/CIR/CodeGen/ConstantInitFuture.h new file mode 100644 index 000000000000..97631d5da88c --- /dev/null +++ b/clang/lib/CIR/CodeGen/ConstantInitFuture.h @@ -0,0 +1,102 @@ +//===- ConstantInitFuture.h - "Future" constant initializers ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class defines the ConstantInitFuture class. 
This is split out +// from ConstantInitBuilder.h in order to allow APIs to work with it +// without having to include that entire header. This is particularly +// important because it is often useful to be able to default-construct +// a future in, say, a default argument. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CIR_CODEGEN_CONSTANTINITFUTURE_H +#define LLVM_CLANG_CIR_CODEGEN_CONSTANTINITFUTURE_H + +#include "mlir/IR/Attributes.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/ADT/PointerUnion.h" + +// Forward-declare ConstantInitBuilderBase and give it a +// PointerLikeTypeTraits specialization so that we can safely use it +// in a PointerUnion below. +namespace cir { +class ConstantInitBuilderBase; +} // namespace cir + +namespace llvm { +template <> struct PointerLikeTypeTraits<::cir::ConstantInitBuilderBase *> { + using T = ::cir::ConstantInitBuilderBase *; + + static inline void *getAsVoidPointer(T p) { return p; } + static inline T getFromVoidPointer(void *p) { return static_cast(p); } + static constexpr int NumLowBitsAvailable = 2; +}; +} // namespace llvm + +namespace cir { + +/// A "future" for a completed constant initializer, which can be passed +/// around independently of any sub-builders (but not the original parent). +class ConstantInitFuture { + using PairTy = llvm::PointerUnion; + + PairTy Data; + + friend class ConstantInitBuilderBase; + explicit ConstantInitFuture(ConstantInitBuilderBase *builder); + +public: + ConstantInitFuture() {} + + /// A future can be explicitly created from a fixed initializer. + explicit ConstantInitFuture(mlir::Attribute initializer) : Data(initializer) { + assert(initializer && "creating null future"); + } + + /// Is this future non-null? + explicit operator bool() const { return bool(Data); } + + /// Return the type of the initializer. + mlir::Type getType() const; + + /// Abandon this initializer. + void abandon(); + + /// Install the initializer into a global variable. This cannot + /// be called multiple times. + void installInGlobal(mlir::cir::GlobalOp global); + + void *getOpaqueValue() const { return Data.getOpaqueValue(); } + static ConstantInitFuture getFromOpaqueValue(void *value) { + ConstantInitFuture result; + result.Data = PairTy::getFromOpaqueValue(value); + return result; + } + static constexpr int NumLowBitsAvailable = + llvm::PointerLikeTypeTraits::NumLowBitsAvailable; +}; + +} // namespace cir + +namespace llvm { + +template <> struct PointerLikeTypeTraits<::cir::ConstantInitFuture> { + using T = ::cir::ConstantInitFuture; + + static inline void *getAsVoidPointer(T future) { + return future.getOpaqueValue(); + } + static inline T getFromVoidPointer(void *p) { + return T::getFromOpaqueValue(p); + } + static constexpr int NumLowBitsAvailable = T::NumLowBitsAvailable; +}; + +} // end namespace llvm + +#endif diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h new file mode 100644 index 000000000000..5ab356df319f --- /dev/null +++ b/clang/lib/CIR/CodeGen/EHScopeStack.h @@ -0,0 +1,421 @@ +//===-- EHScopeStack.h - Stack for cleanup CIR generation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes should be the minimum interface required for other parts of +// CodeGen to emit cleanups. The implementation is in CIRGenCleanup.cpp and +// other implemenentation details that are not widely needed are in +// CIRGenCleanup.h. +// +// TODO(cir): this header should be shared between LLVM and CIR codegen. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIRGEN_EHSCOPESTACK_H +#define LLVM_CLANG_LIB_CIRGEN_EHSCOPESTACK_H + +#include "mlir/IR/Value.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +namespace cir { + +class CIRGenFunction; + +/// A branch fixup. These are required when emitting a goto to a +/// label which hasn't been emitted yet. The goto is optimistically +/// emitted as a branch to the basic block for the label, and (if it +/// occurs in a scope with non-trivial cleanups) a fixup is added to +/// the innermost cleanup. When a (normal) cleanup is popped, any +/// unresolved fixups in that scope are threaded through the cleanup. +struct BranchFixup { + // /// The block containing the terminator which needs to be modified + // /// into a switch if this fixup is resolved into the current scope. + // /// If null, LatestBranch points directly to the destination. + // llvm::BasicBlock *OptimisticBranchBlock; + + // /// The ultimate destination of the branch. + // /// + // /// This can be set to null to indicate that this fixup was + // /// successfully resolved. + // llvm::BasicBlock *Destination; + + // /// The destination index value. + // unsigned DestinationIndex; + + // /// The initial branch of the fixup. + // llvm::BranchInst *InitialBranch; +}; + +template struct InvariantValue { + typedef T type; + typedef T saved_type; + static bool needsSaving(type value) { return false; } + static saved_type save(CIRGenFunction &CGF, type value) { return value; } + static type restore(CIRGenFunction &CGF, saved_type value) { return value; } +}; + +/// A metaprogramming class for ensuring that a value will dominate an +/// arbitrary position in a function. +template struct DominatingValue : InvariantValue {}; + +template ::value || + std::is_base_of::value) && + !std::is_base_of::value && + !std::is_base_of::value> +struct DominatingPointer; +template struct DominatingPointer : InvariantValue {}; +// template struct DominatingPointer at end of file + +template struct DominatingValue : DominatingPointer {}; + +enum CleanupKind : unsigned { + /// Denotes a cleanup that should run when a scope is exited using exceptional + /// control flow (a throw statement leading to stack unwinding, ). + EHCleanup = 0x1, + + /// Denotes a cleanup that should run when a scope is exited using normal + /// control flow (falling off the end of the scope, return, goto, ...). + NormalCleanup = 0x2, + + NormalAndEHCleanup = EHCleanup | NormalCleanup, + + LifetimeMarker = 0x8, + NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup, +}; + +/// A stack of scopes which respond to exceptions, including cleanups +/// and catch blocks. +class EHScopeStack { +public: + /* Should switch to alignof(uint64_t) instead of 8, when EHCleanupScope can */ + enum { ScopeStackAlignment = 8 }; + + /// A saved depth on the scope stack. This is necessary because + /// pushing scopes onto the stack invalidates iterators. 
+ class stable_iterator { + friend class EHScopeStack; + + /// Offset from StartOfData to EndOfBuffer. + ptrdiff_t Size; + + stable_iterator(ptrdiff_t Size) : Size(Size) {} + + public: + static stable_iterator invalid() { return stable_iterator(-1); } + stable_iterator() : Size(-1) {} + + bool isValid() const { return Size >= 0; } + + /// Returns true if this scope encloses I. + /// Returns false if I is invalid. + /// This scope must be valid. + bool encloses(stable_iterator I) const { return Size <= I.Size; } + + /// Returns true if this scope strictly encloses I: that is, + /// if it encloses I and is not I. + /// Returns false is I is invalid. + /// This scope must be valid. + bool strictlyEncloses(stable_iterator I) const { return Size < I.Size; } + + friend bool operator==(stable_iterator A, stable_iterator B) { + return A.Size == B.Size; + } + friend bool operator!=(stable_iterator A, stable_iterator B) { + return A.Size != B.Size; + } + }; + + /// Information for lazily generating a cleanup. Subclasses must be + /// POD-like: cleanups will not be destructed, and they will be + /// allocated on the cleanup stack and freely copied and moved + /// around. + /// + /// Cleanup implementations should generally be declared in an + /// anonymous namespace. + class Cleanup { + // Anchor the construction vtable. + virtual void anchor(); + + protected: + ~Cleanup() = default; + + public: + Cleanup(const Cleanup &) = default; + Cleanup(Cleanup &&) {} + Cleanup() = default; + + virtual bool isRedundantBeforeReturn() { return false; } + + /// Generation flags. + class Flags { + enum { + F_IsForEH = 0x1, + F_IsNormalCleanupKind = 0x2, + F_IsEHCleanupKind = 0x4, + F_HasExitSwitch = 0x8, + }; + unsigned flags; + + public: + Flags() : flags(0) {} + + /// isForEH - true if the current emission is for an EH cleanup. + bool isForEHCleanup() const { return flags & F_IsForEH; } + bool isForNormalCleanup() const { return !isForEHCleanup(); } + void setIsForEHCleanup() { flags |= F_IsForEH; } + + bool isNormalCleanupKind() const { return flags & F_IsNormalCleanupKind; } + void setIsNormalCleanupKind() { flags |= F_IsNormalCleanupKind; } + + /// isEHCleanupKind - true if the cleanup was pushed as an EH + /// cleanup. + bool isEHCleanupKind() const { return flags & F_IsEHCleanupKind; } + void setIsEHCleanupKind() { flags |= F_IsEHCleanupKind; } + + bool hasExitSwitch() const { return flags & F_HasExitSwitch; } + void setHasExitSwitch() { flags |= F_HasExitSwitch; } + }; + + /// Emit the cleanup. For normal cleanups, this is run in the + /// same EH context as when the cleanup was pushed, i.e. the + /// immediately-enclosing context of the cleanup scope. For + /// EH cleanups, this is run in a terminate context. + /// + // \param flags cleanup kind. + virtual void Emit(CIRGenFunction &CGF, Flags flags) = 0; + }; + + /// ConditionalCleanup stores the saved form of its parameters, + /// then restores them and performs the cleanup. + template + class ConditionalCleanup final : public Cleanup { + typedef std::tuple::saved_type...> SavedTuple; + SavedTuple Saved; + + template + T restore(CIRGenFunction &CGF, std::index_sequence) { + // It's important that the restores are emitted in order. The braced init + // list guarantees that. + return T{DominatingValue::restore(CGF, std::get(Saved))...}; + } + + void Emit(CIRGenFunction &CGF, Flags flags) override { + restore(CGF, std::index_sequence_for()).Emit(CGF, flags); + } + + public: + ConditionalCleanup(typename DominatingValue::saved_type... A) + : Saved(A...) 
{} + + ConditionalCleanup(SavedTuple Tuple) : Saved(std::move(Tuple)) {} + }; + +private: + // The implementation for this class is in CGException.h and + // CGException.cpp; the definition is here because it's used as a + // member of CIRGenFunction. + + /// The start of the scope-stack buffer, i.e. the allocated pointer + /// for the buffer. All of these pointers are either simultaneously + /// null or simultaneously valid. + char *StartOfBuffer; + + /// The end of the buffer. + char *EndOfBuffer; + + /// The first valid entry in the buffer. + char *StartOfData; + + /// The innermost normal cleanup on the stack. + stable_iterator InnermostNormalCleanup; + + /// The innermost EH scope on the stack. + stable_iterator InnermostEHScope; + + /// The CGF this Stack belong to + CIRGenFunction *CGF; + + /// The current set of branch fixups. A branch fixup is a jump to + /// an as-yet unemitted label, i.e. a label for which we don't yet + /// know the EH stack depth. Whenever we pop a cleanup, we have + /// to thread all the current branch fixups through it. + /// + /// Fixups are recorded as the Use of the respective branch or + /// switch statement. The use points to the final destination. + /// When popping out of a cleanup, these uses are threaded through + /// the cleanup and adjusted to point to the new cleanup. + /// + /// Note that branches are allowed to jump into protected scopes + /// in certain situations; e.g. the following code is legal: + /// struct A { ~A(); }; // trivial ctor, non-trivial dtor + /// goto foo; + /// A a; + /// foo: + /// bar(); + llvm::SmallVector BranchFixups; + + char *allocate(size_t Size); + void deallocate(size_t Size); + + void *pushCleanup(CleanupKind K, size_t DataSize); + +public: + EHScopeStack() + : StartOfBuffer(nullptr), EndOfBuffer(nullptr), StartOfData(nullptr), + InnermostNormalCleanup(stable_end()), InnermostEHScope(stable_end()), + CGF(nullptr) {} + ~EHScopeStack() { delete[] StartOfBuffer; } + + /// Push a lazily-created cleanup on the stack. + template void pushCleanup(CleanupKind Kind, As... A) { + static_assert(alignof(T) <= ScopeStackAlignment, + "Cleanup's alignment is too large."); + void *Buffer = pushCleanup(Kind, sizeof(T)); + Cleanup *Obj = new (Buffer) T(A...); + (void)Obj; + } + + /// Push a lazily-created cleanup on the stack. Tuple version. + template + void pushCleanupTuple(CleanupKind Kind, std::tuple A) { + static_assert(alignof(T) <= ScopeStackAlignment, + "Cleanup's alignment is too large."); + void *Buffer = pushCleanup(Kind, sizeof(T)); + Cleanup *Obj = new (Buffer) T(std::move(A)); + (void)Obj; + } + + // Feel free to add more variants of the following: + + /// Push a cleanup with non-constant storage requirements on the + /// stack. The cleanup type must provide an additional static method: + /// static size_t getExtraSize(size_t); + /// The argument to this method will be the value N, which will also + /// be passed as the first argument to the constructor. + /// + /// The data stored in the extra storage must obey the same + /// restrictions as normal cleanup member data. + /// + /// The pointer returned from this method is valid until the cleanup + /// stack is modified. + template + T *pushCleanupWithExtra(CleanupKind Kind, size_t N, As... 
A) { + static_assert(alignof(T) <= ScopeStackAlignment, + "Cleanup's alignment is too large."); + void *Buffer = pushCleanup(Kind, sizeof(T) + T::getExtraSize(N)); + return new (Buffer) T(N, A...); + } + + void pushCopyOfCleanup(CleanupKind Kind, const void *Cleanup, size_t Size) { + void *Buffer = pushCleanup(Kind, Size); + std::memcpy(Buffer, Cleanup, Size); + } + + void setCGF(CIRGenFunction *inCGF) { CGF = inCGF; } + + /// Pops a cleanup scope off the stack. This is private to CGCleanup.cpp. + void popCleanup(); + + /// Push a set of catch handlers on the stack. The catch is + /// uninitialized and will need to have the given number of handlers + /// set on it. + class EHCatchScope *pushCatch(unsigned NumHandlers); + + /// Pops a catch scope off the stack. This is private to CGException.cpp. + void popCatch(); + + /// Push an exceptions filter on the stack. + class EHFilterScope *pushFilter(unsigned NumFilters); + + /// Pops an exceptions filter off the stack. + void popFilter(); + + /// Push a terminate handler on the stack. + void pushTerminate(); + + /// Pops a terminate handler off the stack. + void popTerminate(); + + // Returns true iff the current scope is either empty or contains only + // lifetime markers, i.e. no real cleanup code + bool containsOnlyLifetimeMarkers(stable_iterator Old) const; + + /// Determines whether the exception-scopes stack is empty. + bool empty() const { return StartOfData == EndOfBuffer; } + + bool requiresLandingPad() const; + + /// Determines whether there are any normal cleanups on the stack. + bool hasNormalCleanups() const { + return InnermostNormalCleanup != stable_end(); + } + + /// Returns the innermost normal cleanup on the stack, or + /// stable_end() if there are no normal cleanups. + stable_iterator getInnermostNormalCleanup() const { + return InnermostNormalCleanup; + } + stable_iterator getInnermostActiveNormalCleanup() const; + + stable_iterator getInnermostEHScope() const { return InnermostEHScope; } + + /// An unstable reference to a scope-stack depth. Invalidated by + /// pushes but not pops. + class iterator; + + /// Returns an iterator pointing to the innermost EH scope. + iterator begin() const; + + /// Returns an iterator pointing to the outermost EH scope. + iterator end() const; + + /// Create a stable reference to the top of the EH stack. The + /// returned reference is valid until that scope is popped off the + /// stack. + stable_iterator stable_begin() const { + return stable_iterator(EndOfBuffer - StartOfData); + } + + /// Create a stable reference to the bottom of the EH stack. + static stable_iterator stable_end() { return stable_iterator(0); } + + /// Translates an iterator into a stable_iterator. + stable_iterator stabilize(iterator it) const; + + /// Turn a stable reference to a scope depth into a unstable pointer + /// to the EH stack. + iterator find(stable_iterator save) const; + + /// Add a branch fixup to the current cleanup scope. + BranchFixup &addBranchFixup() { + assert(hasNormalCleanups() && "adding fixup in scope without cleanups"); + BranchFixups.push_back(BranchFixup()); + return BranchFixups.back(); + } + + unsigned getNumBranchFixups() const { return BranchFixups.size(); } + BranchFixup &getBranchFixup(unsigned I) { + assert(I < getNumBranchFixups()); + return BranchFixups[I]; + } + + /// Pops lazily-removed fixups from the end of the list. This + /// should only be called by procedures which have just popped a + /// cleanup or resolved one or more fixups. 
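  // Illustrative sketch (not verbatim from this patch): a cleanup is usually a
  // small POD-like struct in an anonymous namespace whose Emit() does the
  // work, pushed lazily through pushCleanup<T>(). The cleanup name, the
  // captured value and the NormalAndEHCleanup kind below are assumed for
  // illustration only.
  //
  //   namespace {
  //   struct CallReleaseFn final : EHScopeStack::Cleanup {
  //     mlir::Value Handle; // POD-like: freely copied, never destructed
  //     CallReleaseFn(mlir::Value Handle) : Handle(Handle) {}
  //     void Emit(CIRGenFunction &CGF, Flags flags) override {
  //       // emit the scope-exit work (e.g. a destructor call) for Handle here
  //     }
  //   };
  //   } // namespace
  //
  //   // later, while emitting the enclosing scope:
  //   CGF.EHStack.pushCleanup<CallReleaseFn>(NormalAndEHCleanup, handle);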
+ void popNullFixups(); + + /// Clears the branch-fixups list. This should only be called by + /// ResolveAllBranchFixups. + void clearFixups() { BranchFixups.clear(); } +}; + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp new file mode 100644 index 000000000000..a802abe18313 --- /dev/null +++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp @@ -0,0 +1,620 @@ +#include "TargetInfo.h" +#include "ABIInfo.h" +#include "CIRGenCXXABI.h" +#include "CIRGenFunctionInfo.h" +#include "CIRGenTypes.h" + +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Target/x86.h" + +using namespace cir; +using namespace clang; + +static bool testIfIsVoidTy(QualType Ty) { + const auto *BT = Ty->getAs(); + if (!BT) + return false; + + BuiltinType::Kind k = BT->getKind(); + return k == BuiltinType::Void; +} + +static bool isAggregateTypeForABI(QualType T) { + return !CIRGenFunction::hasScalarEvaluationKind(T) || + T->isMemberFunctionPointerType(); +} + +/// Pass transparent unions as if they were the type of the first element. Sema +/// should ensure that all elements of the union have the same "machine type". +static QualType useFirstFieldIfTransparentUnion(QualType Ty) { + assert(!Ty->getAsUnionType() && "NYI"); + return Ty; +} + +namespace { + +/// The default implementation for ABI specific +/// details. This implementation provides information which results in +/// self-consistent and sensible LLVM IR generation, but does not +/// conform to any particular ABI. +class DefaultABIInfo : public ABIInfo { +public: + DefaultABIInfo(CIRGenTypes &CGT) : ABIInfo(CGT) {} + + virtual ~DefaultABIInfo() = default; + + ABIArgInfo classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (isAggregateTypeForABI(RetTy)) + llvm_unreachable("NYI"); + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs()) + llvm_unreachable("NYI"); + + if (const auto *EIT = RetTy->getAs()) + llvm_unreachable("NYI"); + + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); + } + + ABIArgInfo classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + llvm_unreachable("NYI"); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + llvm_unreachable("NYI"); + + if (const auto *EIT = Ty->getAs()) + llvm_unreachable("NYI"); + + return (isPromotableIntegerTypeForABI(Ty) ? 
ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); + } + + void computeInfo(CIRGenFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { +public: + enum ABIKind { AAPCS = 0, DarwinPCS, Win64 }; + +private: + ABIKind Kind; + +public: + AArch64ABIInfo(CIRGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} + +private: + ABIKind getABIKind() const { return Kind; } + bool isDarwinPCS() const { return Kind == DarwinPCS; } + + ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic, + unsigned CallingConvention) const; + + void computeInfo(CIRGenFunctionInfo &FI) const override { + // Top leevl CIR has unlimited arguments and return types. Lowering for ABI + // specific concerns should happen during a lowering phase. Assume + // everything is direct for now. + for (CIRGenFunctionInfo::arg_iterator it = FI.arg_begin(), + ie = FI.arg_end(); + it != ie; ++it) { + if (testIfIsVoidTy(it->type)) + it->info = ABIArgInfo::getIgnore(); + else + it->info = ABIArgInfo::getDirect(CGT.ConvertType(it->type)); + } + auto RetTy = FI.getReturnType(); + if (testIfIsVoidTy(RetTy)) + FI.getReturnInfo() = ABIArgInfo::getIgnore(); + else + FI.getReturnInfo() = ABIArgInfo::getDirect(CGT.ConvertType(RetTy)); + + return; + } +}; + +class AArch64TargetCIRGenInfo : public TargetCIRGenInfo { +public: + AArch64TargetCIRGenInfo(CIRGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) + : TargetCIRGenInfo(std::make_unique(CGT, Kind)) {} +}; + +} // namespace + +//===----------------------------------------------------------------------===// +// X86 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +/// The AVX ABI leel for X86 targets. +using X86AVXABILevel = ::cir::X86AVXABILevel; + +class X86_64ABIInfo : public ABIInfo { + using Class = X86ArgClass; + + // X86AVXABILevel AVXLevel; + // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on 64-bit + // hardware. + // bool Has64BitPointers; + +public: + X86_64ABIInfo(CIRGenTypes &CGT, X86AVXABILevel AVXLevel) + : ABIInfo(CGT) + // , AVXLevel(AVXLevel) + // , Has64BitPointers(CGT.getDataLayout().getPointeSize(0) == 8) + {} + + virtual void computeInfo(CIRGenFunctionInfo &FI) const override; + + /// classify - Determine the x86_64 register classes in which the given type T + /// should be passed. + /// + /// \param Lo - The classification for the parts of the type residing in the + /// low word of the containing object. + /// + /// \param Hi - The classification for the parts of the type residing in the + /// high word of the containing object. + /// + /// \param OffsetBase - The bit offset of this type in the containing object. + /// Some parameters are classified different depending on whether they + /// straddle an eightbyte boundary. + /// + /// \param isNamedArg - Whether the argument in question is a "named" + /// argument, as used in AMD64-ABI 3.5.7. 
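  ///
  /// For illustration (a sketch; aggregate classification is still NYI in this
  /// file): a plain 'int' is a single INTEGER eightbyte, so Lo becomes Integer
  /// and Hi stays NoClass, while a 16-byte 'struct { double d; long l; }'
  /// would classify as Lo = SSE and Hi = Integer under the SysV AMD64 rules:
  ///
  ///   Class Lo, Hi;
  ///   classify(getContext().IntTy, /*OffsetBase=*/0, Lo, Hi,
  ///            /*isNamedArg=*/true); // Lo == Integer, Hi == NoClass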
+ /// + /// If a word is unused its result will be NoClass; if a type should be passed + /// in Memory then at least the classification of \arg Lo will be Memory. + /// + /// The \arg Lo class will be NoClass iff the argument is ignored. + /// + /// If the \arg Lo class is ComplexX87, then the \arg Hi class will also be + /// ComplexX87. + void classify(clang::QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi, + bool isNamedArg) const; + + mlir::Type GetSSETypeAtOffset(mlir::Type CIRType, unsigned CIROffset, + clang::QualType SourceTy, + unsigned SourceOffset) const; + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + ABIArgInfo classifyArgumentType(clang::QualType Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, + bool isNamedArg) const; + + mlir::Type GetINTEGERTypeAtOffset(mlir::Type CIRType, unsigned CIROffset, + QualType SourceTy, + unsigned SourceOffset) const; + + /// getIndirectResult - Give a source type \arg Ty, return a suitable result + /// such that the argument will be passed in memory. + /// + /// \param freeIntRegs - The number of free integer registers remaining + /// available. + ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const; +}; + +class X86_64TargetCIRGenInfo : public TargetCIRGenInfo { +public: + X86_64TargetCIRGenInfo(CIRGenTypes &CGT, X86AVXABILevel AVXLevel) + : TargetCIRGenInfo(std::make_unique(CGT, AVXLevel)) {} +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Base ABI and target codegen info implementation common between SPIR and +// SPIR-V. +//===----------------------------------------------------------------------===// + +namespace { +class CommonSPIRABIInfo : public DefaultABIInfo { +public: + CommonSPIRABIInfo(CIRGenTypes &CGT) : DefaultABIInfo(CGT) {} +}; + +class SPIRVABIInfo : public CommonSPIRABIInfo { +public: + SPIRVABIInfo(CIRGenTypes &CGT) : CommonSPIRABIInfo(CGT) {} + void computeInfo(CIRGenFunctionInfo &FI) const override { + // The logic is same as in DefaultABIInfo with an exception on the kernel + // arguments handling. 
+ mlir::cir::CallingConv CC = FI.getCallingConvention(); + + bool cxxabiHit = getCXXABI().classifyReturnType(FI); + assert(!cxxabiHit && "C++ ABI not considered"); + + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + for (auto &I : FI.arguments()) { + if (CC == mlir::cir::CallingConv::SpirKernel) { + I.info = classifyKernelArgumentType(I.type); + } else { + I.info = classifyArgumentType(I.type); + } + } + } + +private: + ABIArgInfo classifyKernelArgumentType(QualType Ty) const { + assert(!getContext().getLangOpts().CUDAIsDevice && "NYI"); + return classifyArgumentType(Ty); + } +}; +} // namespace + +namespace cir { +void computeSPIRKernelABIInfo(CIRGenModule &CGM, CIRGenFunctionInfo &FI) { + if (CGM.getTarget().getTriple().isSPIRV()) + SPIRVABIInfo(CGM.getTypes()).computeInfo(FI); + else + CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI); +} +} // namespace cir + +namespace { + +class CommonSPIRTargetCIRGenInfo : public TargetCIRGenInfo { +public: + CommonSPIRTargetCIRGenInfo(std::unique_ptr ABIInfo) + : TargetCIRGenInfo(std::move(ABIInfo)) {} + + mlir::cir::AddressSpaceAttr getCIRAllocaAddressSpace() const override { + return mlir::cir::AddressSpaceAttr::get( + &getABIInfo().CGT.getMLIRContext(), + mlir::cir::AddressSpaceAttr::Kind::offload_private); + } + + mlir::cir::CallingConv getOpenCLKernelCallingConv() const override { + return mlir::cir::CallingConv::SpirKernel; + } +}; + +class SPIRVTargetCIRGenInfo : public CommonSPIRTargetCIRGenInfo { +public: + SPIRVTargetCIRGenInfo(CIRGenTypes &CGT) + : CommonSPIRTargetCIRGenInfo(std::make_unique(CGT)) {} +}; + +} // namespace + +// TODO(cir): remove the attribute once this gets used. +LLVM_ATTRIBUTE_UNUSED +static bool classifyReturnType(const CIRGenCXXABI &CXXABI, + CIRGenFunctionInfo &FI, const ABIInfo &Info) { + QualType Ty = FI.getReturnType(); + + assert(!Ty->getAs() && "RecordType returns NYI"); + + return CXXABI.classifyReturnType(FI); +} + +CIRGenCXXABI &ABIInfo::getCXXABI() const { return CGT.getCXXABI(); } + +clang::ASTContext &ABIInfo::getContext() const { return CGT.getContext(); } + +ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, + unsigned freeIntRegs) const { + assert(false && "NYI"); +} + +void X86_64ABIInfo::computeInfo(CIRGenFunctionInfo &FI) const { + // Top level CIR has unlimited arguments and return types. Lowering for ABI + // specific concerns should happen during a lowering phase. Assume everything + // is direct for now. + for (CIRGenFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it) { + if (testIfIsVoidTy(it->type)) + it->info = ABIArgInfo::getIgnore(); + else + it->info = ABIArgInfo::getDirect(CGT.ConvertType(it->type)); + } + auto RetTy = FI.getReturnType(); + if (testIfIsVoidTy(RetTy)) + FI.getReturnInfo() = ABIArgInfo::getIgnore(); + else + FI.getReturnInfo() = ABIArgInfo::getDirect(CGT.ConvertType(RetTy)); +} + +/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in +/// an 8-byte GPR. This means that we either have a scalar or we are talking +/// about the high or low part of an up-to-16-byte struct. This routine picks +/// the best CIR type to represent this, which may be i64 or may be anything +/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*, +/// etc). +/// +/// PrefType is a CIR type that corresponds to (part of) the IR type for the +/// source type. CIROffset is an offset in bytes into the CIR type taht the +/// 8-byte value references. PrefType may be null. 
+/// +/// SourceTy is the source-level type for the entire argument. SourceOffset is +/// an offset into this that we're processing (which is always either 0 or 8). +/// +mlir::Type X86_64ABIInfo::GetINTEGERTypeAtOffset(mlir::Type CIRType, + unsigned CIROffset, + QualType SourceTy, + unsigned SourceOffset) const { + // TODO: entirely stubbed out + assert(CIROffset == 0 && "NYI"); + assert(SourceOffset == 0 && "NYI"); + return CIRType; +} + +ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, + unsigned int freeIntRegs, + unsigned int &neededInt, + unsigned int &neededSSE, + bool isNamedArg) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + X86_64ABIInfo::Class Lo, Hi; + classify(Ty, 0, Lo, Hi, isNamedArg); + + // Check some invariants + // FIXME: Enforce these by construction. + assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); + assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); + + neededInt = 0; + neededSSE = 0; + mlir::Type ResType = nullptr; + switch (Lo) { + default: + assert(false && "NYI"); + + // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next available + // register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 and %r9 is used. + case Integer: + ++neededInt; + + // Pick an 8-byte type based on the preferred type. + ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0); + + // If we have a sign or zero extended integer, make sure to return Extend so + // that the parameter gets the right LLVM IR attributes. + if (Hi == NoClass && mlir::isa(ResType)) { + assert(!Ty->getAs() && "NYI"); + if (Ty->isSignedIntegerOrEnumerationType() && + isPromotableIntegerTypeForABI(Ty)) + return ABIArgInfo::getExtend(Ty); + } + + break; + + // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next available SSE + // register is used, the registers are taken in the order from %xmm0 to + // %xmm7. + case SSE: { + mlir::Type CIRType = CGT.ConvertType(Ty); + ResType = GetSSETypeAtOffset(CIRType, 0, Ty, 0); + ++neededSSE; + break; + } + } + + mlir::Type HighPart = nullptr; + switch (Hi) { + default: + assert(false && "NYI"); + case NoClass: + break; + } + + assert(!HighPart && "NYI"); + + return ABIArgInfo::getDirect(ResType); +} + +ABIInfo::~ABIInfo() {} + +bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { + if (getContext().isPromotableIntegerType(Ty)) + return true; + + assert(!Ty->getAs() && "NYI"); + + return false; +} + +void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, + Class &Hi, bool isNamedArg) const { + // FIXME: This code can be simplified by introducing a simple value class for + // Class pairs with appropriate constructor methods for the various + // situations. + + // FIXME: Some of the split computations are wrong; unaligned vectors + // shouldn't be passed in registers for example, so there is no chance they + // can straddle an eightbyte. Verify & simplify. + + Lo = Hi = NoClass; + Class &Current = OffsetBase < 64 ? 
Lo : Hi; + Current = Memory; + + if (const auto *BT = Ty->getAs()) { + BuiltinType::Kind k = BT->getKind(); + if (k == BuiltinType::Void) { + Current = NoClass; + } else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) { + assert(false && "NYI"); + Lo = Integer; + Hi = Integer; + } else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) { + Current = Integer; + } else if (k == BuiltinType::Float || k == BuiltinType::Double || + k == BuiltinType::Float16) { + Current = SSE; + } else if (k == BuiltinType::LongDouble) { + assert(false && "NYI"); + } else + assert(false && + "Only void and Integer supported so far for builtin types"); + // FIXME: _Decimal32 and _Decimal64 are SSE. + // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). + return; + } + + assert(!Ty->getAs() && "Enums NYI"); + if (Ty->hasPointerRepresentation()) { + Current = Integer; + return; + } + + assert(false && "Nothing else implemented yet"); +} + +/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the +/// low 8 bytes of an XMM register, corresponding to the SSE class. +mlir::Type X86_64ABIInfo::GetSSETypeAtOffset(mlir::Type CIRType, + unsigned int CIROffset, + clang::QualType SourceTy, + unsigned int SourceOffset) const { + // TODO: entirely stubbed out + assert(CIROffset == 0 && "NYI"); + assert(SourceOffset == 0 && "NYI"); + return CIRType; +} + +ABIArgInfo X86_64ABIInfo::classifyReturnType(QualType RetTy) const { + // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the classification + // algorithm. + X86_64ABIInfo::Class Lo, Hi; + classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); + + // Check some invariants. + assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); + assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); + + mlir::Type ResType = nullptr; + assert(Lo == NoClass || Lo == Integer || + Lo == SSE && "Only NoClass and Integer supported so far"); + + switch (Lo) { + case NoClass: + assert(Hi == NoClass && "Only NoClass supported so far for Hi"); + return ABIArgInfo::getIgnore(); + + // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next available + // register of the sequence %rax, %rdx is used. + case Integer: + ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); + + // If we have a sign or zero extended integer, make sure to return Extend so + // that the parameter gets the right LLVM IR attributes. + // TODO: extend the above consideration to MLIR + if (Hi == NoClass && mlir::isa(ResType)) { + // Treat an enum type as its underlying type. + if (const auto *EnumTy = RetTy->getAs()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + if (RetTy->isIntegralOrEnumerationType() && + isPromotableIntegerTypeForABI(RetTy)) { + return ABIArgInfo::getExtend(RetTy); + } + } + break; + + // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next available SSE + // register of the sequence %xmm0, %xmm1 is used. + case SSE: + ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0); + break; + + default: + llvm_unreachable("NYI"); + } + + mlir::Type HighPart = nullptr; + + if (HighPart) + assert(false && "NYI"); + + return ABIArgInfo::getDirect(ResType); +} + +clang::LangAS +TargetCIRGenInfo::getGlobalVarAddressSpace(cir::CIRGenModule &CGM, + const clang::VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + return D ? 
D->getType().getAddressSpace() : LangAS::Default; +} + +mlir::Value TargetCIRGenInfo::performAddrSpaceCast( + CIRGenFunction &CGF, mlir::Value Src, mlir::cir::AddressSpaceAttr SrcAddr, + mlir::cir::AddressSpaceAttr DestAddr, mlir::Type DestTy, + bool IsNonNull) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + if (auto globalOp = Src.getDefiningOp()) + llvm_unreachable("Global ops addrspace cast NYI"); + // Try to preserve the source's name to make IR more readable. + return CGF.getBuilder().createAddrSpaceCast(Src, DestTy); +} + +const TargetCIRGenInfo &CIRGenModule::getTargetCIRGenInfo() { + if (TheTargetCIRGenInfo) + return *TheTargetCIRGenInfo; + + // Helper to set the unique_ptr while still keeping the return value. + auto SetCIRGenInfo = [&](TargetCIRGenInfo *P) -> const TargetCIRGenInfo & { + this->TheTargetCIRGenInfo.reset(P); + return *P; + }; + + const llvm::Triple &Triple = getTarget().getTriple(); + + switch (Triple.getArch()) { + default: + assert(false && "Target not yet supported!"); + + case llvm::Triple::aarch64_be: + case llvm::Triple::aarch64: { + AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS; + assert(getTarget().getABI() == "aapcs" || + getTarget().getABI() == "darwinpcs" && + "Only Darwin supported for aarch64"); + Kind = AArch64ABIInfo::DarwinPCS; + return SetCIRGenInfo(new AArch64TargetCIRGenInfo(genTypes, Kind)); + } + + case llvm::Triple::x86_64: { + StringRef ABI = getTarget().getABI(); + X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 + : ABI == "avx" ? X86AVXABILevel::AVX + : X86AVXABILevel::None); + + switch (Triple.getOS()) { + default: + assert(false && "OSType NYI"); + case llvm::Triple::Linux: + return SetCIRGenInfo(new X86_64TargetCIRGenInfo(genTypes, AVXLevel)); + } + } + + case llvm::Triple::spirv64: { + return SetCIRGenInfo(new SPIRVTargetCIRGenInfo(genTypes)); + } + } +} diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h new file mode 100644 index 000000000000..994fa357c864 --- /dev/null +++ b/clang/lib/CIR/CodeGen/TargetInfo.h @@ -0,0 +1,113 @@ +//===---- TargetInfo.h - Encapsulate target details -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes wrap the information about a call or function +// definition used to handle ABI compliancy. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_TARGETINFO_H +#define LLVM_CLANG_LIB_CIR_TARGETINFO_H + +#include "ABIInfo.h" +#include "CIRGenValue.h" +#include "mlir/IR/Types.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" + +#include + +namespace cir { + +class CIRGenFunction; +class CIRGenModule; + +/// This class organizes various target-specific codegeneration issues, like +/// target-specific attributes, builtins and so on. +/// Equivalent to LLVM's TargetCodeGenInfo. +class TargetCIRGenInfo { + std::unique_ptr Info = nullptr; + +public: + TargetCIRGenInfo(std::unique_ptr Info) : Info(std::move(Info)) {} + + /// Returns ABI info helper for the target. 
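  //
  // Usage sketch (assuming a CIRGenModule &CGM is in scope): code generation
  // reaches these hooks through the module, e.g.
  //
  //   const TargetCIRGenInfo &TCGI = CGM.getTargetCIRGenInfo();
  //   mlir::cir::AddressSpaceAttr AllocaAS = TCGI.getCIRAllocaAddressSpace();
  //   mlir::cir::CallingConv KernelCC = TCGI.getOpenCLKernelCallingConv();
  //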
+ const ABIInfo &getABIInfo() const { return *Info; } + + virtual bool isScalarizableAsmOperand(CIRGenFunction &CGF, + mlir::Type Ty) const { + return false; + } + + /// Corrects the MLIR type for a given constraint and "usual" + /// type. + /// + /// \returns A new MLIR type, possibly the same as the original + /// on success + virtual mlir::Type adjustInlineAsmType(CIRGenFunction &CGF, + llvm::StringRef Constraint, + mlir::Type Ty) const { + return Ty; + } + + virtual void + addReturnRegisterOutputs(CIRGenFunction &CGF, LValue ReturnValue, + std::string &Constraints, + std::vector &ResultRegTypes, + std::vector &ResultTruncRegTypes, + std::vector &ResultRegDests, + std::string &AsmString, unsigned NumOutputs) const {} + + /// Get target favored AST address space of a global variable for languages + /// other than OpenCL and CUDA. + /// If \p D is nullptr, returns the default target favored address space + /// for global variable. + virtual clang::LangAS getGlobalVarAddressSpace(CIRGenModule &CGM, + const clang::VarDecl *D) const; + + /// Get the CIR address space for alloca. + virtual mlir::cir::AddressSpaceAttr getCIRAllocaAddressSpace() const { + // Return the null attribute, which means the target does not care about the + // alloca address space. + return {}; + } + + /// Perform address space cast of an expression of pointer type. + /// \param V is the value to be casted to another address space. + /// \param SrcAddr is the CIR address space of \p V. + /// \param DestAddr is the targeted CIR address space. + /// \param DestTy is the destination pointer type. + /// \param IsNonNull is the flag indicating \p V is known to be non null. + virtual mlir::Value performAddrSpaceCast(CIRGenFunction &CGF, mlir::Value V, + mlir::cir::AddressSpaceAttr SrcAddr, + mlir::cir::AddressSpaceAttr DestAddr, + mlir::Type DestTy, + bool IsNonNull = false) const; + + /// Get CIR calling convention for OpenCL kernel. + virtual mlir::cir::CallingConv getOpenCLKernelCallingConv() const { + // OpenCL kernels are called via an explicit runtime API with arguments + // set with clSetKernelArg(), not as normal sub-functions. + // Return SPIR_KERNEL by default as the kernel calling convention to + // ensure the fingerprint is fixed such way that each OpenCL argument + // gets one matching argument in the produced kernel function argument + // list to enable feasible implementation of clSetKernelArg() with + // aggregates etc. In case we would use the default C calling conv here, + // clSetKernelArg() might break depending on the target-specific + // conventions; different targets might split structs passed as values + // to multiple function arguments etc. + return mlir::cir::CallingConv::SpirKernel; + } + + virtual ~TargetCIRGenInfo() {} +}; + +void computeSPIRKernelABIInfo(CIRGenModule &CGM, CIRGenFunctionInfo &FI); + +} // namespace cir + +#endif diff --git a/clang/lib/CIR/Dialect/CMakeLists.txt b/clang/lib/CIR/Dialect/CMakeLists.txt index f33061b2d87c..9f57627c321f 100644 --- a/clang/lib/CIR/Dialect/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp new file mode 100644 index 000000000000..2dfc0db372f6 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -0,0 +1,688 @@ +//===- CIRTypes.cpp - MLIR CIR Types --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the types in the CIR dialect. +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/TypeSwitch.h" + +// ClangIR holds back AST references when available. +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ExprCXX.h" + +static void printStructMembers(mlir::AsmPrinter &p, mlir::ArrayAttr members); +static mlir::ParseResult parseStructMembers(::mlir::AsmParser &parser, + mlir::ArrayAttr &members); + +static void printFloatLiteral(mlir::AsmPrinter &p, llvm::APFloat value, + mlir::Type ty); +static mlir::ParseResult +parseFloatLiteral(mlir::AsmParser &parser, + mlir::FailureOr &value, mlir::Type ty); + +static mlir::ParseResult parseConstPtr(mlir::AsmParser &parser, + mlir::IntegerAttr &value); + +static void printConstPtr(mlir::AsmPrinter &p, mlir::IntegerAttr value); + +#define GET_ATTRDEF_CLASSES +#include "clang/CIR/Dialect/IR/CIROpsAttributes.cpp.inc" + +using namespace mlir; +using namespace mlir::cir; + +//===----------------------------------------------------------------------===// +// CIR AST Attr helpers +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace cir { + +mlir::Attribute makeFuncDeclAttr(const clang::Decl *decl, + mlir::MLIRContext *ctx) { + return llvm::TypeSwitch(decl) + .Case([ctx](const clang::CXXConstructorDecl *ast) { + return ASTCXXConstructorDeclAttr::get(ctx, ast); + }) + .Case([ctx](const clang::CXXConversionDecl *ast) { + return ASTCXXConversionDeclAttr::get(ctx, ast); + }) + .Case([ctx](const clang::CXXDestructorDecl *ast) { + return ASTCXXDestructorDeclAttr::get(ctx, ast); + }) + .Case([ctx](const clang::CXXMethodDecl *ast) { + return ASTCXXMethodDeclAttr::get(ctx, ast); + }) + .Case([ctx](const clang::FunctionDecl *ast) { + return ASTFunctionDeclAttr::get(ctx, ast); + }) + .Default([](auto) { + llvm_unreachable("unexpected Decl kind"); + return mlir::Attribute(); + }); +} + +} // namespace cir +} // namespace mlir + +//===----------------------------------------------------------------------===// +// General CIR parsing / printing +//===----------------------------------------------------------------------===// + +Attribute CIRDialect::parseAttribute(DialectAsmParser &parser, + Type type) const { + llvm::SMLoc typeLoc = parser.getCurrentLocation(); + StringRef mnemonic; + Attribute genAttr; + OptionalParseResult parseResult = + generatedAttributeParser(parser, &mnemonic, type, genAttr); + if (parseResult.has_value()) + return genAttr; + parser.emitError(typeLoc, "unknown attribute in CIR dialect"); + return Attribute(); +} + +void CIRDialect::printAttribute(Attribute attr, DialectAsmPrinter &os) const 
{ + if (failed(generatedAttributePrinter(attr, os))) + llvm_unreachable("unexpected CIR type kind"); +} + +static void printStructMembers(mlir::AsmPrinter &printer, + mlir::ArrayAttr members) { + printer << '{'; + llvm::interleaveComma(members, printer); + printer << '}'; +} + +static ParseResult parseStructMembers(mlir::AsmParser &parser, + mlir::ArrayAttr &members) { + SmallVector elts; + + auto delimiter = AsmParser::Delimiter::Braces; + auto result = parser.parseCommaSeparatedList(delimiter, [&]() { + mlir::TypedAttr attr; + if (parser.parseAttribute(attr).failed()) + return mlir::failure(); + elts.push_back(attr); + return mlir::success(); + }); + + if (result.failed()) + return mlir::failure(); + + members = mlir::ArrayAttr::get(parser.getContext(), elts); + return mlir::success(); +} + +LogicalResult ConstStructAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + mlir::Type type, ArrayAttr members) { + auto sTy = mlir::dyn_cast_if_present(type); + if (!sTy) { + emitError() << "expected !cir.struct type"; + return failure(); + } + + if (sTy.getMembers().size() != members.size()) { + emitError() << "number of elements must match"; + return failure(); + } + + unsigned attrIdx = 0; + for (auto &member : sTy.getMembers()) { + auto m = dyn_cast_if_present(members[attrIdx]); + if (!m) { + emitError() << "expected mlir::TypedAttr attribute"; + return failure(); + } + if (member != m.getType()) { + emitError() << "element at index " << attrIdx << " has type " + << m.getType() << " but return type for this element is " + << member; + return failure(); + } + attrIdx++; + } + + return success(); +} + +LogicalResult StructLayoutAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, unsigned size, + unsigned alignment, bool padded, mlir::Type largest_member, + mlir::ArrayAttr offsets) { + if (not std::all_of(offsets.begin(), offsets.end(), [](mlir::Attribute attr) { + return mlir::isa(attr); + })) { + return emitError() << "all index values must be integers"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// LangAttr definitions +//===----------------------------------------------------------------------===// + +Attribute LangAttr::parse(AsmParser &parser, Type odsType) { + auto loc = parser.getCurrentLocation(); + if (parser.parseLess()) + return {}; + + // Parse variable 'lang'. + llvm::StringRef lang; + if (parser.parseKeyword(&lang)) + return {}; + + // Check if parsed value is a valid language. + auto langEnum = symbolizeSourceLanguage(lang); + if (!langEnum.has_value()) { + parser.emitError(loc) << "invalid language keyword '" << lang << "'"; + return {}; + } + + if (parser.parseGreater()) + return {}; + + return get(parser.getContext(), langEnum.value()); +} + +void LangAttr::print(AsmPrinter &printer) const { + printer << "<" << getLang() << '>'; +} + +//===----------------------------------------------------------------------===// +// ConstPtrAttr definitions +//===----------------------------------------------------------------------===// + +// TODO: Consider encoding the null value differently and use conditional +// assembly format instead of custom parsing/printing. 
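// For illustration, both spellings round-trip through the helpers below (the
// surrounding #cir attribute syntax is elided): an explicit integer such as
// <4096 : i64> is parsed verbatim, while <null> is sugar that parseConstPtr
// lowers to a 64-bit zero and printConstPtr prints back as `null`, e.g.
//
//   mlir::IntegerAttr value;
//   if (parseConstPtr(parser, value).succeeded()) {
//     // after parsing the `null` keyword: value is 0 : i64
//   }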
+static ParseResult parseConstPtr(AsmParser &parser, mlir::IntegerAttr &value) { + + if (parser.parseOptionalKeyword("null").succeeded()) { + value = mlir::IntegerAttr::get( + mlir::IntegerType::get(parser.getContext(), 64), 0); + return success(); + } + + return parser.parseAttribute(value); +} + +static void printConstPtr(AsmPrinter &p, mlir::IntegerAttr value) { + if (!value.getInt()) + p << "null"; + else + p << value; +} + +//===----------------------------------------------------------------------===// +// IntAttr definitions +//===----------------------------------------------------------------------===// + +Attribute IntAttr::parse(AsmParser &parser, Type odsType) { + mlir::APInt APValue; + + if (!mlir::isa(odsType)) + return {}; + auto type = mlir::cast(odsType); + + // Consume the '<' symbol. + if (parser.parseLess()) + return {}; + + // Fetch arbitrary precision integer value. + if (type.isSigned()) { + int64_t value; + if (parser.parseInteger(value)) + parser.emitError(parser.getCurrentLocation(), "expected integer value"); + APValue = mlir::APInt(type.getWidth(), value, type.isSigned()); + if (APValue.getSExtValue() != value) + parser.emitError(parser.getCurrentLocation(), + "integer value too large for the given type"); + } else { + uint64_t value; + if (parser.parseInteger(value)) + parser.emitError(parser.getCurrentLocation(), "expected integer value"); + APValue = mlir::APInt(type.getWidth(), value, type.isSigned()); + if (APValue.getZExtValue() != value) + parser.emitError(parser.getCurrentLocation(), + "integer value too large for the given type"); + } + + // Consume the '>' symbol. + if (parser.parseGreater()) + return {}; + + return IntAttr::get(type, APValue); +} + +void IntAttr::print(AsmPrinter &printer) const { + auto type = mlir::cast(getType()); + printer << '<'; + if (type.isSigned()) + printer << getSInt(); + else + printer << getUInt(); + printer << '>'; +} + +LogicalResult IntAttr::verify(function_ref emitError, + Type type, APInt value) { + if (!mlir::isa(type)) { + emitError() << "expected 'simple.int' type"; + return failure(); + } + + auto intType = mlir::cast(type); + if (value.getBitWidth() != intType.getWidth()) { + emitError() << "type and value bitwidth mismatch: " << intType.getWidth() + << " != " << value.getBitWidth(); + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// FPAttr definitions +//===----------------------------------------------------------------------===// + +static void printFloatLiteral(mlir::AsmPrinter &p, llvm::APFloat value, + mlir::Type ty) { + p << value; +} + +static mlir::ParseResult +parseFloatLiteral(mlir::AsmParser &parser, + mlir::FailureOr &value, mlir::Type ty) { + double rawValue; + if (parser.parseFloat(rawValue)) { + return parser.emitError(parser.getCurrentLocation(), + "expected floating-point value"); + } + + auto losesInfo = false; + value.emplace(rawValue); + + auto tyFpInterface = dyn_cast(ty); + if (!tyFpInterface) { + // Parsing of the current floating-point literal has succeeded, but the + // given attribute type is invalid. This error will be reported later when + // the attribute is being verified. 
+ return success(); + } + + value->convert(tyFpInterface.getFloatSemantics(), + llvm::RoundingMode::TowardZero, &losesInfo); + return success(); +} + +cir::FPAttr cir::FPAttr::getZero(mlir::Type type) { + return get( + type, APFloat::getZero( + mlir::cast(type).getFloatSemantics())); +} + +LogicalResult cir::FPAttr::verify(function_ref emitError, + Type type, APFloat value) { + auto fltTypeInterface = mlir::dyn_cast(type); + if (!fltTypeInterface) { + emitError() << "expected floating-point type"; + return failure(); + } + if (APFloat::SemanticsToEnum(fltTypeInterface.getFloatSemantics()) != + APFloat::SemanticsToEnum(value.getSemantics())) { + emitError() << "floating-point semantics mismatch"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// ComplexAttr definitions +//===----------------------------------------------------------------------===// + +LogicalResult ComplexAttr::verify(function_ref emitError, + mlir::cir::ComplexType type, + mlir::TypedAttr real, mlir::TypedAttr imag) { + auto elemTy = type.getElementTy(); + if (real.getType() != elemTy) { + emitError() << "type of the real part does not match the complex type"; + return failure(); + } + if (imag.getType() != elemTy) { + emitError() << "type of the imaginary part does not match the complex type"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// CmpThreeWayInfoAttr definitions +//===----------------------------------------------------------------------===// + +std::string CmpThreeWayInfoAttr::getAlias() const { + std::string alias = "cmp3way_info"; + + if (getOrdering() == CmpOrdering::Strong) + alias.append("_strong_"); + else + alias.append("_partial_"); + + auto appendInt = [&](int64_t value) { + if (value < 0) { + alias.push_back('n'); + value = -value; + } + alias.append(std::to_string(value)); + }; + + alias.append("lt"); + appendInt(getLt()); + alias.append("eq"); + appendInt(getEq()); + alias.append("gt"); + appendInt(getGt()); + + if (auto unordered = getUnordered()) { + alias.append("un"); + appendInt(unordered.value()); + } + + return alias; +} + +LogicalResult +CmpThreeWayInfoAttr::verify(function_ref emitError, + CmpOrdering ordering, int64_t lt, int64_t eq, + int64_t gt, std::optional unordered) { + // The presense of unordered must match the value of ordering. + if (ordering == CmpOrdering::Strong && unordered) { + emitError() << "strong ordering does not include unordered ordering"; + return failure(); + } + if (ordering == CmpOrdering::Partial && !unordered) { + emitError() << "partial ordering lacks unordered ordering"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// DataMemberAttr definitions +//===----------------------------------------------------------------------===// + +LogicalResult +DataMemberAttr::verify(function_ref emitError, + mlir::cir::DataMemberType ty, + std::optional memberIndex) { + if (!memberIndex.has_value()) { + // DataMemberAttr without a given index represents a null value. 
+ return success(); + } + + auto clsStructTy = ty.getClsTy(); + if (clsStructTy.isIncomplete()) { + emitError() << "incomplete 'cir.struct' cannot be used to build a non-null " + "data member pointer"; + return failure(); + } + + auto memberIndexValue = memberIndex.value(); + if (memberIndexValue >= clsStructTy.getNumElements()) { + emitError() + << "member index of a #cir.data_member attribute is out of range"; + return failure(); + } + + auto memberTy = clsStructTy.getMembers()[memberIndexValue]; + if (memberTy != ty.getMemberTy()) { + emitError() << "member type of a #cir.data_member attribute must match the " + "attribute type"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// MethodAttr definitions +//===----------------------------------------------------------------------===// + +LogicalResult +MethodAttr::verify(function_ref<::mlir::InFlightDiagnostic()> emitError, + mlir::cir::MethodType type, + std::optional symbol, + std::optional vtable_offset) { + if (symbol.has_value() && vtable_offset.has_value()) { + emitError() << "at most one of symbol and vtable_offset can be present " + "in #cir.method"; + return failure(); + } + + return success(); +} + +Attribute MethodAttr::parse(AsmParser &parser, Type odsType) { + auto ty = mlir::cast(odsType); + + if (parser.parseLess()) + return {}; + + // Try to parse the null pointer constant. + if (parser.parseOptionalKeyword("null").succeeded()) { + if (parser.parseGreater()) + return {}; + return get(ty); + } + + // Try to parse a flat symbol ref for a pointer to non-virtual member + // function. + FlatSymbolRefAttr symbol; + auto parseSymbolRefResult = parser.parseOptionalAttribute(symbol); + if (parseSymbolRefResult.has_value()) { + if (parseSymbolRefResult.value().failed()) + return {}; + if (parser.parseGreater()) + return {}; + return get(ty, symbol); + } + + // Parse a uint64 that represents the vtable offset. 
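  // e.g. <vtable_offset = 16>. Together with the branches above, the accepted
  // payload forms mirror MethodAttr::print() below: <null> for a null member
  // pointer, <@_ZN3Foo3barEv> for a non-virtual member function (the mangled
  // name here is hypothetical), and <vtable_offset = N> for a virtual one.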
+ std::uint64_t vtableOffset = 0; + if (parser.parseKeyword("vtable_offset")) + return {}; + if (parser.parseEqual()) + return {}; + if (parser.parseInteger(vtableOffset)) + return {}; + + if (parser.parseGreater()) + return {}; + + return get(ty, vtableOffset); +} + +void MethodAttr::print(AsmPrinter &printer) const { + auto symbol = getSymbol(); + auto vtableOffset = getVtableOffset(); + + printer << '<'; + if (symbol.has_value()) { + printer << *symbol; + } else if (vtableOffset.has_value()) { + printer << "vtable_offset = " << *vtableOffset; + } else { + printer << "null"; + } + printer << '>'; +} + +//===----------------------------------------------------------------------===// +// GlobalAnnotationValuesAttr definitions +//===----------------------------------------------------------------------===// + +LogicalResult GlobalAnnotationValuesAttr::verify( + function_ref<::mlir::InFlightDiagnostic()> emitError, + mlir::ArrayAttr annotations) { + if (annotations.empty()) { + emitError() + << "GlobalAnnotationValuesAttr should at least have one annotation"; + return failure(); + } + for (auto &entry : annotations) { + auto annoEntry = ::mlir::dyn_cast(entry); + if (!annoEntry) { + emitError() << "Element of GlobalAnnotationValuesAttr annotations array" + " must be an array"; + return failure(); + } else if (annoEntry.size() != 2) { + emitError() << "Element of GlobalAnnotationValuesAttr annotations array" + << " must be a 2-element array and you have " + << annoEntry.size(); + return failure(); + } else if (!::mlir::isa(annoEntry[0])) { + emitError() << "Element of GlobalAnnotationValuesAttr annotations" + "array must start with a string, which is the name of " + "global op or func it annotates"; + return failure(); + } + auto annoPart = ::mlir::dyn_cast(annoEntry[1]); + if (!annoPart) { + emitError() << "The second element of GlobalAnnotationValuesAttr" + "annotations array element must be of " + "type AnnotationValueAttr"; + return failure(); + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// DynamicCastInfoAtttr definitions +//===----------------------------------------------------------------------===// + +std::string DynamicCastInfoAttr::getAlias() const { + // The alias looks like: `dyn_cast_info__` + + std::string alias = "dyn_cast_info_"; + + alias.append(getSrcRtti().getSymbol().getValue()); + alias.push_back('_'); + alias.append(getDestRtti().getSymbol().getValue()); + + return alias; +} + +LogicalResult DynamicCastInfoAttr::verify( + function_ref emitError, + mlir::cir::GlobalViewAttr srcRtti, mlir::cir::GlobalViewAttr destRtti, + mlir::FlatSymbolRefAttr runtimeFunc, mlir::FlatSymbolRefAttr badCastFunc, + mlir::cir::IntAttr offsetHint) { + auto isRttiPtr = [](mlir::Type ty) { + // RTTI pointers are !cir.ptr. 
+ + auto ptrTy = mlir::dyn_cast(ty); + if (!ptrTy) + return false; + + auto pointeeIntTy = mlir::dyn_cast(ptrTy.getPointee()); + if (!pointeeIntTy) + return false; + + return pointeeIntTy.isUnsigned() && pointeeIntTy.getWidth() == 8; + }; + + if (!isRttiPtr(srcRtti.getType())) { + emitError() << "srcRtti must be an RTTI pointer"; + return failure(); + } + + if (!isRttiPtr(destRtti.getType())) { + emitError() << "destRtti must be an RTTI pointer"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// AddressSpaceAttr definitions +//===----------------------------------------------------------------------===// + +std::optional +AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { + using clang::LangAS; + switch (langAS) { + case LangAS::Default: + // Default address space should be encoded as a null attribute. + return std::nullopt; + case LangAS::opencl_global: + return Kind::offload_global; + case LangAS::opencl_local: + return Kind::offload_local; + case LangAS::opencl_constant: + return Kind::offload_constant; + case LangAS::opencl_private: + return Kind::offload_private; + case LangAS::opencl_generic: + return Kind::offload_generic; + + case LangAS::opencl_global_device: + case LangAS::opencl_global_host: + case LangAS::cuda_device: + case LangAS::cuda_constant: + case LangAS::cuda_shared: + case LangAS::sycl_global: + case LangAS::sycl_global_device: + case LangAS::sycl_global_host: + case LangAS::sycl_local: + case LangAS::sycl_private: + case LangAS::ptr32_sptr: + case LangAS::ptr32_uptr: + case LangAS::ptr64: + case LangAS::hlsl_groupshared: + case LangAS::wasm_funcref: + llvm_unreachable("NYI"); + default: + // Target address space offset arithmetics + return clang::toTargetAddressSpace(langAS) + kFirstTargetASValue; + } +} + +//===----------------------------------------------------------------------===// +// CIR Dialect +//===----------------------------------------------------------------------===// + +void CIRDialect::registerAttributes() { + addAttributes< +#define GET_ATTRDEF_LIST +#include "clang/CIR/Dialect/IR/CIROpsAttributes.cpp.inc" + >(); +} diff --git a/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp b/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp new file mode 100644 index 000000000000..16dbfd2c2471 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp @@ -0,0 +1,234 @@ +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace cir; + +//===----------------------------------------------------------------------===// +// Support for StructLayout +//===----------------------------------------------------------------------===// + +StructLayout::StructLayout(mlir::cir::StructType ST, const CIRDataLayout &DL) + : StructSize(llvm::TypeSize::getFixed(0)) { + assert(!ST.isIncomplete() && "Cannot get layout of opaque structs"); + IsPadded = false; + NumElements = ST.getNumElements(); + + // Loop over each of the elements, placing them in memory. + for (unsigned i = 0, e = NumElements; i != e; ++i) { + mlir::Type Ty = ST.getMembers()[i]; + if (i == 0 && ::cir::MissingFeatures::typeIsScalableType()) + llvm_unreachable("Scalable types are not yet supported in CIR"); + + assert(!::cir::MissingFeatures::recordDeclIsPacked() && + "Cannot identify packed structs"); + const llvm::Align TyAlign = + ST.getPacked() ? 
llvm::Align(1) : DL.getABITypeAlign(Ty); + + // Add padding if necessary to align the data element properly. + // Currently the only structure with scalable size will be the homogeneous + // scalable vector types. Homogeneous scalable vector types have members of + // the same data type so no alignment issue will happen. The condition here + // assumes so and needs to be adjusted if this assumption changes (e.g. we + // support structures with arbitrary scalable data type, or structure that + // contains both fixed size and scalable size data type members). + if (!StructSize.isScalable() && !isAligned(TyAlign, StructSize)) { + IsPadded = true; + StructSize = llvm::TypeSize::getFixed(alignTo(StructSize, TyAlign)); + } + + // Keep track of maximum alignment constraint. + StructAlignment = std::max(TyAlign, StructAlignment); + + getMemberOffsets()[i] = StructSize; + // Consume space for this data item + StructSize += DL.getTypeAllocSize(Ty); + } + + // Add padding to the end of the struct so that it could be put in an array + // and all array elements would be aligned correctly. + if (!StructSize.isScalable() && !isAligned(StructAlignment, StructSize)) { + IsPadded = true; + StructSize = llvm::TypeSize::getFixed(alignTo(StructSize, StructAlignment)); + } +} + +/// getElementContainingOffset - Given a valid offset into the structure, +/// return the structure index that contains it. +unsigned StructLayout::getElementContainingOffset(uint64_t FixedOffset) const { + assert(!StructSize.isScalable() && + "Cannot get element at offset for structure containing scalable " + "vector types"); + llvm::TypeSize Offset = llvm::TypeSize::getFixed(FixedOffset); + llvm::ArrayRef MemberOffsets = getMemberOffsets(); + + const auto *SI = + std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset, + [](llvm::TypeSize LHS, llvm::TypeSize RHS) -> bool { + return llvm::TypeSize::isKnownLT(LHS, RHS); + }); + assert(SI != MemberOffsets.begin() && "Offset not in structure type!"); + --SI; + assert(llvm::TypeSize::isKnownLE(*SI, Offset) && "upper_bound didn't work"); + assert((SI == MemberOffsets.begin() || + llvm::TypeSize::isKnownLE(*(SI - 1), Offset)) && + (SI + 1 == MemberOffsets.end() || + llvm::TypeSize::isKnownGT(*(SI + 1), Offset)) && + "Upper bound didn't work!"); + + // Multiple fields can have the same offset if any of them are zero sized. + // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop + // at the i32 element, because it is the last element at that offset. This is + // the right one to return, because anything after it will have a higher + // offset, implying that this element is non-empty. + return SI - MemberOffsets.begin(); +} + +//===----------------------------------------------------------------------===// +// DataLayout Class Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class StructLayoutMap { + using LayoutInfoTy = llvm::DenseMap; + LayoutInfoTy LayoutInfo; + +public: + ~StructLayoutMap() { + // Remove any layouts. 
+ for (const auto &I : LayoutInfo) { + StructLayout *Value = I.second; + Value->~StructLayout(); + free(Value); + } + } + + StructLayout *&operator[](mlir::cir::StructType STy) { + return LayoutInfo[STy]; + } +}; + +} // namespace + +CIRDataLayout::CIRDataLayout(mlir::ModuleOp modOp) : layout{modOp} { + reset(modOp.getDataLayoutSpec()); +} + +void CIRDataLayout::reset(mlir::DataLayoutSpecInterface spec) { + clear(); + + bigEndian = false; + if (spec) { + auto key = mlir::StringAttr::get( + spec.getContext(), mlir::DLTIDialect::kDataLayoutEndiannessKey); + if (auto entry = spec.getSpecForIdentifier(key)) + if (auto str = llvm::dyn_cast(entry.getValue())) + bigEndian = str == mlir::DLTIDialect::kDataLayoutEndiannessBig; + } + + LayoutMap = nullptr; + + // ManglingMode = MM_None; + // NonIntegralAddressSpaces.clear(); + StructAlignment = + llvm::LayoutAlignElem::get(llvm::Align(1), llvm::Align(8), 0); + + // NOTE(cir): Alignment setter functions are skipped as these should already + // be set in MLIR's data layout. +} + +void CIRDataLayout::clear() { + delete static_cast(LayoutMap); + LayoutMap = nullptr; +} + +const StructLayout * +CIRDataLayout::getStructLayout(mlir::cir::StructType Ty) const { + if (!LayoutMap) + LayoutMap = new StructLayoutMap(); + + StructLayoutMap *STM = static_cast(LayoutMap); + StructLayout *&SL = (*STM)[Ty]; + if (SL) + return SL; + + // Otherwise, create the struct layout. Because it is variable length, we + // malloc it, then use placement new. + StructLayout *L = (StructLayout *)llvm::safe_malloc( + StructLayout::totalSizeToAlloc(Ty.getNumElements())); + + // Set SL before calling StructLayout's ctor. The ctor could cause other + // entries to be added to TheMap, invalidating our reference. + SL = L; + + new (L) StructLayout(Ty, *this); + + return L; +} + +/*! + \param abiOrPref Flag that determines which alignment is returned. true + returns the ABI alignment, false returns the preferred alignment. + \param Ty The underlying type for which alignment is determined. + + Get the ABI (\a abiOrPref == true) or preferred alignment (\a abiOrPref + == false) for the requested type \a Ty. + */ +llvm::Align CIRDataLayout::getAlignment(mlir::Type Ty, bool abiOrPref) const { + + if (llvm::isa(Ty)) { + // Packed structure types always have an ABI alignment of one. + if (::cir::MissingFeatures::recordDeclIsPacked() && abiOrPref) + llvm_unreachable("NYI"); + + auto stTy = llvm::dyn_cast(Ty); + if (stTy && stTy.getPacked() && abiOrPref) + return llvm::Align(1); + + // Get the layout annotation... which is lazily created on demand. + const StructLayout *Layout = + getStructLayout(llvm::cast(Ty)); + const llvm::Align Align = + abiOrPref ? StructAlignment.ABIAlign : StructAlignment.PrefAlign; + return std::max(Align, Layout->getAlignment()); + } + + // FIXME(cir): This does not account for differnt address spaces, and relies + // on CIR's data layout to give the proper alignment. + assert(!::cir::MissingFeatures::addressSpace()); + + // Fetch type alignment from MLIR's data layout. + unsigned align = abiOrPref ? layout.getTypeABIAlignment(Ty) + : layout.getTypePreferredAlignment(Ty); + return llvm::Align(align); +} + +// The implementation of this method is provided inline as it is particularly +// well suited to constant folding when called on a specific Type subclass. 
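// Usage sketch (assuming an mlir::ModuleOp `mod` carrying a data layout spec
// and a complete !cir.struct type `sTy`):
//
//   cir::CIRDataLayout DL(mod);
//   const cir::StructLayout *SL = DL.getStructLayout(sTy);
//   llvm::Align align = SL->getAlignment();            // ABI alignment
//   llvm::TypeSize sizeInBits = SL->getSizeInBits();   // padded size
//   unsigned idx = SL->getElementContainingOffset(4);  // member covering byte 4
//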
+llvm::TypeSize CIRDataLayout::getTypeSizeInBits(mlir::Type Ty) const { + assert(!::cir::MissingFeatures::typeIsSized() && + "Cannot getTypeInfo() on a type that is unsized!"); + + if (auto structTy = llvm::dyn_cast(Ty)) { + + // FIXME(cir): CIR struct's data layout implementation doesn't do a good job + // of handling unions particularities. We should have a separate union type. + if (structTy.isUnion()) { + auto largestMember = structTy.getLargestMember(layout); + return llvm::TypeSize::getFixed(layout.getTypeSizeInBits(largestMember)); + } + + // FIXME(cir): We should be able to query the size of a struct directly to + // its data layout implementation instead of requiring a separate + // StructLayout object. + // Get the layout annotation... which is lazily created on demand. + return getStructLayout(structTy)->getSizeInBits(); + } + + // FIXME(cir): This does not account for different address spaces, and relies + // on CIR's data layout to give the proper ABI-specific type width. + assert(!::cir::MissingFeatures::addressSpace()); + + return llvm::TypeSize::getFixed(layout.getTypeSizeInBits(Ty)); +} diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index c2829c3ff2af..a9c445b08796 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -10,4 +10,3909 @@ // //===----------------------------------------------------------------------===// -#include +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/AST/Attrs.inc" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Interfaces/CIRLoopOpInterface.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/DialectInterface.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/IR/StorageUniquerSupport.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "mlir/Interfaces/FunctionImplementation.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" + +using namespace mlir; +using namespace mlir::cir; + +#include "clang/CIR/Dialect/IR/CIROpsEnums.cpp.inc" +#include "clang/CIR/Dialect/IR/CIROpsStructs.cpp.inc" + +#include "clang/CIR/Dialect/IR/CIROpsDialect.cpp.inc" +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "clang/CIR/Interfaces/CIROpInterfaces.h" + +//===----------------------------------------------------------------------===// +// CIR Dialect +//===----------------------------------------------------------------------===// +namespace { +struct CIROpAsmDialectInterface : public OpAsmDialectInterface { + using OpAsmDialectInterface::OpAsmDialectInterface; + + AliasResult getAlias(Type type, raw_ostream &os) const final { + if (auto structType = dyn_cast(type)) { + StringAttr nameAttr = structType.getName(); + if (!nameAttr) + os << "ty_anon_" << structType.getKindAsStr(); + else + os << "ty_" << nameAttr.getValue(); + return AliasResult::OverridableAlias; + } + if (auto intType = dyn_cast(type)) { + // We only provide alias for standard 
integer types (i.e. integer types + // whose width is divisible by 8). + if (intType.getWidth() % 8 != 0) + return AliasResult::NoAlias; + os << intType.getAlias(); + return AliasResult::OverridableAlias; + } + if (auto voidType = dyn_cast(type)) { + os << voidType.getAlias(); + return AliasResult::OverridableAlias; + } + + return AliasResult::NoAlias; + } + + AliasResult getAlias(Attribute attr, raw_ostream &os) const final { + if (auto boolAttr = mlir::dyn_cast(attr)) { + os << (boolAttr.getValue() ? "true" : "false"); + return AliasResult::FinalAlias; + } + if (auto bitfield = mlir::dyn_cast(attr)) { + os << "bfi_" << bitfield.getName().str(); + return AliasResult::FinalAlias; + } + if (auto extraFuncAttr = + mlir::dyn_cast(attr)) { + os << "fn_attr"; + return AliasResult::FinalAlias; + } + if (auto cmpThreeWayInfoAttr = + mlir::dyn_cast(attr)) { + os << cmpThreeWayInfoAttr.getAlias(); + return AliasResult::FinalAlias; + } + if (auto dynCastInfoAttr = + mlir::dyn_cast(attr)) { + os << dynCastInfoAttr.getAlias(); + return AliasResult::FinalAlias; + } + + return AliasResult::NoAlias; + } +}; +} // namespace + +/// Dialect initialization, the instance will be owned by the context. This is +/// the point of registration of types and operations for the dialect. +void cir::CIRDialect::initialize() { + registerTypes(); + registerAttributes(); + addOperations< +#define GET_OP_LIST +#include "clang/CIR/Dialect/IR/CIROps.cpp.inc" + >(); + addInterfaces(); +} + +Operation *cir::CIRDialect::materializeConstant(mlir::OpBuilder &builder, + mlir::Attribute value, + mlir::Type type, + mlir::Location loc) { + return builder.create( + loc, type, mlir::cast(value)); +} + +//===----------------------------------------------------------------------===// +// Helpers +//===----------------------------------------------------------------------===// + +// Parses one of the keywords provided in the list `keywords` and returns the +// position of the parsed keyword in the list. If none of the keywords from the +// list is parsed, returns -1. +static int parseOptionalKeywordAlternative(AsmParser &parser, + ArrayRef keywords) { + for (auto en : llvm::enumerate(keywords)) { + if (succeeded(parser.parseOptionalKeyword(en.value()))) + return en.index(); + } + return -1; +} + +namespace { +template struct EnumTraits {}; + +#define REGISTER_ENUM_TYPE(Ty) \ + template <> struct EnumTraits { \ + static StringRef stringify(Ty value) { return stringify##Ty(value); } \ + static unsigned getMaxEnumVal() { return getMaxEnumValFor##Ty(); } \ + } +#define REGISTER_ENUM_TYPE_WITH_NS(NS, Ty) \ + template <> struct EnumTraits { \ + static StringRef stringify(NS::Ty value) { \ + return NS::stringify##Ty(value); \ + } \ + static unsigned getMaxEnumVal() { return NS::getMaxEnumValFor##Ty(); } \ + } + +REGISTER_ENUM_TYPE(GlobalLinkageKind); +REGISTER_ENUM_TYPE(CallingConv); +REGISTER_ENUM_TYPE_WITH_NS(sob, SignedOverflowBehavior); +} // namespace + +/// Parse an enum from the keyword, or default to the provided default value. +/// The return type is the enum type by default, unless overriden with the +/// second template argument. +/// TODO: teach other places in this file to use this function. 
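+///
+/// A hypothetical use (illustrative only; the enum value is just an example):
+///   auto linkage = parseOptionalCIRKeyword<GlobalLinkageKind>(
+///       parser, GlobalLinkageKind::ExternalLinkage);
+/// returns ExternalLinkage when no linkage keyword is present.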
+template +static RetTy parseOptionalCIRKeyword(AsmParser &parser, EnumTy defaultValue) { + SmallVector names; + for (unsigned i = 0, e = EnumTraits::getMaxEnumVal(); i <= e; ++i) + names.push_back(EnumTraits::stringify(static_cast(i))); + + int index = parseOptionalKeywordAlternative(parser, names); + if (index == -1) + return static_cast(defaultValue); + return static_cast(index); +} + +/// Parse an enum from the keyword, return failure if the keyword is not found. +template +static ParseResult parseCIRKeyword(AsmParser &parser, RetTy &result) { + SmallVector names; + for (unsigned i = 0, e = EnumTraits::getMaxEnumVal(); i <= e; ++i) + names.push_back(EnumTraits::stringify(static_cast(i))); + + int index = parseOptionalKeywordAlternative(parser, names); + if (index == -1) + return failure(); + result = static_cast(index); + return success(); +} + +// Check if a region's termination omission is valid and, if so, creates and +// inserts the omitted terminator into the region. +LogicalResult ensureRegionTerm(OpAsmParser &parser, Region ®ion, + SMLoc errLoc) { + Location eLoc = parser.getEncodedSourceLoc(parser.getCurrentLocation()); + OpBuilder builder(parser.getBuilder().getContext()); + + // Region is empty or properly terminated: nothing to do. + if (region.empty() || + (region.back().mightHaveTerminator() && region.back().getTerminator())) + return success(); + + // Check for invalid terminator omissions. + if (!region.hasOneBlock()) + return parser.emitError(errLoc, + "multi-block region must not omit terminator"); + if (region.back().empty()) + return parser.emitError(errLoc, "empty region must not omit terminator"); + + // Terminator was omited correctly: recreate it. + region.back().push_back(builder.create(eLoc)); + return success(); +} + +// True if the region's terminator should be omitted. 
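+// For example, a single-block region whose only terminator is a value-less
+// `cir.yield` (e.g. the body of a `cir.scope`) is printed without it, and
+// ensureRegionTerm() above re-creates the terminator when parsing it back.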
+bool omitRegionTerm(mlir::Region &r) { + const auto singleNonEmptyBlock = r.hasOneBlock() && !r.back().empty(); + const auto yieldsNothing = [&r]() { + YieldOp y = dyn_cast(r.back().getTerminator()); + return y && y.getArgs().empty(); + }; + return singleNonEmptyBlock && yieldsNothing(); +} + +void printVisibilityAttr(OpAsmPrinter &printer, + mlir::cir::VisibilityAttr &visibility) { + switch (visibility.getValue()) { + case VisibilityKind::Hidden: + printer << "hidden"; + break; + case VisibilityKind::Protected: + printer << "protected"; + break; + default: + break; + } +} + +void parseVisibilityAttr(OpAsmParser &parser, + mlir::cir::VisibilityAttr &visibility) { + VisibilityKind visibilityKind; + + if (parser.parseOptionalKeyword("hidden").succeeded()) { + visibilityKind = VisibilityKind::Hidden; + } else if (parser.parseOptionalKeyword("protected").succeeded()) { + visibilityKind = VisibilityKind::Protected; + } else { + visibilityKind = VisibilityKind::Default; + } + + visibility = + mlir::cir::VisibilityAttr::get(parser.getContext(), visibilityKind); +} + +//===----------------------------------------------------------------------===// +// CIR Custom Parsers/Printers +//===----------------------------------------------------------------------===// + +static mlir::ParseResult parseOmittedTerminatorRegion(mlir::OpAsmParser &parser, + mlir::Region ®ion) { + auto regionLoc = parser.getCurrentLocation(); + if (parser.parseRegion(region)) + return failure(); + if (ensureRegionTerm(parser, region, regionLoc).failed()) + return failure(); + return success(); +} + +static void printOmittedTerminatorRegion(mlir::OpAsmPrinter &printer, + mlir::cir::ScopeOp &op, + mlir::Region ®ion) { + printer.printRegion(region, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/!omitRegionTerm(region)); +} + +static mlir::ParseResult +parseOmitDefaultVisibility(mlir::OpAsmParser &parser, + mlir::cir::VisibilityAttr &visibility) { + parseVisibilityAttr(parser, visibility); + return success(); +} + +static void printOmitDefaultVisibility(mlir::OpAsmPrinter &printer, + mlir::cir::GlobalOp &op, + mlir::cir::VisibilityAttr visibility) { + printVisibilityAttr(printer, visibility); +} + +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +void AllocaOp::build(::mlir::OpBuilder &odsBuilder, + ::mlir::OperationState &odsState, ::mlir::Type addr, + ::mlir::Type allocaType, ::llvm::StringRef name, + ::mlir::IntegerAttr alignment) { + odsState.addAttribute(getAllocaTypeAttrName(odsState.name), + ::mlir::TypeAttr::get(allocaType)); + odsState.addAttribute(getNameAttrName(odsState.name), + odsBuilder.getStringAttr(name)); + if (alignment) { + odsState.addAttribute(getAlignmentAttrName(odsState.name), alignment); + } + odsState.addTypes(addr); +} + +//===----------------------------------------------------------------------===// +// BreakOp +//===----------------------------------------------------------------------===// + +LogicalResult BreakOp::verify() { + if (!getOperation()->getParentOfType() && + !getOperation()->getParentOfType()) + return emitOpError("must be within a loop or switch"); + return success(); +} + +//===----------------------------------------------------------------------===// +// ConditionOp +//===-----------------------------------------------------------------------===// + +//===---------------------------------- +// BranchOpTerminatorInterface Methods + +void 
ConditionOp::getSuccessorRegions( + ArrayRef operands, SmallVectorImpl ®ions) { + // TODO(cir): The condition value may be folded to a constant, narrowing + // down its list of possible successors. + + // Parent is a loop: condition may branch to the body or to the parent op. + if (auto loopOp = dyn_cast(getOperation()->getParentOp())) { + regions.emplace_back(&loopOp.getBody(), loopOp.getBody().getArguments()); + regions.emplace_back(loopOp->getResults()); + } + + // Parent is an await: condition may branch to resume or suspend regions. + auto await = cast(getOperation()->getParentOp()); + regions.emplace_back(&await.getResume(), await.getResume().getArguments()); + regions.emplace_back(&await.getSuspend(), await.getSuspend().getArguments()); +} + +MutableOperandRange +ConditionOp::getMutableSuccessorOperands(RegionBranchPoint point) { + // No values are yielded to the successor region. + return MutableOperandRange(getOperation(), 0, 0); +} + +LogicalResult ConditionOp::verify() { + if (!isa(getOperation()->getParentOp())) + return emitOpError("condition must be within a conditional region"); + return success(); +} + +//===----------------------------------------------------------------------===// +// ConstantOp +//===----------------------------------------------------------------------===// + +static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType, + mlir::Attribute attrType) { + if (isa(attrType)) { + if (::mlir::isa<::mlir::cir::PointerType>(opType)) + return success(); + return op->emitOpError("nullptr expects pointer type"); + } + + if (isa(attrType)) { + // More detailed type verifications are already done in + // DataMemberAttr::verify. Don't need to repeat here. + return success(); + } + + if (isa(attrType)) { + if (::mlir::isa<::mlir::cir::StructType, ::mlir::cir::ArrayType, + ::mlir::cir::ComplexType>(opType)) + return success(); + return op->emitOpError("zero expects struct or array type"); + } + + if (mlir::isa(attrType)) { + if (!mlir::isa(opType)) + return op->emitOpError("result type (") + << opType << ") must be '!cir.bool' for '" << attrType << "'"; + return success(); + } + + if (mlir::isa( + attrType)) { + auto at = cast(attrType); + if (at.getType() != opType) { + return op->emitOpError("result type (") + << opType << ") does not match value type (" << at.getType() + << ")"; + } + return success(); + } + + if (isa(attrType)) { + if (::mlir::isa<::mlir::cir::PointerType>(opType)) + return success(); + return op->emitOpError("symbolref expects pointer type"); + } + + if (mlir::isa(attrType) || + mlir::isa(attrType) || + mlir::isa(attrType) || + mlir::isa(attrType) || + mlir::isa(attrType) || + mlir::isa(attrType)) + return success(); + if (mlir::isa(attrType)) + return success(); + + assert(isa(attrType) && "What else could we be looking at here?"); + return op->emitOpError("global with type ") + << cast(attrType).getType() << " not supported"; +} + +LogicalResult ConstantOp::verify() { + // ODS already generates checks to make sure the result type is valid. We just + // need to additionally check that the value's attribute type is consistent + // with the result type. 
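+  // For instance (illustrative values), a `#cir.int<0> : !u8i` initializer on
+  // a constant whose result type is `!s32i` would be rejected by
+  // checkConstantTypes because the attribute type and the result type differ.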
+ return checkConstantTypes(getOperation(), getType(), getValue()); +} + +OpFoldResult ConstantOp::fold(FoldAdaptor /*adaptor*/) { return getValue(); } + +//===----------------------------------------------------------------------===// +// ContinueOp +//===----------------------------------------------------------------------===// + +LogicalResult ContinueOp::verify() { + if (!this->getOperation()->getParentOfType()) + return emitOpError("must be within a loop"); + return success(); +} + +//===----------------------------------------------------------------------===// +// CastOp +//===----------------------------------------------------------------------===// + +LogicalResult CastOp::verify() { + auto resType = getResult().getType(); + auto srcType = getSrc().getType(); + + if (mlir::isa(srcType) && + mlir::isa(resType)) { + // Use the element type of the vector to verify the cast kind. (Except for + // bitcast, see below.) + srcType = mlir::dyn_cast(srcType).getEltType(); + resType = mlir::dyn_cast(resType).getEltType(); + } + + switch (getKind()) { + case cir::CastKind::int_to_bool: { + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.bool type for result"; + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.int type for source"; + return success(); + } + case cir::CastKind::ptr_to_bool: { + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.bool type for result"; + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.ptr type for source"; + return success(); + } + case cir::CastKind::integral: { + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.int type for result"; + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.int type for source"; + return success(); + } + case cir::CastKind::array_to_ptrdecay: { + auto arrayPtrTy = mlir::dyn_cast(srcType); + auto flatPtrTy = mlir::dyn_cast(resType); + if (!arrayPtrTy || !flatPtrTy) + return emitOpError() << "requires !cir.ptr type for source and result"; + + if (arrayPtrTy.getAddrSpace() != flatPtrTy.getAddrSpace()) { + return emitOpError() + << "requires same address space for source and result"; + } + + auto arrayTy = + mlir::dyn_cast(arrayPtrTy.getPointee()); + if (!arrayTy) + return emitOpError() << "requires !cir.array pointee"; + + if (arrayTy.getEltType() != flatPtrTy.getPointee()) + return emitOpError() + << "requires same type for array element and pointee result"; + return success(); + } + case cir::CastKind::bitcast: { + // Allow bitcast of structs for calling conventions. + if (isa(srcType) || isa(resType)) + return success(); + + // Handle the pointer types first. + auto srcPtrTy = mlir::dyn_cast(srcType); + auto resPtrTy = mlir::dyn_cast(resType); + + if (srcPtrTy && resPtrTy) { + if (srcPtrTy.getAddrSpace() != resPtrTy.getAddrSpace()) { + return emitOpError() << "result type address space does not match the " + "address space of the operand"; + } + return success(); + } + + // This is the only cast kind where we don't want vector types to decay + // into the element type. 
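+    // (A bitcast between two whole vector values is allowed, so the check
+    // below inspects the original operand/result types rather than the
+    // decayed element types computed at the top of this verifier.)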
+ if ((!mlir::isa(getSrc().getType()) || + !mlir::isa(getResult().getType()))) + return emitOpError() + << "requires !cir.ptr or !cir.vector type for source and result"; + return success(); + } + case cir::CastKind::floating: { + if (!mlir::isa(srcType) || + !mlir::isa(resType)) + return emitOpError() << "requires !cir.float type for source and result"; + return success(); + } + case cir::CastKind::float_to_int: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.float type for source"; + if (!mlir::dyn_cast(resType)) + return emitOpError() << "requires !cir.int type for result"; + return success(); + } + case cir::CastKind::int_to_ptr: { + if (!mlir::dyn_cast(srcType)) + return emitOpError() << "requires !cir.int type for source"; + if (!mlir::dyn_cast(resType)) + return emitOpError() << "requires !cir.ptr type for result"; + return success(); + } + case cir::CastKind::ptr_to_int: { + if (!mlir::dyn_cast(srcType)) + return emitOpError() << "requires !cir.ptr type for source"; + if (!mlir::dyn_cast(resType)) + return emitOpError() << "requires !cir.int type for result"; + return success(); + } + case cir::CastKind::float_to_bool: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.float type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.bool type for result"; + return success(); + } + case cir::CastKind::bool_to_int: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.bool type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.int type for result"; + return success(); + } + case cir::CastKind::int_to_float: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.int type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.float type for result"; + return success(); + } + case cir::CastKind::bool_to_float: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.bool type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.float type for result"; + return success(); + } + case cir::CastKind::address_space: { + auto srcPtrTy = mlir::dyn_cast(srcType); + auto resPtrTy = mlir::dyn_cast(resType); + if (!srcPtrTy || !resPtrTy) + return emitOpError() << "requires !cir.ptr type for source and result"; + if (srcPtrTy.getPointee() != resPtrTy.getPointee()) + return emitOpError() << "requires two types differ in addrspace only"; + return success(); + } + case cir::CastKind::float_to_complex: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.float type for source"; + auto resComplexTy = mlir::dyn_cast(resType); + if (!resComplexTy) + return emitOpError() << "requires !cir.complex type for result"; + if (srcType != resComplexTy.getElementTy()) + return emitOpError() << "requires source type match result element type"; + return success(); + } + case cir::CastKind::int_to_complex: { + if (!mlir::isa(srcType)) + return emitOpError() << "requires !cir.int type for source"; + auto resComplexTy = mlir::dyn_cast(resType); + if (!resComplexTy) + return emitOpError() << "requires !cir.complex type for result"; + if (srcType != resComplexTy.getElementTy()) + return emitOpError() << "requires source type match result element type"; + return success(); + } + case cir::CastKind::float_complex_to_real: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy) + return emitOpError() << "requires !cir.complex type for source"; + if (!mlir::isa(resType)) + return emitOpError() << 
"requires !cir.float type for result"; + if (srcComplexTy.getElementTy() != resType) + return emitOpError() << "requires source element type match result type"; + return success(); + } + case cir::CastKind::int_complex_to_real: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy) + return emitOpError() << "requires !cir.complex type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.int type for result"; + if (srcComplexTy.getElementTy() != resType) + return emitOpError() << "requires source element type match result type"; + return success(); + } + case cir::CastKind::float_complex_to_bool: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy || + !mlir::isa(srcComplexTy.getElementTy())) + return emitOpError() + << "requires !cir.complex type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.bool type for result"; + return success(); + } + case cir::CastKind::int_complex_to_bool: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy || + !mlir::isa(srcComplexTy.getElementTy())) + return emitOpError() + << "requires !cir.complex type for source"; + if (!mlir::isa(resType)) + return emitOpError() << "requires !cir.bool type for result"; + return success(); + } + case cir::CastKind::float_complex: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy || + !mlir::isa(srcComplexTy.getElementTy())) + return emitOpError() + << "requires !cir.complex type for source"; + auto resComplexTy = mlir::dyn_cast(resType); + if (!resComplexTy || + !mlir::isa(resComplexTy.getElementTy())) + return emitOpError() + << "requires !cir.complex type for result"; + return success(); + } + case cir::CastKind::float_complex_to_int_complex: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy || + !mlir::isa(srcComplexTy.getElementTy())) + return emitOpError() + << "requires !cir.complex type for source"; + auto resComplexTy = mlir::dyn_cast(resType); + if (!resComplexTy || + !mlir::isa(resComplexTy.getElementTy())) + return emitOpError() << "requires !cir.complex type for result"; + return success(); + } + case cir::CastKind::int_complex: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy || + !mlir::isa(srcComplexTy.getElementTy())) + return emitOpError() << "requires !cir.complex type for source"; + auto resComplexTy = mlir::dyn_cast(resType); + if (!resComplexTy || + !mlir::isa(resComplexTy.getElementTy())) + return emitOpError() << "requires !cir.complex type for result"; + return success(); + } + case cir::CastKind::int_complex_to_float_complex: { + auto srcComplexTy = mlir::dyn_cast(srcType); + if (!srcComplexTy || + !mlir::isa(srcComplexTy.getElementTy())) + return emitOpError() << "requires !cir.complex type for source"; + auto resComplexTy = mlir::dyn_cast(resType); + if (!resComplexTy || + !mlir::isa(resComplexTy.getElementTy())) + return emitOpError() + << "requires !cir.complex type for result"; + return success(); + } + } + + llvm_unreachable("Unknown CastOp kind?"); +} + +bool isIntOrBoolCast(mlir::cir::CastOp op) { + auto kind = op.getKind(); + return kind == mlir::cir::CastKind::bool_to_int || + kind == mlir::cir::CastKind::int_to_bool || + kind == mlir::cir::CastKind::integral; +} + +Value tryFoldCastChain(CastOp op) { + CastOp head = op, tail = op; + + while (op) { + if (!isIntOrBoolCast(op)) + break; + head = op; + op = dyn_cast_or_null(head.getSrc().getDefiningOp()); + } + + if (head == tail) + return {}; + + // if bool_to_int -> ... 
-> int_to_bool: take the bool + // as we had it was before all casts + if (head.getKind() == mlir::cir::CastKind::bool_to_int && + tail.getKind() == mlir::cir::CastKind::int_to_bool) + return head.getSrc(); + + // if int_to_bool -> ... -> int_to_bool: take the result + // of the first one, as no other casts (and ext casts as well) + // don't change the first result + if (head.getKind() == mlir::cir::CastKind::int_to_bool && + tail.getKind() == mlir::cir::CastKind::int_to_bool) + return head.getResult(); + + return {}; +} + +OpFoldResult CastOp::fold(FoldAdaptor adaptor) { + if (getSrc().getType() == getResult().getType()) { + switch (getKind()) { + case mlir::cir::CastKind::integral: { + // TODO: for sign differences, it's possible in certain conditions to + // create a new attribute that's capable of representing the source. + SmallVector foldResults; + auto foldOrder = getSrc().getDefiningOp()->fold(foldResults); + if (foldOrder.succeeded() && foldResults[0].is()) + return foldResults[0].get(); + return {}; + } + case mlir::cir::CastKind::bitcast: + case mlir::cir::CastKind::address_space: + case mlir::cir::CastKind::float_complex: + case mlir::cir::CastKind::int_complex: { + return getSrc(); + } + default: + return {}; + } + } + return tryFoldCastChain(*this); +} + +static bool isBoolNot(mlir::cir::UnaryOp op) { + return isa(op.getInput().getType()) && + op.getKind() == mlir::cir::UnaryOpKind::Not; +} + +// This folder simplifies the sequential boolean not operations. +// For instance, the next two unary operations will be eliminated: +// +// ```mlir +// %1 = cir.unary(not, %0) : !cir.bool, !cir.bool +// %2 = cir.unary(not, %1) : !cir.bool, !cir.bool +// ``` +// +// and the argument of the first one (%0) will be used instead. +OpFoldResult UnaryOp::fold(FoldAdaptor adaptor) { + if (isBoolNot(*this)) + if (auto previous = dyn_cast_or_null(getInput().getDefiningOp())) + if (isBoolNot(previous)) + return previous.getInput(); + + return {}; +} + +//===----------------------------------------------------------------------===// +// DynamicCastOp +//===----------------------------------------------------------------------===// + +LogicalResult DynamicCastOp::verify() { + auto resultPointeeTy = + mlir::cast(getType()).getPointee(); + if (!mlir::isa(resultPointeeTy)) + return emitOpError() + << "cir.dyn_cast must produce a void ptr or struct ptr"; + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// ComplexCreateOp +//===----------------------------------------------------------------------===// + +LogicalResult ComplexCreateOp::verify() { + if (getType().getElementTy() != getReal().getType()) { + emitOpError() + << "operand type of cir.complex.create does not match its result type"; + return failure(); + } + + return success(); +} + +OpFoldResult ComplexCreateOp::fold(FoldAdaptor adaptor) { + auto real = adaptor.getReal(); + auto imag = adaptor.getImag(); + + if (!real || !imag) + return nullptr; + + // When both of real and imag are constants, we can fold the operation into an + // `cir.const #cir.complex` operation. 
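+  // E.g. when both operands are produced by constant ops, this folder returns
+  // a single #cir.complex attribute built from the two operand attributes,
+  // which the dialect's materializeConstant() hook can turn into a cir.const.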
+ + auto realAttr = mlir::cast(real); + auto imagAttr = mlir::cast(imag); + assert(realAttr.getType() == imagAttr.getType() && + "real part and imag part should be of the same type"); + + auto complexTy = + mlir::cir::ComplexType::get(getContext(), realAttr.getType()); + return mlir::cir::ComplexAttr::get(complexTy, realAttr, imagAttr); +} + +//===----------------------------------------------------------------------===// +// ComplexRealOp and ComplexImagOp +//===----------------------------------------------------------------------===// + +LogicalResult ComplexRealOp::verify() { + if (getType() != getOperand().getType().getElementTy()) { + emitOpError() << "cir.complex.real result type does not match operand type"; + return failure(); + } + return success(); +} + +OpFoldResult ComplexRealOp::fold(FoldAdaptor adaptor) { + auto input = + mlir::cast_if_present(adaptor.getOperand()); + if (input) + return input.getReal(); + return nullptr; +} + +LogicalResult ComplexImagOp::verify() { + if (getType() != getOperand().getType().getElementTy()) { + emitOpError() << "cir.complex.imag result type does not match operand type"; + return failure(); + } + return success(); +} + +OpFoldResult ComplexImagOp::fold(FoldAdaptor adaptor) { + auto input = + mlir::cast_if_present(adaptor.getOperand()); + if (input) + return input.getImag(); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// ComplexRealPtrOp and ComplexImagPtrOp +//===----------------------------------------------------------------------===// + +LogicalResult ComplexRealPtrOp::verify() { + auto resultPointeeTy = + mlir::cast(getType()).getPointee(); + auto operandPtrTy = + mlir::cast(getOperand().getType()); + auto operandPointeeTy = + mlir::cast(operandPtrTy.getPointee()); + + if (resultPointeeTy != operandPointeeTy.getElementTy()) { + emitOpError() + << "cir.complex.real_ptr result type does not match operand type"; + return failure(); + } + + return success(); +} + +LogicalResult ComplexImagPtrOp::verify() { + auto resultPointeeTy = + mlir::cast(getType()).getPointee(); + auto operandPtrTy = + mlir::cast(getOperand().getType()); + auto operandPointeeTy = + mlir::cast(operandPtrTy.getPointee()); + + if (resultPointeeTy != operandPointeeTy.getElementTy()) { + emitOpError() + << "cir.complex.imag_ptr result type does not match operand type"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// VecCreateOp +//===----------------------------------------------------------------------===// + +LogicalResult VecCreateOp::verify() { + // Verify that the number of arguments matches the number of elements in the + // vector, and that the type of all the arguments matches the type of the + // elements in the vector. 
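+  // For example (illustrative), building a 4-element vector of !s32i from
+  // three operands, or from four operands where one is not of type !s32i, is
+  // diagnosed below.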
+ auto VecTy = getResult().getType(); + if (getElements().size() != VecTy.getSize()) { + return emitOpError() << "operand count of " << getElements().size() + << " doesn't match vector type " << VecTy + << " element count of " << VecTy.getSize(); + } + auto ElementType = VecTy.getEltType(); + for (auto Element : getElements()) { + if (Element.getType() != ElementType) { + return emitOpError() << "operand type " << Element.getType() + << " doesn't match vector element type " + << ElementType; + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// VecTernaryOp +//===----------------------------------------------------------------------===// + +LogicalResult VecTernaryOp::verify() { + // Verify that the condition operand has the same number of elements as the + // other operands. (The automatic verification already checked that all + // operands are vector types and that the second and third operands are the + // same type.) + if (mlir::cast(getCond().getType()).getSize() != + getVec1().getType().getSize()) { + return emitOpError() << ": the number of elements in " + << getCond().getType() << " and " + << getVec1().getType() << " don't match"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// VecShuffle +//===----------------------------------------------------------------------===// + +LogicalResult VecShuffleOp::verify() { + // The number of elements in the indices array must match the number of + // elements in the result type. + if (getIndices().size() != getResult().getType().getSize()) { + return emitOpError() << ": the number of elements in " << getIndices() + << " and " << getResult().getType() << " don't match"; + } + // The element types of the two input vectors and of the result type must + // match. + if (getVec1().getType().getEltType() != getResult().getType().getEltType()) { + return emitOpError() << ": element types of " << getVec1().getType() + << " and " << getResult().getType() << " don't match"; + } + // The indices must all be integer constants + if (not std::all_of(getIndices().begin(), getIndices().end(), + [](mlir::Attribute attr) { + return mlir::isa(attr); + })) { + return emitOpError() << "all index values must be integers"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// VecShuffleDynamic +//===----------------------------------------------------------------------===// + +LogicalResult VecShuffleDynamicOp::verify() { + // The number of elements in the two input vectors must match. + if (getVec().getType().getSize() != + mlir::cast(getIndices().getType()).getSize()) { + return emitOpError() << ": the number of elements in " << getVec().getType() + << " and " << getIndices().getType() << " don't match"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// ReturnOp +//===----------------------------------------------------------------------===// + +static mlir::LogicalResult checkReturnAndFunction(ReturnOp op, + cir::FuncOp function) { + // ReturnOps currently only have a single optional operand. + if (op.getNumOperands() > 1) + return op.emitOpError() << "expects at most 1 return operand"; + + // Ensure returned type matches the function signature. + auto expectedTy = function.getFunctionType().getReturnType(); + auto actualTy = + (op.getNumOperands() == 0 ? 
mlir::cir::VoidType::get(op.getContext()) + : op.getOperand(0).getType()); + if (actualTy != expectedTy) + return op.emitOpError() << "returns " << actualTy + << " but enclosing function returns " << expectedTy; + + return mlir::success(); +} + +mlir::LogicalResult ReturnOp::verify() { + // Returns can be present in multiple different scopes, get the + // wrapping function and start from there. + auto *fnOp = getOperation()->getParentOp(); + while (!isa(fnOp)) + fnOp = fnOp->getParentOp(); + + // Make sure return types match function return type. + if (checkReturnAndFunction(*this, cast(fnOp)).failed()) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// ThrowOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult ThrowOp::verify() { + // For the no-rethrow version, it must have at least the exception pointer. + if (rethrows()) + return success(); + + if (getNumOperands() == 1) { + if (!getTypeInfo()) + return emitOpError() << "'type_info' symbol attribute missing"; + return success(); + } + + return failure(); +} + +//===----------------------------------------------------------------------===// +// IfOp +//===----------------------------------------------------------------------===// + +ParseResult cir::IfOp::parse(OpAsmParser &parser, OperationState &result) { + // Create the regions for 'then'. + result.regions.reserve(2); + Region *thenRegion = result.addRegion(); + Region *elseRegion = result.addRegion(); + + auto &builder = parser.getBuilder(); + OpAsmParser::UnresolvedOperand cond; + Type boolType = ::mlir::cir::BoolType::get(builder.getContext()); + + if (parser.parseOperand(cond) || + parser.resolveOperand(cond, boolType, result.operands)) + return failure(); + + // Parse the 'then' region. + auto parseThenLoc = parser.getCurrentLocation(); + if (parser.parseRegion(*thenRegion, /*arguments=*/{}, + /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, *thenRegion, parseThenLoc).failed()) + return failure(); + + // If we find an 'else' keyword, parse the 'else' region. + if (!parser.parseOptionalKeyword("else")) { + auto parseElseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(*elseRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, *elseRegion, parseElseLoc).failed()) + return failure(); + } + + // Parse the optional attribute list. + if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + return success(); +} + +void cir::IfOp::print(OpAsmPrinter &p) { + p << " " << getCondition() << " "; + auto &thenRegion = this->getThenRegion(); + p.printRegion(thenRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/!omitRegionTerm(thenRegion)); + + // Print the 'else' regions if it exists and has a block. + auto &elseRegion = this->getElseRegion(); + if (!elseRegion.empty()) { + p << " else "; + p.printRegion(elseRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/!omitRegionTerm(elseRegion)); + } + + p.printOptionalAttrDict(getOperation()->getAttrs()); +} + +/// Default callback for IfOp builders. Inserts nothing for now. +void mlir::cir::buildTerminatedBody(OpBuilder &builder, Location loc) {} + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. 
`operands` is a set of optional attributes that +/// correspond to a constant value for each operand, or null if that operand is +/// not a constant. +void IfOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // The `then` and the `else` region branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // Don't consider the else region if it is empty. + Region *elseRegion = &this->getElseRegion(); + if (elseRegion->empty()) + elseRegion = nullptr; + + // Otherwise, the successor is dependent on the condition. + // bool condition; + // if (auto condAttr = operands.front().dyn_cast_or_null()) { + // assert(0 && "not implemented"); + // condition = condAttr.getValue().isOneValue(); + // Add the successor regions using the condition. + // regions.push_back(RegionSuccessor(condition ? &thenRegion() : + // elseRegion)); + // return; + // } + + // If the condition isn't constant, both regions may be executed. + regions.push_back(RegionSuccessor(&getThenRegion())); + // If the else region does not exist, it is not a viable successor. + if (elseRegion) + regions.push_back(RegionSuccessor(elseRegion)); + return; +} + +void IfOp::build(OpBuilder &builder, OperationState &result, Value cond, + bool withElseRegion, + function_ref thenBuilder, + function_ref elseBuilder) { + assert(thenBuilder && "the builder callback for 'then' must be present"); + + result.addOperands(cond); + + OpBuilder::InsertionGuard guard(builder); + Region *thenRegion = result.addRegion(); + builder.createBlock(thenRegion); + thenBuilder(builder, result.location); + + Region *elseRegion = result.addRegion(); + if (!withElseRegion) + return; + + builder.createBlock(elseRegion); + elseBuilder(builder, result.location); +} + +LogicalResult IfOp::verify() { return success(); } + +//===----------------------------------------------------------------------===// +// ScopeOp +//===----------------------------------------------------------------------===// + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes that +/// correspond to a constant value for each operand, or null if that operand is +/// not a constant. +void ScopeOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // The only region always branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor(getODSResults(0))); + return; + } + + // If the condition isn't constant, both regions may be executed. 
+ regions.push_back(RegionSuccessor(&getScopeRegion())); +} + +void ScopeOp::build( + OpBuilder &builder, OperationState &result, + function_ref scopeBuilder) { + assert(scopeBuilder && "the builder callback for 'then' must be present"); + + OpBuilder::InsertionGuard guard(builder); + Region *scopeRegion = result.addRegion(); + builder.createBlock(scopeRegion); + + mlir::Type yieldTy; + scopeBuilder(builder, yieldTy, result.location); + + if (yieldTy) + result.addTypes(TypeRange{yieldTy}); +} + +void ScopeOp::build(OpBuilder &builder, OperationState &result, + function_ref scopeBuilder) { + assert(scopeBuilder && "the builder callback for 'then' must be present"); + OpBuilder::InsertionGuard guard(builder); + Region *scopeRegion = result.addRegion(); + builder.createBlock(scopeRegion); + scopeBuilder(builder, result.location); +} + +LogicalResult ScopeOp::verify() { return success(); } + +//===----------------------------------------------------------------------===// +// TryOp +//===----------------------------------------------------------------------===// + +void TryOp::build( + OpBuilder &builder, OperationState &result, + function_ref tryBodyBuilder, + function_ref catchBuilder) { + assert(tryBodyBuilder && "expected builder callback for 'cir.try' body"); + + OpBuilder::InsertionGuard guard(builder); + + // Try body region + Region *tryBodyRegion = result.addRegion(); + + // Create try body region and set insertion point + builder.createBlock(tryBodyRegion); + tryBodyBuilder(builder, result.location); + catchBuilder(builder, result.location, result); +} + +void TryOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // If any index all the underlying regions branch back to the parent + // operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // If the condition isn't constant, both regions may be executed. + regions.push_back(RegionSuccessor(&getTryRegion())); + + // FIXME: optimize, ideas include: + // - If we know a target function never throws a specific type, we can + // remove the catch handler. 
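+  // Also list every catch handler as a possible successor region.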
+ for (auto &r : this->getCatchRegions()) + regions.push_back(RegionSuccessor(&r)); +} + +void printCatchRegions(OpAsmPrinter &p, TryOp op, + mlir::MutableArrayRef<::mlir::Region> regions, + mlir::ArrayAttr catchList) { + + int currCatchIdx = 0; + if (!catchList) + return; + p << "catch ["; + llvm::interleaveComma(catchList, p, [&](const Attribute &a) { + auto exRtti = a; + + if (mlir::isa(a)) { + p.printAttribute(a); + p << " "; + } else if (!exRtti) { + p << "all"; + } else { + p << "type "; + p.printAttribute(exRtti); + p << " "; + } + p.printRegion(regions[currCatchIdx], /*printEntryBLockArgs=*/false, + /*printBlockTerminators=*/true); + currCatchIdx++; + }); + p << "]"; +} + +ParseResult parseCatchRegions( + OpAsmParser &parser, + llvm::SmallVectorImpl> ®ions, + ::mlir::ArrayAttr &catchersAttr) { + SmallVector catchList; + + auto parseAndCheckRegion = [&]() -> ParseResult { + // Parse region attached to catch + regions.emplace_back(new Region); + Region &currRegion = *regions.back().get(); + auto parserLoc = parser.getCurrentLocation(); + if (parser.parseRegion(currRegion, /*arguments=*/{}, /*argTypes=*/{})) { + regions.clear(); + return failure(); + } + + if (currRegion.empty()) { + return parser.emitError(parser.getCurrentLocation(), + "catch region shall not be empty"); + } + + if (!(currRegion.back().mightHaveTerminator() && + currRegion.back().getTerminator())) + return parser.emitError( + parserLoc, "blocks are expected to be explicitly terminated"); + + return success(); + }; + + auto parseCatchEntry = [&]() -> ParseResult { + mlir::Type exceptionType; + mlir::Attribute exceptionTypeInfo; + + // FIXME: support most recent syntax, currently broken. + ::llvm::StringRef attrStr; + if (!parser.parseOptionalKeyword(&attrStr, {"all"})) { + if (parser.parseKeyword("type").failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected 'type' keyword here"); + if (parser.parseType(exceptionType).failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected valid exception type"); + if (parser.parseAttribute(exceptionTypeInfo).failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected valid RTTI info attribute"); + } + catchList.push_back(exceptionTypeInfo); + return parseAndCheckRegion(); + }; + + if (parser.parseKeyword("catch").failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected 'catch' keyword here"); + + if (parser + .parseCommaSeparatedList(OpAsmParser::Delimiter::Square, + parseCatchEntry, " in catch list") + .failed()) + return failure(); + + catchersAttr = parser.getBuilder().getArrayAttr(catchList); + return ::mlir::success(); +} + +//===----------------------------------------------------------------------===// +// TernaryOp +//===----------------------------------------------------------------------===// + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes that +/// correspond to a constant value for each operand, or null if that operand is +/// not a constant. +void TernaryOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // The `true` and the `false` region branch back to the parent operation. 
+ if (!point.isParent()) { + regions.push_back(RegionSuccessor(this->getODSResults(0))); + return; + } + + // Try optimize if we have more information + // if (auto condAttr = operands.front().dyn_cast_or_null()) { + // assert(0 && "not implemented"); + // } + + // If the condition isn't constant, both regions may be executed. + regions.push_back(RegionSuccessor(&getTrueRegion())); + regions.push_back(RegionSuccessor(&getFalseRegion())); + return; +} + +void TernaryOp::build(OpBuilder &builder, OperationState &result, Value cond, + function_ref trueBuilder, + function_ref falseBuilder) { + result.addOperands(cond); + OpBuilder::InsertionGuard guard(builder); + Region *trueRegion = result.addRegion(); + auto *block = builder.createBlock(trueRegion); + trueBuilder(builder, result.location); + Region *falseRegion = result.addRegion(); + builder.createBlock(falseRegion); + falseBuilder(builder, result.location); + + auto yield = dyn_cast(block->getTerminator()); + assert((yield && yield.getNumOperands() <= 1) && + "expected zero or one result type"); + if (yield.getNumOperands() == 1) + result.addTypes(TypeRange{yield.getOperandTypes().front()}); +} + +//===----------------------------------------------------------------------===// +// SelectOp +//===----------------------------------------------------------------------===// + +OpFoldResult SelectOp::fold(FoldAdaptor adaptor) { + auto condition = adaptor.getCondition(); + if (condition) { + auto conditionValue = mlir::cast(condition).getValue(); + return conditionValue ? getTrueValue() : getFalseValue(); + } + + // cir.select if %0 then x else x -> x + auto trueValue = adaptor.getTrueValue(); + auto falseValue = adaptor.getFalseValue(); + if (trueValue && trueValue == falseValue) + return trueValue; + if (getTrueValue() == getFalseValue()) + return getTrueValue(); + + return nullptr; +} + +//===----------------------------------------------------------------------===// +// BrOp +//===----------------------------------------------------------------------===// + +mlir::SuccessorOperands BrOp::getSuccessorOperands(unsigned index) { + assert(index == 0 && "invalid successor index"); + return mlir::SuccessorOperands(getDestOperandsMutable()); +} + +Block *BrOp::getSuccessorForOperands(ArrayRef) { return getDest(); } + +//===----------------------------------------------------------------------===// +// BrCondOp +//===----------------------------------------------------------------------===// + +mlir::SuccessorOperands BrCondOp::getSuccessorOperands(unsigned index) { + assert(index < getNumSuccessors() && "invalid successor index"); + return SuccessorOperands(index == 0 ? getDestOperandsTrueMutable() + : getDestOperandsFalseMutable()); +} + +Block *BrCondOp::getSuccessorForOperands(ArrayRef operands) { + if (IntegerAttr condAttr = dyn_cast_if_present(operands.front())) + return condAttr.getValue().isOne() ? 
getDestTrue() : getDestFalse(); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// SwitchOp +//===----------------------------------------------------------------------===// + +ParseResult +parseSwitchOp(OpAsmParser &parser, + llvm::SmallVectorImpl> ®ions, + ::mlir::ArrayAttr &casesAttr, + mlir::OpAsmParser::UnresolvedOperand &cond, + mlir::Type &condType) { + mlir::cir::IntType intCondType; + SmallVector cases; + + auto parseAndCheckRegion = [&]() -> ParseResult { + // Parse region attached to case + regions.emplace_back(new Region); + Region &currRegion = *regions.back().get(); + auto parserLoc = parser.getCurrentLocation(); + if (parser.parseRegion(currRegion, /*arguments=*/{}, /*argTypes=*/{})) { + regions.clear(); + return failure(); + } + + if (currRegion.empty()) { + return parser.emitError(parser.getCurrentLocation(), + "case region shall not be empty"); + } + + if (!(currRegion.back().mightHaveTerminator() && + currRegion.back().getTerminator())) + return parser.emitError(parserLoc, + "case regions must be explicitly terminated"); + + return success(); + }; + + auto parseCase = [&]() -> ParseResult { + auto loc = parser.getCurrentLocation(); + if (parser.parseKeyword("case").failed()) + return parser.emitError(loc, "expected 'case' keyword here"); + + if (parser.parseLParen().failed()) + return parser.emitError(parser.getCurrentLocation(), "expected '('"); + + ::llvm::StringRef attrStr; + ::mlir::NamedAttrList attrStorage; + + // case (equal, 20) { + // ... + // 1. Get the case kind + // 2. Get the value (next in list) + + // These needs to be in sync with CIROps.td + if (parser.parseOptionalKeyword(&attrStr, + {"default", "equal", "anyof", "range"})) { + ::mlir::StringAttr attrVal; + ::mlir::OptionalParseResult parseResult = parser.parseOptionalAttribute( + attrVal, parser.getBuilder().getNoneType(), "kind", attrStorage); + if (parseResult.has_value()) { + if (failed(*parseResult)) + return ::mlir::failure(); + attrStr = attrVal.getValue(); + } + } + + if (attrStr.empty()) { + return parser.emitError( + loc, + "expected string or keyword containing one of the following " + "enum values for attribute 'kind' [default, equal, anyof, range]"); + } + + auto attrOptional = ::mlir::cir::symbolizeCaseOpKind(attrStr.str()); + if (!attrOptional) + return parser.emitError(loc, "invalid ") + << "kind attribute specification: \"" << attrStr << '"'; + + auto kindAttr = ::mlir::cir::CaseOpKindAttr::get( + parser.getBuilder().getContext(), attrOptional.value()); + + // `,` value or `,` [values,...] 
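+    // e.g. `case (equal, 20)` or `case (anyof, [1, 2, 3] : !s32i)`
+    // (illustrative values; the type suffix is optional and, when present,
+    // must match the switch condition type).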
+ SmallVector caseEltValueListAttr; + mlir::ArrayAttr caseValueList; + + switch (kindAttr.getValue()) { + case cir::CaseOpKind::Equal: { + if (parser.parseComma().failed()) + return mlir::failure(); + int64_t val = 0; + if (parser.parseInteger(val).failed()) + return ::mlir::failure(); + caseEltValueListAttr.push_back(mlir::cir::IntAttr::get(intCondType, val)); + break; + } + case cir::CaseOpKind::Range: + case cir::CaseOpKind::Anyof: { + if (parser.parseComma().failed()) + return mlir::failure(); + if (parser.parseLSquare().failed()) + return mlir::failure(); + if (parser.parseCommaSeparatedList([&]() { + int64_t val = 0; + if (parser.parseInteger(val).failed()) + return ::mlir::failure(); + caseEltValueListAttr.push_back( + mlir::cir::IntAttr::get(intCondType, val)); + return ::mlir::success(); + })) + return mlir::failure(); + if (parser.parseRSquare().failed()) + return mlir::failure(); + break; + } + case cir::CaseOpKind::Default: { + if (parser.parseRParen().failed()) + return parser.emitError(parser.getCurrentLocation(), "expected ')'"); + cases.push_back(cir::CaseAttr::get( + parser.getContext(), parser.getBuilder().getArrayAttr({}), kindAttr)); + return parseAndCheckRegion(); + } + } + + caseValueList = parser.getBuilder().getArrayAttr(caseEltValueListAttr); + cases.push_back( + cir::CaseAttr::get(parser.getContext(), caseValueList, kindAttr)); + if (succeeded(parser.parseOptionalColon())) { + Type caseIntTy; + if (parser.parseType(caseIntTy).failed()) + return parser.emitError(parser.getCurrentLocation(), "expected type"); + if (intCondType != caseIntTy) + return parser.emitError(parser.getCurrentLocation(), + "expected a match with the condition type"); + } + if (parser.parseRParen().failed()) + return parser.emitError(parser.getCurrentLocation(), "expected ')'"); + return parseAndCheckRegion(); + }; + + if (parser.parseLParen()) + return ::mlir::failure(); + + if (parser.parseOperand(cond)) + return ::mlir::failure(); + if (parser.parseColon()) + return ::mlir::failure(); + if (parser.parseCustomTypeWithFallback(intCondType)) + return ::mlir::failure(); + condType = intCondType; + if (parser.parseRParen()) + return ::mlir::failure(); + + if (parser + .parseCommaSeparatedList(OpAsmParser::Delimiter::Square, parseCase, + " in cases list") + .failed()) + return failure(); + + casesAttr = parser.getBuilder().getArrayAttr(cases); + return ::mlir::success(); +} + +void printSwitchOp(OpAsmPrinter &p, SwitchOp op, + mlir::MutableArrayRef<::mlir::Region> regions, + mlir::ArrayAttr casesAttr, mlir::Value condition, + mlir::Type condType) { + int idx = 0, lastIdx = regions.size() - 1; + + p << "("; + p << condition; + p << " : "; + p.printStrippedAttrOrType(condType); + p << ") ["; + // FIXME: ideally we want some extra indentation for "cases" but too + // cumbersome to pull it out now, since most handling is private. Perhaps + // better improve overall mechanism. + p.printNewline(); + for (auto &r : regions) { + p << "case ("; + + auto attr = cast(casesAttr[idx]); + auto kind = attr.getKind().getValue(); + assert((kind == CaseOpKind::Default || kind == CaseOpKind::Equal || + kind == CaseOpKind::Anyof || kind == CaseOpKind::Range) && + "unknown case"); + + // Case kind + p << stringifyCaseOpKind(kind); + + // Case value + switch (kind) { + case cir::CaseOpKind::Equal: { + p << ", "; + auto intAttr = cast(attr.getValue()[0]); + auto intAttrTy = cast(intAttr.getType()); + (intAttrTy.isSigned() ? 
p << intAttr.getSInt() : p << intAttr.getUInt()); + break; + } + case cir::CaseOpKind::Range: + assert(attr.getValue().size() == 2 && "range must have two values"); + // The print format of the range is the same as anyof + LLVM_FALLTHROUGH; + case cir::CaseOpKind::Anyof: { + p << ", ["; + llvm::interleaveComma(attr.getValue(), p, [&](const Attribute &a) { + auto intAttr = cast(a); + auto intAttrTy = cast(intAttr.getType()); + (intAttrTy.isSigned() ? p << intAttr.getSInt() + : p << intAttr.getUInt()); + }); + p << "] : "; + auto typedAttr = dyn_cast(attr.getValue()[0]); + assert(typedAttr && "this should never not have a type!"); + p.printType(typedAttr.getType()); + break; + } + case cir::CaseOpKind::Default: + break; + } + + p << ") "; + p.printRegion(r, /*printEntryBLockArgs=*/false, + /*printBlockTerminators=*/true); + if (idx < lastIdx) + p << ","; + p.printNewline(); + idx++; + } + p << "]"; +} + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes +/// that correspond to a constant value for each operand, or null if that +/// operand is not a constant. +void SwitchOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // If any index all the underlying regions branch back to the parent + // operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // for (auto &r : this->getRegions()) { + // If we can figure out the case stmt we are landing, this can be + // overly simplified. + // bool condition; + // if (auto condAttr = operands.front().dyn_cast_or_null()) { + // assert(0 && "not implemented"); + // (void)r; + // condition = condAttr.getValue().isOneValue(); + // Add the successor regions using the condition. + // regions.push_back(RegionSuccessor(condition ? &thenRegion() : + // elseRegion)); + // return; + // } + // } + + // If the condition isn't constant, all regions may be executed. + for (auto &r : this->getRegions()) + regions.push_back(RegionSuccessor(&r)); +} + +LogicalResult SwitchOp::verify() { + if (getCases().has_value() && getCases()->size() != getNumRegions()) + return emitOpError("number of cases attributes and regions must match"); + return success(); +} + +void SwitchOp::build( + OpBuilder &builder, OperationState &result, Value cond, + function_ref switchBuilder) { + assert(switchBuilder && "the builder callback for regions must be present"); + OpBuilder::InsertionGuard guardSwitch(builder); + result.addOperands({cond}); + switchBuilder(builder, result.location, result); +} + +//===----------------------------------------------------------------------===// +// SwitchFlatOp +//===----------------------------------------------------------------------===// + +void SwitchFlatOp::build(OpBuilder &builder, OperationState &result, + Value value, Block *defaultDestination, + ValueRange defaultOperands, ArrayRef caseValues, + BlockRange caseDestinations, + ArrayRef caseOperands) { + + std::vector caseValuesAttrs; + for (auto &val : caseValues) { + caseValuesAttrs.push_back(mlir::cir::IntAttr::get(value.getType(), val)); + } + auto attrs = ArrayAttr::get(builder.getContext(), caseValuesAttrs); + + build(builder, result, value, defaultOperands, caseOperands, attrs, + defaultDestination, caseDestinations); +} + +/// ::= `[` (case (`,` case )* )? `]` +/// ::= integer `:` bb-id (`(` ssa-use-and-type-list `)`)? 
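+/// e.g. `[ 0: ^bb1, 1: ^bb2(%v : !s32i) ]` (illustrative).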
+static ParseResult parseSwitchFlatOpCases( + OpAsmParser &parser, Type flagType, mlir::ArrayAttr &caseValues, + SmallVectorImpl &caseDestinations, + SmallVectorImpl> &caseOperands, + SmallVectorImpl> &caseOperandTypes) { + if (failed(parser.parseLSquare())) + return failure(); + if (succeeded(parser.parseOptionalRSquare())) + return success(); + SmallVector values; + + auto parseCase = [&]() { + int64_t value = 0; + if (failed(parser.parseInteger(value))) + return failure(); + + values.push_back(IntAttr::get(flagType, value)); + + Block *destination; + SmallVector operands; + SmallVector operandTypes; + if (parser.parseColon() || parser.parseSuccessor(destination)) + return failure(); + if (!parser.parseOptionalLParen()) { + if (parser.parseOperandList(operands, OpAsmParser::Delimiter::None, + /*allowResultNumber=*/false) || + parser.parseColonTypeList(operandTypes) || parser.parseRParen()) + return failure(); + } + caseDestinations.push_back(destination); + caseOperands.emplace_back(operands); + caseOperandTypes.emplace_back(operandTypes); + return success(); + }; + if (failed(parser.parseCommaSeparatedList(parseCase))) + return failure(); + + caseValues = ArrayAttr::get(flagType.getContext(), values); + + return parser.parseRSquare(); +} + +static void printSwitchFlatOpCases(OpAsmPrinter &p, SwitchFlatOp op, + Type flagType, mlir::ArrayAttr caseValues, + SuccessorRange caseDestinations, + OperandRangeRange caseOperands, + const TypeRangeRange &caseOperandTypes) { + p << '['; + p.printNewline(); + if (!caseValues) { + p << ']'; + return; + } + + size_t index = 0; + llvm::interleave( + llvm::zip(caseValues, caseDestinations), + [&](auto i) { + p << " "; + mlir::Attribute a = std::get<0>(i); + p << mlir::cast(a).getValue(); + p << ": "; + p.printSuccessorAndUseList(std::get<1>(i), caseOperands[index++]); + }, + [&] { + p << ','; + p.printNewline(); + }); + p.printNewline(); + p << ']'; +} + +//===----------------------------------------------------------------------===// +// LoopOpInterface Methods +//===----------------------------------------------------------------------===// + +void DoWhileOp::getSuccessorRegions( + ::mlir::RegionBranchPoint point, + ::llvm::SmallVectorImpl<::mlir::RegionSuccessor> ®ions) { + LoopOpInterface::getLoopOpSuccessorRegions(*this, point, regions); +} + +::llvm::SmallVector DoWhileOp::getLoopRegions() { + return {&getBody()}; +} + +void WhileOp::getSuccessorRegions( + ::mlir::RegionBranchPoint point, + ::llvm::SmallVectorImpl<::mlir::RegionSuccessor> ®ions) { + LoopOpInterface::getLoopOpSuccessorRegions(*this, point, regions); +} + +::llvm::SmallVector WhileOp::getLoopRegions() { return {&getBody()}; } + +void ForOp::getSuccessorRegions( + ::mlir::RegionBranchPoint point, + ::llvm::SmallVectorImpl<::mlir::RegionSuccessor> ®ions) { + LoopOpInterface::getLoopOpSuccessorRegions(*this, point, regions); +} + +::llvm::SmallVector ForOp::getLoopRegions() { return {&getBody()}; } + +//===----------------------------------------------------------------------===// +// GlobalOp +//===----------------------------------------------------------------------===// + +static ParseResult parseConstantValue(OpAsmParser &parser, + mlir::Attribute &valueAttr) { + NamedAttrList attr; + return parser.parseAttribute(valueAttr, "value", attr); +} + +// FIXME: create a CIRConstAttr and hide this away for both global +// initialization and cir.const operation. 
+static void printConstant(OpAsmPrinter &p, Attribute value) { + p.printAttribute(value); +} + +static ParseResult parseGlobalOpAddrSpace(OpAsmParser &p, + AddressSpaceAttr &addrSpaceAttr) { + return parseAddrSpaceAttribute(p, addrSpaceAttr); +} + +static void printGlobalOpAddrSpace(OpAsmPrinter &p, GlobalOp op, + AddressSpaceAttr addrSpaceAttr) { + printAddrSpaceAttribute(p, addrSpaceAttr); +} + +static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, GlobalOp op, + TypeAttr type, Attribute initAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { + auto printType = [&]() { p << ": " << type; }; + if (!op.isDeclaration()) { + p << "= "; + if (!ctorRegion.empty()) { + p << "ctor "; + printType(); + p << " "; + p.printRegion(ctorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } else { + // This also prints the type... + if (initAttr) + printConstant(p, initAttr); + } + + if (!dtorRegion.empty()) { + p << " dtor "; + p.printRegion(dtorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } + } else { + printType(); + } +} + +static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, + TypeAttr &typeAttr, + Attribute &initialValueAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { + mlir::Type opTy; + if (parser.parseOptionalEqual().failed()) { + // Absence of equal means a declaration, so we need to parse the type. + // cir.global @a : i32 + if (parser.parseColonType(opTy)) + return failure(); + } else { + // Parse contructor, example: + // cir.global @rgb = ctor : type { ... } + if (!parser.parseOptionalKeyword("ctor")) { + if (parser.parseColonType(opTy)) + return failure(); + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (!ctorRegion.hasOneBlock()) + return parser.emitError(parser.getCurrentLocation(), + "ctor region must have exactly one block"); + if (ctorRegion.back().empty()) + return parser.emitError(parser.getCurrentLocation(), + "ctor region shall not be empty"); + if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed()) + return failure(); + } else { + // Parse constant with initializer, examples: + // cir.global @y = 3.400000e+00 : f32 + // cir.global @rgb = #cir.const_array<[...] : !cir.array> + if (parseConstantValue(parser, initialValueAttr).failed()) + return failure(); + + assert(mlir::isa(initialValueAttr) && + "Non-typed attrs shouldn't appear here."); + auto typedAttr = mlir::cast(initialValueAttr); + opTy = typedAttr.getType(); + } + + // Parse destructor, example: + // dtor { ... } + if (!parser.parseOptionalKeyword("dtor")) { + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (!dtorRegion.hasOneBlock()) + return parser.emitError(parser.getCurrentLocation(), + "dtor region must have exactly one block"); + if (dtorRegion.back().empty()) + return parser.emitError(parser.getCurrentLocation(), + "dtor region shall not be empty"); + if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed()) + return failure(); + } + } + + typeAttr = TypeAttr::get(opTy); + return success(); +} + +LogicalResult GlobalOp::verify() { + // Verify that the initial value, if present, is either a unit attribute or + // an attribute CIR supports. 
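+  // For example (illustrative), `cir.global @y = 3.400000e+00 : f32` carries
+  // a typed initializer that must pass checkConstantTypes below.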
+ if (getInitialValue().has_value()) { + if (checkConstantTypes(getOperation(), getSymType(), *getInitialValue()) + .failed()) + return failure(); + } + + // Verify that the constructor region, if present, has only one block which is + // not empty. + auto &ctorRegion = getCtorRegion(); + if (!ctorRegion.empty()) { + if (!ctorRegion.hasOneBlock()) { + return emitError() << "ctor region must have exactly one block."; + } + + auto &block = ctorRegion.front(); + if (block.empty()) { + return emitError() << "ctor region shall not be empty."; + } + } + + // Verify that the destructor region, if present, has only one block which is + // not empty. + auto &dtorRegion = getDtorRegion(); + if (!dtorRegion.empty()) { + if (!dtorRegion.hasOneBlock()) { + return emitError() << "dtor region must have exactly one block."; + } + + auto &block = dtorRegion.front(); + if (block.empty()) { + return emitError() << "dtor region shall not be empty."; + } + } + + if (std::optional alignAttr = getAlignment()) { + uint64_t alignment = alignAttr.value(); + if (!llvm::isPowerOf2_64(alignment)) + return emitError() << "alignment attribute value " << alignment + << " is not a power of 2"; + } + + switch (getLinkage()) { + case GlobalLinkageKind::InternalLinkage: + case GlobalLinkageKind::PrivateLinkage: + if (isPublic()) + return emitError() << "public visibility not allowed with '" + << stringifyGlobalLinkageKind(getLinkage()) + << "' linkage"; + break; + case GlobalLinkageKind::ExternalLinkage: + case GlobalLinkageKind::ExternalWeakLinkage: + case GlobalLinkageKind::LinkOnceODRLinkage: + case GlobalLinkageKind::LinkOnceAnyLinkage: + case GlobalLinkageKind::CommonLinkage: + case GlobalLinkageKind::WeakAnyLinkage: + case GlobalLinkageKind::WeakODRLinkage: + // FIXME: mlir's concept of visibility gets tricky with LLVM ones, + // for instance, symbol declarations cannot be "public", so we + // have to mark them "private" to workaround the symbol verifier. + if (isPrivate() && !isDeclaration()) + return emitError() << "private visibility not allowed with '" + << stringifyGlobalLinkageKind(getLinkage()) + << "' linkage"; + break; + default: + emitError() << stringifyGlobalLinkageKind(getLinkage()) + << ": verifier not implemented\n"; + return failure(); + } + + // TODO: verify visibility for declarations? 
+ return success(); +} + +void GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, + StringRef sym_name, Type sym_type, bool isConstant, + cir::GlobalLinkageKind linkage, + cir::AddressSpaceAttr addrSpace, + function_ref ctorBuilder, + function_ref dtorBuilder) { + odsState.addAttribute(getSymNameAttrName(odsState.name), + odsBuilder.getStringAttr(sym_name)); + odsState.addAttribute(getSymTypeAttrName(odsState.name), + ::mlir::TypeAttr::get(sym_type)); + if (isConstant) + odsState.addAttribute(getConstantAttrName(odsState.name), + odsBuilder.getUnitAttr()); + + ::mlir::cir::GlobalLinkageKindAttr linkageAttr = + cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); + odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); + + if (addrSpace) + odsState.addAttribute(getAddrSpaceAttrName(odsState.name), addrSpace); + + Region *ctorRegion = odsState.addRegion(); + if (ctorBuilder) { + odsBuilder.createBlock(ctorRegion); + ctorBuilder(odsBuilder, odsState.location); + } + + Region *dtorRegion = odsState.addRegion(); + if (dtorBuilder) { + odsBuilder.createBlock(dtorRegion); + dtorBuilder(odsBuilder, odsState.location); + } + + odsState.addAttribute( + getGlobalVisibilityAttrName(odsState.name), + mlir::cir::VisibilityAttr::get(odsBuilder.getContext())); +} + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes that +/// correspond to a constant value for each operand, or null if that operand is +/// not a constant. +void GlobalOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // The `ctor` and `dtor` regions always branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // Don't consider the ctor region if it is empty. + Region *ctorRegion = &this->getCtorRegion(); + if (ctorRegion->empty()) + ctorRegion = nullptr; + + // Don't consider the dtor region if it is empty. + Region *dtorRegion = &this->getCtorRegion(); + if (dtorRegion->empty()) + dtorRegion = nullptr; + + // If the condition isn't constant, both regions may be executed. + if (ctorRegion) + regions.push_back(RegionSuccessor(ctorRegion)); + if (dtorRegion) + regions.push_back(RegionSuccessor(dtorRegion)); +} + +//===----------------------------------------------------------------------===// +// GetGlobalOp +//===----------------------------------------------------------------------===// + +LogicalResult +GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + // Verify that the result type underlying pointer type matches the type of + // the referenced cir.global or cir.func op. + auto op = symbolTable.lookupNearestSymbolFrom(*this, getNameAttr()); + if (!(isa(op) || isa(op))) + return emitOpError("'") + << getName() + << "' does not reference a valid cir.global or cir.func"; + + mlir::Type symTy; + mlir::cir::AddressSpaceAttr symAddrSpace{}; + if (auto g = dyn_cast(op)) { + symTy = g.getSymType(); + symAddrSpace = g.getAddrSpaceAttr(); + // Verify that for thread local global access, the global needs to + // be marked with tls bits. 
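+    // Illustrative (assumed syntax): `cir.get_global thread_local @g` is only
+    // valid when @g itself declares a TLS model.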
+ if (getTls() && !g.getTlsModel()) + return emitOpError("access to global not marked thread local"); + } else if (auto f = dyn_cast(op)) + symTy = f.getFunctionType(); + else + llvm_unreachable("shall not get here"); + + auto resultType = dyn_cast(getAddr().getType()); + if (!resultType || symTy != resultType.getPointee()) + return emitOpError("result type pointee type '") + << resultType.getPointee() << "' does not match type " << symTy + << " of the global @" << getName(); + + if (symAddrSpace != resultType.getAddrSpace()) { + return emitOpError() + << "result type address space does not match the address " + "space of the global @" + << getName(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// VTableAddrPointOp +//===----------------------------------------------------------------------===// + +LogicalResult +VTableAddrPointOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + // vtable ptr is not coming from a symbol. + if (!getName()) + return success(); + auto name = *getName(); + + // Verify that the result type underlying pointer type matches the type of + // the referenced cir.global or cir.func op. + auto op = dyn_cast_or_null( + symbolTable.lookupNearestSymbolFrom(*this, getNameAttr())); + if (!op) + return emitOpError("'") + << name << "' does not reference a valid cir.global"; + auto init = op.getInitialValue(); + if (!init) + return success(); + if (!isa(*init)) + return emitOpError("Expected #cir.vtable in initializer for global '") + << name << "'"; + return success(); +} + +LogicalResult cir::VTableAddrPointOp::verify() { + // The operation uses either a symbol or a value to operate, but not both + if (getName() && getSymAddr()) + return emitOpError("should use either a symbol or value, but not both"); + + // If not a symbol, stick with the concrete type used for getSymAddr. + if (getSymAddr()) + return success(); + + auto resultType = getAddr().getType(); + auto intTy = mlir::cir::IntType::get(getContext(), 32, /*isSigned=*/false); + auto fnTy = mlir::cir::FuncType::get({}, intTy); + + auto resTy = mlir::cir::PointerType::get( + getContext(), mlir::cir::PointerType::get(getContext(), fnTy)); + + if (resultType != resTy) + return emitOpError("result type must be '") + << resTy << "', but provided result type is '" << resultType << "'"; + return success(); +} + +//===----------------------------------------------------------------------===// +// FuncOp +//===----------------------------------------------------------------------===// + +/// Returns the name used for the linkage attribute. This *must* correspond to +/// the name of the attribute in ODS. 
+static StringRef getLinkageAttrNameString() { return "linkage"; } + +void cir::FuncOp::build(OpBuilder &builder, OperationState &result, + StringRef name, cir::FuncType type, + GlobalLinkageKind linkage, CallingConv callingConv, + ArrayRef attrs, + ArrayRef argAttrs) { + result.addRegion(); + result.addAttribute(SymbolTable::getSymbolAttrName(), + builder.getStringAttr(name)); + result.addAttribute(getFunctionTypeAttrName(result.name), + TypeAttr::get(type)); + result.addAttribute( + getLinkageAttrNameString(), + GlobalLinkageKindAttr::get(builder.getContext(), linkage)); + result.addAttribute(getCallingConvAttrName(result.name), + CallingConvAttr::get(builder.getContext(), callingConv)); + result.addAttribute(getGlobalVisibilityAttrName(result.name), + mlir::cir::VisibilityAttr::get(builder.getContext())); + + result.attributes.append(attrs.begin(), attrs.end()); + if (argAttrs.empty()) + return; + + function_interface_impl::addArgAndResultAttrs( + builder, result, argAttrs, + /*resultAttrs=*/std::nullopt, getArgAttrsAttrName(result.name), + getResAttrsAttrName(result.name)); +} + +ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { + llvm::SMLoc loc = parser.getCurrentLocation(); + + auto builtinNameAttr = getBuiltinAttrName(state.name); + auto coroutineNameAttr = getCoroutineAttrName(state.name); + auto lambdaNameAttr = getLambdaAttrName(state.name); + auto visNameAttr = getSymVisibilityAttrName(state.name); + auto noProtoNameAttr = getNoProtoAttrName(state.name); + auto visibilityNameAttr = getGlobalVisibilityAttrName(state.name); + auto dsolocalNameAttr = getDsolocalAttrName(state.name); + auto annotationsNameAttr = getAnnotationsAttrName(state.name); + if (::mlir::succeeded(parser.parseOptionalKeyword(builtinNameAttr.strref()))) + state.addAttribute(builtinNameAttr, parser.getBuilder().getUnitAttr()); + if (::mlir::succeeded( + parser.parseOptionalKeyword(coroutineNameAttr.strref()))) + state.addAttribute(coroutineNameAttr, parser.getBuilder().getUnitAttr()); + if (::mlir::succeeded(parser.parseOptionalKeyword(lambdaNameAttr.strref()))) + state.addAttribute(lambdaNameAttr, parser.getBuilder().getUnitAttr()); + if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded()) + state.addAttribute(noProtoNameAttr, parser.getBuilder().getUnitAttr()); + + // Default to external linkage if no keyword is provided. + state.addAttribute(getLinkageAttrNameString(), + GlobalLinkageKindAttr::get( + parser.getContext(), + parseOptionalCIRKeyword( + parser, GlobalLinkageKind::ExternalLinkage))); + + ::llvm::StringRef visAttrStr; + if (parser.parseOptionalKeyword(&visAttrStr, {"private", "public", "nested"}) + .succeeded()) { + state.addAttribute(visNameAttr, + parser.getBuilder().getStringAttr(visAttrStr)); + } + + mlir::cir::VisibilityAttr cirVisibilityAttr; + parseVisibilityAttr(parser, cirVisibilityAttr); + state.addAttribute(visibilityNameAttr, cirVisibilityAttr); + + if (parser.parseOptionalKeyword(dsolocalNameAttr).succeeded()) + state.addAttribute(dsolocalNameAttr, parser.getBuilder().getUnitAttr()); + + if (parser.parseOptionalKeyword(annotationsNameAttr).succeeded()) + state.addAttribute(annotationsNameAttr, parser.getBuilder().getUnitAttr()); + + StringAttr nameAttr; + SmallVector arguments; + SmallVector resultAttrs; + SmallVector argTypes; + SmallVector resultTypes; + auto &builder = parser.getBuilder(); + + // Parse the name as a symbol. 
+ if (parser.parseSymbolName(nameAttr, SymbolTable::getSymbolAttrName(), + state.attributes)) + return failure(); + + // Parse the function signature. + bool isVariadic = false; + if (function_interface_impl::parseFunctionSignature( + parser, /*allowVariadic=*/true, arguments, isVariadic, resultTypes, + resultAttrs)) + return failure(); + + for (auto &arg : arguments) + argTypes.push_back(arg.type); + + if (resultTypes.size() > 1) + return parser.emitError(loc, "functions only supports zero or one results"); + + // Fetch return type or set it to void if empty/ommited. + mlir::Type returnType = + (resultTypes.empty() ? mlir::cir::VoidType::get(builder.getContext()) + : resultTypes.front()); + + // Build the function type. + auto fnType = mlir::cir::FuncType::get(argTypes, returnType, isVariadic); + if (!fnType) + return failure(); + state.addAttribute(getFunctionTypeAttrName(state.name), + TypeAttr::get(fnType)); + + // If additional attributes are present, parse them. + if (parser.parseOptionalAttrDictWithKeyword(state.attributes)) + return failure(); + + // Add the attributes to the function arguments. + assert(resultAttrs.size() == resultTypes.size()); + function_interface_impl::addArgAndResultAttrs( + builder, state, arguments, resultAttrs, getArgAttrsAttrName(state.name), + getResAttrsAttrName(state.name)); + + bool hasAlias = false; + auto aliaseeNameAttr = getAliaseeAttrName(state.name); + if (::mlir::succeeded(parser.parseOptionalKeyword("alias"))) { + if (parser.parseLParen().failed()) + return failure(); + StringAttr aliaseeAttr; + if (parser.parseOptionalSymbolName(aliaseeAttr).failed()) + return failure(); + state.addAttribute(aliaseeNameAttr, FlatSymbolRefAttr::get(aliaseeAttr)); + if (parser.parseRParen().failed()) + return failure(); + hasAlias = true; + } + + // Default to C calling convention if no keyword is provided. + auto callConvNameAttr = getCallingConvAttrName(state.name); + CallingConv callConv = CallingConv::C; + if (parser.parseOptionalKeyword("cc").succeeded()) { + if (parser.parseLParen().failed()) + return failure(); + if (parseCIRKeyword(parser, callConv).failed()) + return parser.emitError(loc) << "unknown calling convention"; + if (parser.parseRParen().failed()) + return failure(); + } + state.addAttribute(callConvNameAttr, + CallingConvAttr::get(parser.getContext(), callConv)); + + auto parseGlobalDtorCtor = + [&](StringRef keyword, + llvm::function_ref prio)> createAttr) + -> mlir::LogicalResult { + if (::mlir::succeeded(parser.parseOptionalKeyword(keyword))) { + std::optional prio; + if (mlir::succeeded(parser.parseOptionalLParen())) { + auto parsedPrio = mlir::FieldParser::parse(parser); + if (mlir::failed(parsedPrio)) { + return parser.emitError(parser.getCurrentLocation(), + "failed to parse 'priority', of type 'int'"); + return failure(); + } + prio = parsedPrio.value_or(int()); + // Parse literal ')' + if (parser.parseRParen()) + return failure(); + } + createAttr(prio); + } + return success(); + }; + + if (parseGlobalDtorCtor("global_ctor", [&](std::optional prio) { + mlir::cir::GlobalCtorAttr globalCtorAttr = + prio ? mlir::cir::GlobalCtorAttr::get(builder.getContext(), + nameAttr, *prio) + : mlir::cir::GlobalCtorAttr::get(builder.getContext(), + nameAttr); + state.addAttribute(getGlobalCtorAttrName(state.name), globalCtorAttr); + }).failed()) + return failure(); + + if (parseGlobalDtorCtor("global_dtor", [&](std::optional prio) { + mlir::cir::GlobalDtorAttr globalDtorAttr = + prio ? 
mlir::cir::GlobalDtorAttr::get(builder.getContext(), + nameAttr, *prio) + : mlir::cir::GlobalDtorAttr::get(builder.getContext(), + nameAttr); + state.addAttribute(getGlobalDtorAttrName(state.name), globalDtorAttr); + }).failed()) + return failure(); + + Attribute extraAttrs; + if (::mlir::succeeded(parser.parseOptionalKeyword("extra"))) { + if (parser.parseLParen().failed()) + return failure(); + if (parser.parseAttribute(extraAttrs).failed()) + return failure(); + if (parser.parseRParen().failed()) + return failure(); + } else { + NamedAttrList empty; + extraAttrs = mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), empty.getDictionary(builder.getContext())); + } + state.addAttribute(getExtraAttrsAttrName(state.name), extraAttrs); + + // Parse the optional function body. + auto *body = state.addRegion(); + OptionalParseResult parseResult = parser.parseOptionalRegion( + *body, arguments, /*enableNameShadowing=*/false); + if (parseResult.has_value()) { + if (hasAlias) + parser.emitError(loc, "function alias shall not have a body"); + if (failed(*parseResult)) + return failure(); + // Function body was parsed, make sure its not empty. + if (body->empty()) + return parser.emitError(loc, "expected non-empty function body"); + } + return success(); +} + +bool cir::FuncOp::isDeclaration() { + auto aliasee = getAliasee(); + if (!aliasee) + return isExternal(); + + auto *modOp = getOperation()->getParentOp(); + auto targetFn = dyn_cast_or_null( + mlir::SymbolTable::lookupSymbolIn(modOp, *aliasee)); + assert(targetFn && "expected aliasee to exist"); + return targetFn.isDeclaration(); +} + +::mlir::Region *cir::FuncOp::getCallableRegion() { + auto aliasee = getAliasee(); + if (!aliasee) + return isExternal() ? nullptr : &getBody(); + + // Note that we forward the region from the original aliasee + // function. + auto *modOp = getOperation()->getParentOp(); + auto targetFn = dyn_cast_or_null( + mlir::SymbolTable::lookupSymbolIn(modOp, *aliasee)); + assert(targetFn && "expected aliasee to exist"); + return targetFn.getCallableRegion(); +} + +void cir::FuncOp::print(OpAsmPrinter &p) { + p << ' '; + + // When adding a specific keyword here, do not forget to omit it in + // printFunctionAttributes below or there will be a syntax error when + // parsing + if (getBuiltin()) + p << "builtin "; + + if (getCoroutine()) + p << "coroutine "; + + if (getLambda()) + p << "lambda "; + + if (getNoProto()) + p << "no_proto "; + + if (getComdat()) + p << "comdat "; + + if (getLinkage() != GlobalLinkageKind::ExternalLinkage) + p << stringifyGlobalLinkageKind(getLinkage()) << ' '; + + auto vis = getVisibility(); + if (vis != mlir::SymbolTable::Visibility::Public) + p << vis << " "; + + auto cirVisibilityAttr = getGlobalVisibilityAttr(); + printVisibilityAttr(p, cirVisibilityAttr); + p << " "; + + // Print function name, signature, and control. + p.printSymbolName(getSymName()); + auto fnType = getFunctionType(); + SmallVector resultTypes; + if (!fnType.isVoid()) + function_interface_impl::printFunctionSignature( + p, *this, fnType.getInputs(), fnType.isVarArg(), + fnType.getReturnTypes()); + else + function_interface_impl::printFunctionSignature( + p, *this, fnType.getInputs(), fnType.isVarArg(), {}); + + if (mlir::ArrayAttr annotations = getAnnotationsAttr()) { + p << " "; + p.printAttribute(annotations); + } + + function_interface_impl::printFunctionAttributes( + p, *this, + // These are all omitted since they are custom printed already. 
+ {getAliaseeAttrName(), getBuiltinAttrName(), getCoroutineAttrName(), + getDsolocalAttrName(), getExtraAttrsAttrName(), + getFunctionTypeAttrName(), getGlobalCtorAttrName(), + getGlobalDtorAttrName(), getLambdaAttrName(), getLinkageAttrName(), + getCallingConvAttrName(), getNoProtoAttrName(), + getSymVisibilityAttrName(), getArgAttrsAttrName(), getResAttrsAttrName(), + getComdatAttrName(), getGlobalVisibilityAttrName(), + getAnnotationsAttrName()}); + + if (auto aliaseeName = getAliasee()) { + p << " alias("; + p.printSymbolName(*aliaseeName); + p << ")"; + } + + if (getCallingConv() != CallingConv::C) { + p << " cc("; + p << stringifyCallingConv(getCallingConv()); + p << ")"; + } + + if (auto globalCtor = getGlobalCtorAttr()) { + p << " global_ctor"; + if (!globalCtor.isDefaultPriority()) + p << "(" << globalCtor.getPriority() << ")"; + } + + if (auto globalDtor = getGlobalDtorAttr()) { + p << " global_dtor"; + if (!globalDtor.isDefaultPriority()) + p << "(" << globalDtor.getPriority() << ")"; + } + + if (!getExtraAttrs().getElements().empty()) { + p << " extra("; + p.printAttributeWithoutType(getExtraAttrs()); + p << ")"; + } + + // Print the body if this is not an external function. + Region &body = getOperation()->getRegion(0); + if (!body.empty()) { + p << ' '; + p.printRegion(body, /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/true); + } +} + +// Hook for OpTrait::FunctionLike, called after verifying that the 'type' +// attribute is present. This can check for preconditions of the +// getNumArguments hook not failing. +LogicalResult cir::FuncOp::verifyType() { + auto type = getFunctionType(); + if (!isa(type)) + return emitOpError("requires '" + getFunctionTypeAttrName().str() + + "' attribute of function type"); + if (!getNoProto() && type.isVarArg() && type.getNumInputs() == 0) + return emitError() + << "prototyped function must have at least one non-variadic input"; + return success(); +} + +LogicalResult cir::IntrinsicCallOp::verify() { + if (!getIntrinsicName().starts_with("llvm.")) + return emitOpError() << "intrinsic name must start with 'llvm.'"; + return success(); +} + +// Verifies linkage types +// - functions don't have 'common' linkage +// - external functions have 'external' or 'extern_weak' linkage +// - coroutine body must use at least one cir.await operation. +LogicalResult cir::FuncOp::verify() { + if (getLinkage() == cir::GlobalLinkageKind::CommonLinkage) + return emitOpError() << "functions cannot have '" + << stringifyGlobalLinkageKind( + cir::GlobalLinkageKind::CommonLinkage) + << "' linkage"; + + if (isExternal()) { + if (getLinkage() != cir::GlobalLinkageKind::ExternalLinkage && + getLinkage() != cir::GlobalLinkageKind::ExternalWeakLinkage) + return emitOpError() << "external functions must have '" + << stringifyGlobalLinkageKind( + cir::GlobalLinkageKind::ExternalLinkage) + << "' or '" + << stringifyGlobalLinkageKind( + cir::GlobalLinkageKind::ExternalWeakLinkage) + << "' linkage"; + return success(); + } + + if (!isDeclaration() && getCoroutine()) { + bool foundAwait = false; + this->walk([&](Operation *op) { + if (auto await = dyn_cast(op)) { + foundAwait = true; + return; + } + }); + if (!foundAwait) + return emitOpError() + << "coroutine body must use at least one cir.await op"; + } + + // Function alias should have an empty body. 
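+  // For example (illustrative): a declaration such as `cir.func @a alias(@b)`
+  // is valid, but attaching a body region to it is rejected below.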
+ if (auto fn = getAliasee()) { + if (fn && !getBody().empty()) + return emitOpError() << "a function alias '" << *fn + << "' must have empty body"; + } + + std::set labels; + std::set gotos; + + getOperation()->walk([&](mlir::Operation *op) { + if (auto lab = dyn_cast(op)) { + labels.emplace(lab.getLabel()); + } else if (auto goTo = dyn_cast(op)) { + gotos.emplace(goTo.getLabel()); + } + }); + + std::vector mismatched; + std::set_difference(gotos.begin(), gotos.end(), labels.begin(), labels.end(), + std::back_inserter(mismatched)); + + if (!mismatched.empty()) + return emitOpError() << "goto/label mismatch"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// CallOp +//===----------------------------------------------------------------------===// + +mlir::Value cir::CallOp::getIndirectCall() { + assert(isIndirect()); + return getOperand(0); +} + +mlir::Operation::operand_iterator cir::CallOp::arg_operand_begin() { + auto arg_begin = operand_begin(); + if (isIndirect()) + arg_begin++; + return arg_begin; +} +mlir::Operation::operand_iterator cir::CallOp::arg_operand_end() { + return operand_end(); +} + +/// Return the operand at index 'i', accounts for indirect call. +Value cir::CallOp::getArgOperand(unsigned i) { + if (isIndirect()) + i++; + return getOperand(i); +} +/// Return the number of operands, accounts for indirect call. +unsigned cir::CallOp::getNumArgOperands() { + if (isIndirect()) + return this->getOperation()->getNumOperands() - 1; + return this->getOperation()->getNumOperands(); +} + +static LogicalResult +verifyCallCommInSymbolUses(Operation *op, SymbolTableCollection &symbolTable) { + // Callee attribute only need on indirect calls. + auto fnAttr = op->getAttrOfType("callee"); + if (!fnAttr) + return success(); + + FuncOp fn = + symbolTable.lookupNearestSymbolFrom(op, fnAttr); + if (!fn) + return op->emitOpError() << "'" << fnAttr.getValue() + << "' does not reference a valid function"; + auto callIf = dyn_cast(op); + assert(callIf && "expected CIR call interface to be always available"); + + // Verify that the operand and result types match the callee. Note that + // argument-checking is disabled for functions without a prototype. + auto fnType = fn.getFunctionType(); + if (!fn.getNoProto()) { + unsigned numCallOperands = callIf.getNumArgOperands(); + unsigned numFnOpOperands = fnType.getNumInputs(); + + if (!fnType.isVarArg() && numCallOperands != numFnOpOperands) + return op->emitOpError("incorrect number of operands for callee"); + + if (fnType.isVarArg() && numCallOperands < numFnOpOperands) + return op->emitOpError("too few operands for callee"); + + for (unsigned i = 0, e = numFnOpOperands; i != e; ++i) + if (callIf.getArgOperand(i).getType() != fnType.getInput(i)) + return op->emitOpError("operand type mismatch: expected operand type ") + << fnType.getInput(i) << ", but provided " + << op->getOperand(i).getType() << " for operand number " << i; + } + + // Calling convention must match. + if (callIf.getCallingConv() != fn.getCallingConv()) + return op->emitOpError("calling convention mismatch: expected ") + << stringifyCallingConv(fn.getCallingConv()) << ", but provided " + << stringifyCallingConv(callIf.getCallingConv()); + + // Void function must not return any results. + if (fnType.isVoid() && op->getNumResults() != 0) + return op->emitOpError("callee returns void but call has results"); + + // Non-void function calls must return exactly one result. 
+ if (!fnType.isVoid() && op->getNumResults() != 1) + return op->emitOpError("incorrect number of results for callee"); + + // Parent function and return value types must match. + if (!fnType.isVoid() && + op->getResultTypes().front() != fnType.getReturnType()) { + return op->emitOpError("result type mismatch: expected ") + << fnType.getReturnType() << ", but provided " + << op->getResult(0).getType(); + } + + return success(); +} + +static mlir::ParseResult +parseTryCallBranches(mlir::OpAsmParser &parser, mlir::OperationState &result, + llvm::SmallVectorImpl + &continueOperands, + llvm::SmallVectorImpl + &landingPadOperands, + llvm::SmallVectorImpl &continueTypes, + llvm::SmallVectorImpl &landingPadTypes, + llvm::SMLoc &continueOperandsLoc, + llvm::SMLoc &landingPadOperandsLoc) { + mlir::Block *continueSuccessor = nullptr; + mlir::Block *landingPadSuccessor = nullptr; + + if (parser.parseSuccessor(continueSuccessor)) + return mlir::failure(); + if (mlir::succeeded(parser.parseOptionalLParen())) { + continueOperandsLoc = parser.getCurrentLocation(); + if (parser.parseOperandList(continueOperands)) + return mlir::failure(); + if (parser.parseColon()) + return mlir::failure(); + + if (parser.parseTypeList(continueTypes)) + return mlir::failure(); + if (parser.parseRParen()) + return mlir::failure(); + } + if (parser.parseComma()) + return mlir::failure(); + + if (parser.parseSuccessor(landingPadSuccessor)) + return mlir::failure(); + if (mlir::succeeded(parser.parseOptionalLParen())) { + + landingPadOperandsLoc = parser.getCurrentLocation(); + if (parser.parseOperandList(landingPadOperands)) + return mlir::failure(); + if (parser.parseColon()) + return mlir::failure(); + + if (parser.parseTypeList(landingPadTypes)) + return mlir::failure(); + if (parser.parseRParen()) + return mlir::failure(); + } + { + auto loc = parser.getCurrentLocation(); + (void)loc; + if (parser.parseOptionalAttrDict(result.attributes)) + return mlir::failure(); + } + result.addSuccessors(continueSuccessor); + result.addSuccessors(landingPadSuccessor); + return mlir::success(); +} + +static ::mlir::ParseResult parseCallCommon(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result, + llvm::StringRef extraAttrsAttrName, + bool hasDestinationBlocks = false) { + mlir::FlatSymbolRefAttr calleeAttr; + llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> ops; + llvm::SMLoc opsLoc; + (void)opsLoc; + llvm::ArrayRef<::mlir::Type> operandsTypes; + llvm::ArrayRef<::mlir::Type> allResultTypes; + + // Control flow related + llvm::SmallVector continueOperands; + llvm::SMLoc continueOperandsLoc; + llvm::SmallVector continueTypes; + llvm::SmallVector landingPadOperands; + llvm::SMLoc landingPadOperandsLoc; + llvm::SmallVector landingPadTypes; + + bool hasExceptions = false; + if (::mlir::succeeded(parser.parseOptionalKeyword("exception"))) { + result.addAttribute("exception", parser.getBuilder().getUnitAttr()); + hasExceptions = true; + } + + // If we cannot parse a string callee, it means this is an indirect call. 
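+  // Illustrative sketch of the two accepted forms (types elided):
+  //   direct:   cir.call @callee(%a, %b) : (...) -> ...
+  //   indirect: cir.call %fn_ptr(%a, %b) : (...) -> ...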
+ if (!parser.parseOptionalAttribute(calleeAttr, "callee", result.attributes) + .has_value()) { + OpAsmParser::UnresolvedOperand indirectVal; + // Do not resolve right now, since we need to figure out the type + if (parser.parseOperand(indirectVal).failed()) + return failure(); + ops.push_back(indirectVal); + } + + if (parser.parseLParen()) + return ::mlir::failure(); + + opsLoc = parser.getCurrentLocation(); + if (parser.parseOperandList(ops)) + return ::mlir::failure(); + if (parser.parseRParen()) + return ::mlir::failure(); + + if (hasDestinationBlocks) + if (parseTryCallBranches(parser, result, continueOperands, + landingPadOperands, continueTypes, landingPadTypes, + continueOperandsLoc, landingPadOperandsLoc) + .failed()) + return ::mlir::failure(); + + if (parser.parseOptionalAttrDict(result.attributes)) + return ::mlir::failure(); + if (parser.parseColon()) + return ::mlir::failure(); + + ::mlir::FunctionType opsFnTy; + if (parser.parseType(opsFnTy)) + return ::mlir::failure(); + operandsTypes = opsFnTy.getInputs(); + allResultTypes = opsFnTy.getResults(); + result.addTypes(allResultTypes); + + if (parser.resolveOperands(ops, operandsTypes, opsLoc, result.operands)) + return ::mlir::failure(); + + if (hasDestinationBlocks) { + // The TryCall ODS layout is: cont, landing_pad, operands. + llvm::copy(::llvm::ArrayRef( + {static_cast(continueOperands.size()), + static_cast(landingPadOperands.size()), + static_cast(ops.size())}), + result.getOrAddProperties() + .operandSegmentSizes.begin()); + if (parser.resolveOperands(continueOperands, continueTypes, + continueOperandsLoc, result.operands)) + return ::mlir::failure(); + if (parser.resolveOperands(landingPadOperands, landingPadTypes, + landingPadOperandsLoc, result.operands)) + return ::mlir::failure(); + } + + auto &builder = parser.getBuilder(); + if (parser.parseOptionalKeyword("cc").succeeded()) { + if (parser.parseLParen().failed()) + return failure(); + mlir::cir::CallingConv callingConv; + if (parseCIRKeyword(parser, callingConv).failed()) + return failure(); + if (parser.parseRParen().failed()) + return failure(); + result.addAttribute("calling_conv", mlir::cir::CallingConvAttr::get( + builder.getContext(), callingConv)); + } + + Attribute extraAttrs; + if (::mlir::succeeded(parser.parseOptionalKeyword("extra"))) { + if (parser.parseLParen().failed()) + return failure(); + if (parser.parseAttribute(extraAttrs).failed()) + return failure(); + if (parser.parseRParen().failed()) + return failure(); + } else { + NamedAttrList empty; + extraAttrs = mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), empty.getDictionary(builder.getContext())); + } + result.addAttribute(extraAttrsAttrName, extraAttrs); + + // If exception is present and there are cleanups, this should be latest thing + // present (after all attributes, etc). 
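+  // Illustrative (assumed printed form):
+  //   cir.call exception @may_throw() : () -> () cleanup { ... }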
+ mlir::Region *cleanupRegion = nullptr; + if (!hasDestinationBlocks) // Regular cir.call + cleanupRegion = result.addRegion(); + if (hasExceptions) { + if (parser.parseOptionalKeyword("cleanup").succeeded()) { + if (parser.parseRegion(*cleanupRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + } + } + + return ::mlir::success(); +} + +void printCallCommon(Operation *op, mlir::Value indirectCallee, + mlir::FlatSymbolRefAttr flatSym, + ::mlir::OpAsmPrinter &state, + ::mlir::cir::ExtraFuncAttributesAttr extraAttrs, + ::mlir::cir::CallingConv callingConv, + ::mlir::UnitAttr exception = {}, + mlir::Block *cont = nullptr, + mlir::Block *landingPad = nullptr) { + state << ' '; + + auto callLikeOp = mlir::cast(op); + auto ops = callLikeOp.getArgOperands(); + + if (exception) + state << "exception "; + + if (flatSym) { // Direct calls + state.printAttributeWithoutType(flatSym); + } else { // Indirect calls + assert(indirectCallee); + state << indirectCallee; + } + state << "("; + state << ops; + state << ")"; + + if (cont) { + assert(landingPad && "expected two successors"); + auto tryCall = dyn_cast(op); + assert(tryCall && "regular calls do not branch"); + state << ' ' << tryCall.getCont(); + if (!tryCall.getContOperands().empty()) { + state << "("; + state << tryCall.getContOperands(); + state << ' ' << ":"; + state << ' '; + state << tryCall.getContOperands().getTypes(); + state << ")"; + } + state << ","; + state << ' '; + state << tryCall.getLandingPad(); + if (!tryCall.getLandingPadOperands().empty()) { + state << "("; + state << tryCall.getLandingPadOperands(); + state << ' ' << ":"; + state << ' '; + state << tryCall.getLandingPadOperands().getTypes(); + state << ")"; + } + } + + llvm::SmallVector<::llvm::StringRef, 4> elidedAttrs; + elidedAttrs.push_back("callee"); + elidedAttrs.push_back("ast"); + elidedAttrs.push_back("extra_attrs"); + elidedAttrs.push_back("calling_conv"); + elidedAttrs.push_back("exception"); + elidedAttrs.push_back("operandSegmentSizes"); + + state.printOptionalAttrDict(op->getAttrs(), elidedAttrs); + state << ' ' << ":"; + state << ' '; + state.printFunctionalType(op->getOperands().getTypes(), op->getResultTypes()); + + if (callingConv != mlir::cir::CallingConv::C) { + state << " cc("; + state << stringifyCallingConv(callingConv); + state << ")"; + } + + if (!extraAttrs.getElements().empty()) { + state << " extra("; + state.printAttributeWithoutType(extraAttrs); + state << ")"; + } + + // If exception is present and there are cleanups, this should be latest thing + // present (after all attributes, etc). + if (exception) { + auto call = dyn_cast(op); + assert(call && "expected regular call"); + if (!call.getCleanup().empty()) { + state << " cleanup "; + state.printRegion(call.getCleanup()); + } + } +} + +LogicalResult +cir::CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + return verifyCallCommInSymbolUses(*this, symbolTable); +} + +::mlir::ParseResult CallOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { + + return parseCallCommon(parser, result, getExtraAttrsAttrName(result.name)); +} + +void CallOp::print(::mlir::OpAsmPrinter &state) { + mlir::Value indirectCallee = isIndirect() ? 
getIndirectCall() : nullptr; + mlir::cir::CallingConv callingConv = getCallingConv(); + mlir::UnitAttr exception = getExceptionAttr(); + printCallCommon(*this, indirectCallee, getCalleeAttr(), state, + getExtraAttrs(), callingConv, exception); +} + +//===----------------------------------------------------------------------===// +// TryCallOp +//===----------------------------------------------------------------------===// + +mlir::Value cir::TryCallOp::getIndirectCall() { + assert(isIndirect()); + return getOperand(0); +} + +mlir::Operation::operand_iterator cir::TryCallOp::arg_operand_begin() { + auto arg_begin = operand_begin(); + if (isIndirect()) + arg_begin++; + return arg_begin; +} +mlir::Operation::operand_iterator cir::TryCallOp::arg_operand_end() { + return operand_end(); +} + +/// Return the operand at index 'i', accounts for indirect call. +Value cir::TryCallOp::getArgOperand(unsigned i) { + if (isIndirect()) + i++; + return getOperand(i); +} +/// Return the number of operands, accounts for indirect call. +unsigned cir::TryCallOp::getNumArgOperands() { + if (isIndirect()) + return this->getOperation()->getNumOperands() - 1; + return this->getOperation()->getNumOperands(); +} + +LogicalResult +cir::TryCallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + return verifyCallCommInSymbolUses(*this, symbolTable); +} + +::mlir::ParseResult TryCallOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { + + return parseCallCommon(parser, result, getExtraAttrsAttrName(result.name), + /*hasDestinationBlocks=*/true); +} + +void TryCallOp::print(::mlir::OpAsmPrinter &state) { + mlir::Value indirectCallee = isIndirect() ? getIndirectCall() : nullptr; + mlir::cir::CallingConv callingConv = getCallingConv(); + printCallCommon(*this, indirectCallee, getCalleeAttr(), state, + getExtraAttrs(), callingConv, {}, getCont(), getLandingPad()); +} + +mlir::SuccessorOperands TryCallOp::getSuccessorOperands(unsigned index) { + assert(index < getNumSuccessors() && "invalid successor index"); + if (index == 0) + return SuccessorOperands(getContOperandsMutable()); + if (index == 1) + return SuccessorOperands(getLandingPadOperandsMutable()); + + // index == 2 + return SuccessorOperands(getArgOperandsMutable()); +} + +//===----------------------------------------------------------------------===// +// UnaryOp +//===----------------------------------------------------------------------===// + +LogicalResult UnaryOp::verify() { + switch (getKind()) { + case cir::UnaryOpKind::Inc: + case cir::UnaryOpKind::Dec: + case cir::UnaryOpKind::Plus: + case cir::UnaryOpKind::Minus: + case cir::UnaryOpKind::Not: + // Nothing to verify. 
+ return success(); + } + + llvm_unreachable("Unknown UnaryOp kind?"); +} + +//===----------------------------------------------------------------------===// +// AwaitOp +//===----------------------------------------------------------------------===// + +void AwaitOp::build(OpBuilder &builder, OperationState &result, + mlir::cir::AwaitKind kind, + function_ref readyBuilder, + function_ref suspendBuilder, + function_ref resumeBuilder) { + result.addAttribute(getKindAttrName(result.name), + cir::AwaitKindAttr::get(builder.getContext(), kind)); + { + OpBuilder::InsertionGuard guard(builder); + Region *readyRegion = result.addRegion(); + builder.createBlock(readyRegion); + readyBuilder(builder, result.location); + } + + { + OpBuilder::InsertionGuard guard(builder); + Region *suspendRegion = result.addRegion(); + builder.createBlock(suspendRegion); + suspendBuilder(builder, result.location); + } + + { + OpBuilder::InsertionGuard guard(builder); + Region *resumeRegion = result.addRegion(); + builder.createBlock(resumeRegion); + resumeBuilder(builder, result.location); + } +} + +/// Given the region at `index`, or the parent operation if `index` is None, +/// return the successor regions. These are the regions that may be selected +/// during the flow of control. `operands` is a set of optional attributes +/// that correspond to a constant value for each operand, or null if that +/// operand is not a constant. +void AwaitOp::getSuccessorRegions(mlir::RegionBranchPoint point, + SmallVectorImpl ®ions) { + // If any index all the underlying regions branch back to the parent + // operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // FIXME: we want to look at cond region for getting more accurate results + // if the other regions will get a chance to execute. 
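+  // Conservatively report all three sub-regions as possible successors.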
+ regions.push_back(RegionSuccessor(&this->getReady())); + regions.push_back(RegionSuccessor(&this->getSuspend())); + regions.push_back(RegionSuccessor(&this->getResume())); +} + +LogicalResult AwaitOp::verify() { + if (!isa(this->getReady().back().getTerminator())) + return emitOpError("ready region must end with cir.condition"); + return success(); +} + +//===----------------------------------------------------------------------===// +// CIR defined traits +//===----------------------------------------------------------------------===// + +LogicalResult +mlir::OpTrait::impl::verifySameFirstOperandAndResultType(Operation *op) { + if (failed(verifyAtLeastNOperands(op, 1)) || failed(verifyOneResult(op))) + return failure(); + + auto type = op->getResult(0).getType(); + auto opType = op->getOperand(0).getType(); + + if (type != opType) + return op->emitOpError() + << "requires the same type for first operand and result"; + + return success(); +} + +LogicalResult +mlir::OpTrait::impl::verifySameSecondOperandAndResultType(Operation *op) { + if (failed(verifyAtLeastNOperands(op, 2)) || failed(verifyOneResult(op))) + return failure(); + + auto type = op->getResult(0).getType(); + auto opType = op->getOperand(1).getType(); + + if (type != opType) + return op->emitOpError() + << "requires the same type for second operand and result"; + + return success(); +} + +LogicalResult +mlir::OpTrait::impl::verifySameFirstSecondOperandAndResultType(Operation *op) { + if (failed(verifyAtLeastNOperands(op, 3)) || failed(verifyOneResult(op))) + return failure(); + + auto checkType = op->getResult(0).getType(); + if (checkType != op->getOperand(0).getType() && + checkType != op->getOperand(1).getType()) + return op->emitOpError() + << "requires the same type for first, second operand and result"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// CIR attributes +// FIXME: move all of these to CIRAttrs.cpp +//===----------------------------------------------------------------------===// + +LogicalResult mlir::cir::ConstArrayAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::Type type, Attribute attr, int trailingZerosNum) { + + if (!(mlir::isa(attr) || mlir::isa(attr))) + return emitError() << "constant array expects ArrayAttr or StringAttr"; + + if (auto strAttr = mlir::dyn_cast(attr)) { + mlir::cir::ArrayType at = mlir::cast(type); + auto intTy = mlir::dyn_cast(at.getEltType()); + + // TODO: add CIR type for char. + if (!intTy || intTy.getWidth() != 8) { + emitError() << "constant array element for string literals expects " + "!cir.int element type"; + return failure(); + } + return success(); + } + + assert(mlir::isa(attr)); + auto arrayAttr = mlir::cast(attr); + auto at = mlir::cast(type); + + // Make sure both number of elements and subelement types match type. + if (at.getSize() != arrayAttr.size() + trailingZerosNum) + return emitError() << "constant array size should match type size"; + LogicalResult eltTypeCheck = success(); + arrayAttr.walkImmediateSubElements( + [&](Attribute attr) { + // Once we find a mismatch, stop there. 
+ if (eltTypeCheck.failed()) + return; + auto typedAttr = mlir::dyn_cast(attr); + if (!typedAttr || typedAttr.getType() != at.getEltType()) { + eltTypeCheck = failure(); + emitError() + << "constant array element should match array element type"; + } + }, + [&](Type type) {}); + return eltTypeCheck; +} + +::mlir::Attribute ConstArrayAttr::parse(::mlir::AsmParser &parser, + ::mlir::Type type) { + ::mlir::FailureOr<::mlir::Type> resultTy; + ::mlir::FailureOr resultVal; + ::llvm::SMLoc loc = parser.getCurrentLocation(); + (void)loc; + // Parse literal '<' + if (parser.parseLess()) + return {}; + + // Parse variable 'value' + resultVal = ::mlir::FieldParser::parse(parser); + if (failed(resultVal)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse ConstArrayAttr parameter 'value' which is " + "to be a `Attribute`"); + return {}; + } + + // ArrayAttrrs have per-element type, not the type of the array... + if (mlir::dyn_cast(*resultVal)) { + // Array has implicit type: infer from const array type. + if (parser.parseOptionalColon().failed()) { + resultTy = type; + } else { // Array has explicit type: parse it. + resultTy = ::mlir::FieldParser<::mlir::Type>::parse(parser); + if (failed(resultTy)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse ConstArrayAttr parameter 'type' which is " + "to be a `::mlir::Type`"); + return {}; + } + } + } else { + assert(mlir::isa(*resultVal) && "IDK"); + auto ta = mlir::cast(*resultVal); + resultTy = ta.getType(); + if (mlir::isa(*resultTy)) { + parser.emitError(parser.getCurrentLocation(), + "expected type declaration for string literal"); + return {}; + } + } + + auto zeros = 0; + if (parser.parseOptionalComma().succeeded()) { + if (parser.parseOptionalKeyword("trailing_zeros").succeeded()) { + auto typeSize = + mlir::cast(resultTy.value()).getSize(); + auto elts = resultVal.value(); + if (auto str = mlir::dyn_cast(elts)) + zeros = typeSize - str.size(); + else + zeros = typeSize - mlir::cast(elts).size(); + } else { + return {}; + } + } + + // Parse literal '>' + if (parser.parseGreater()) + return {}; + + return parser.getChecked( + loc, parser.getContext(), resultTy.value(), resultVal.value(), zeros); +} + +void ConstArrayAttr::print(::mlir::AsmPrinter &printer) const { + printer << "<"; + printer.printStrippedAttrOrType(getElts()); + if (auto zeros = getTrailingZerosNum()) + printer << ", trailing_zeros"; + printer << ">"; +} + +LogicalResult mlir::cir::ConstVectorAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::Type type, mlir::ArrayAttr arrayAttr) { + + if (!mlir::isa(type)) { + return emitError() + << "type of cir::ConstVectorAttr is not a cir::VectorType: " << type; + } + auto vecType = mlir::cast(type); + + // Do the number of elements match? + if (vecType.getSize() != arrayAttr.size()) { + return emitError() + << "number of constant elements should match vector size"; + } + // Do the types of the elements match? 
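+  // Walk the elements and fail on the first one whose type differs from the
+  // vector element type.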
+ LogicalResult elementTypeCheck = success(); + arrayAttr.walkImmediateSubElements( + [&](Attribute element) { + if (elementTypeCheck.failed()) { + // An earlier element didn't match + return; + } + auto typedElement = mlir::dyn_cast(element); + if (!typedElement || typedElement.getType() != vecType.getEltType()) { + elementTypeCheck = failure(); + emitError() << "constant type should match vector element type"; + } + }, + [&](Type) {}); + return elementTypeCheck; +} + +::mlir::Attribute ConstVectorAttr::parse(::mlir::AsmParser &parser, + ::mlir::Type type) { + ::mlir::FailureOr<::mlir::Type> resultType; + ::mlir::FailureOr resultValue; + ::llvm::SMLoc loc = parser.getCurrentLocation(); + + // Parse literal '<' + if (parser.parseLess()) { + return {}; + } + + // Parse variable 'value' + resultValue = ::mlir::FieldParser::parse(parser); + if (failed(resultValue)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse ConstVectorAttr parameter 'value' as " + "an attribute"); + return {}; + } + + if (parser.parseOptionalColon().failed()) { + resultType = type; + } else { + resultType = ::mlir::FieldParser<::mlir::Type>::parse(parser); + if (failed(resultType)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse ConstVectorAttr parameter 'type' as " + "an MLIR type"); + return {}; + } + } + + // Parse literal '>' + if (parser.parseGreater()) { + return {}; + } + + return parser.getChecked( + loc, parser.getContext(), resultType.value(), resultValue.value()); +} + +void ConstVectorAttr::print(::mlir::AsmPrinter &printer) const { + printer << "<"; + printer.printStrippedAttrOrType(getElts()); + printer << ">"; +} + +::mlir::Attribute SignedOverflowBehaviorAttr::parse(::mlir::AsmParser &parser, + ::mlir::Type type) { + if (parser.parseLess()) + return {}; + auto behavior = parseOptionalCIRKeyword( + parser, mlir::cir::sob::SignedOverflowBehavior::undefined); + if (parser.parseGreater()) + return {}; + + return SignedOverflowBehaviorAttr::get(parser.getContext(), behavior); +} + +void SignedOverflowBehaviorAttr::print(::mlir::AsmPrinter &printer) const { + printer << "<"; + switch (getBehavior()) { + case sob::SignedOverflowBehavior::undefined: + printer << "undefined"; + break; + case sob::SignedOverflowBehavior::defined: + printer << "defined"; + break; + case sob::SignedOverflowBehavior::trapping: + printer << "trapping"; + break; + } + printer << ">"; +} + +LogicalResult TypeInfoAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::Type type, ::mlir::ArrayAttr typeinfoData) { + + if (mlir::cir::ConstStructAttr::verify(emitError, type, typeinfoData) + .failed()) + return failure(); + + for (auto &member : typeinfoData) { + if (llvm::isa(member)) + continue; + emitError() << "expected GlobalViewAttr or IntAttr attribute"; + return failure(); + } + + return success(); +} + +LogicalResult +VTableAttr::verify(::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::Type type, ::mlir::ArrayAttr vtableData) { + auto sTy = mlir::dyn_cast_if_present(type); + if (!sTy) { + emitError() << "expected !cir.struct type result"; + return failure(); + } + if (sTy.getMembers().empty() || vtableData.empty()) { + emitError() << "expected struct type with one or more subtype"; + return failure(); + } + + for (size_t i = 0; i < sTy.getMembers().size(); ++i) { + + auto arrayTy = mlir::dyn_cast(sTy.getMembers()[i]); + auto constArrayAttr = + mlir::dyn_cast(vtableData[i]); + if (!arrayTy || !constArrayAttr) { + emitError() << "expected 
struct type with one array element"; + return failure(); + } + + if (mlir::cir::ConstStructAttr::verify(emitError, type, vtableData) + .failed()) + return failure(); + + LogicalResult eltTypeCheck = success(); + if (auto arrayElts = mlir::dyn_cast(constArrayAttr.getElts())) { + arrayElts.walkImmediateSubElements( + [&](Attribute attr) { + if (mlir::isa(attr) || + mlir::isa(attr)) + return; + emitError() << "expected GlobalViewAttr attribute"; + eltTypeCheck = failure(); + }, + [&](Type type) {}); + if (eltTypeCheck.failed()) { + return eltTypeCheck; + } + } + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// CopyOp Definitions +//===----------------------------------------------------------------------===// + +LogicalResult CopyOp::verify() { + + // A data layout is required for us to know the number of bytes to be copied. + if (!getType().getPointee().hasTrait()) + return emitError() << "missing data layout for pointee type"; + + if (getSrc() == getDst()) + return emitError() << "source and destination are the same"; + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// MemCpyOp Definitions +//===----------------------------------------------------------------------===// + +LogicalResult MemCpyOp::verify() { + auto voidPtr = + cir::PointerType::get(getContext(), cir::VoidType::get(getContext())); + + if (!getLenTy().isUnsigned()) + return emitError() << "memcpy length must be an unsigned integer"; + + if (getSrcTy() != voidPtr || getDstTy() != voidPtr) + return emitError() << "memcpy src and dst must be void pointers"; + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// GetMemberOp Definitions +//===----------------------------------------------------------------------===// + +LogicalResult GetMemberOp::verify() { + + const auto recordTy = dyn_cast(getAddrTy().getPointee()); + if (!recordTy) + return emitError() << "expected pointer to a record type"; + + if (recordTy.getMembers().size() <= getIndex()) + return emitError() << "member index out of bounds"; + + // FIXME(cir): member type check is disabled for classes as the codegen for + // these still need to be patched. 
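+  // For non-class records, the result pointee type must match the selected
+  // member's type.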
+ if (!recordTy.isClass() && + recordTy.getMembers()[getIndex()] != getResultTy().getPointee()) + return emitError() << "member type mismatch"; + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// GetRuntimeMemberOp Definitions +//===----------------------------------------------------------------------===// + +LogicalResult GetRuntimeMemberOp::verify() { + auto recordTy = + cast(cast(getAddr().getType()).getPointee()); + auto memberPtrTy = getMember().getType(); + + if (recordTy != memberPtrTy.getClsTy()) { + emitError() << "record type does not match the member pointer type"; + return mlir::failure(); + } + + if (getType().getPointee() != memberPtrTy.getMemberTy()) { + emitError() << "result type does not match the member pointer type"; + return mlir::failure(); + } + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// GetMethodOp Definitions +//===----------------------------------------------------------------------===// + +LogicalResult GetMethodOp::verify() { + auto methodTy = getMethod().getType(); + + // Assume objectTy is !cir.ptr + auto objectPtrTy = mlir::cast(getObject().getType()); + auto objectTy = objectPtrTy.getPointee(); + + if (methodTy.getClsTy() != objectTy) { + emitError() << "method class type and object type do not match"; + return mlir::failure(); + } + + // Assume methodFuncTy is !cir.func + auto calleePtrTy = mlir::cast(getCallee().getType()); + auto calleeTy = mlir::cast(calleePtrTy.getPointee()); + auto methodFuncTy = methodTy.getMemberFuncTy(); + + // We verify at here that calleeTy is !cir.func, !Args)> + // Note that the first parameter type of the callee is !cir.ptr instead + // of !cir.ptr because the "this" pointer may be adjusted before calling + // the callee. 
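+  // Illustrative: for a method like `int C::f(float)`, the callee is expected
+  // to take a leading void-pointer receiver followed by the method's own
+  // parameters (a sketch; exact CIR type spellings are assumed).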
+ + if (methodFuncTy.getReturnType() != calleeTy.getReturnType()) { + emitError() << "method return type and callee return type do not match"; + return mlir::failure(); + } + + auto calleeArgsTy = calleeTy.getInputs(); + auto methodFuncArgsTy = methodFuncTy.getInputs(); + + if (calleeArgsTy.empty()) { + emitError() << "callee parameter list lacks receiver object ptr"; + return mlir::failure(); + } + + auto calleeThisArgPtrTy = + mlir::dyn_cast(calleeArgsTy[0]); + if (!calleeThisArgPtrTy || + !mlir::isa(calleeThisArgPtrTy.getPointee())) { + emitError() << "the first parameter of callee must be a void pointer"; + return mlir::failure(); + } + + if (calleeArgsTy.slice(1) != methodFuncArgsTy) { + emitError() << "callee parameters and method parameters do not match"; + return mlir::failure(); + } + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// InlineAsmOp Definitions +//===----------------------------------------------------------------------===// + +void cir::InlineAsmOp::print(OpAsmPrinter &p) { + p << '(' << getAsmFlavor() << ", "; + p.increaseIndent(); + p.printNewline(); + + llvm::SmallVector names{"out", "in", "in_out"}; + auto nameIt = names.begin(); + auto attrIt = getOperandAttrs().begin(); + + for (auto ops : getOperands()) { + p << *nameIt << " = "; + + p << '['; + llvm::interleaveComma(llvm::make_range(ops.begin(), ops.end()), p, + [&](Value value) { + p.printOperand(value); + p << " : " << value.getType(); + if (*attrIt) + p << " (maybe_memory)"; + attrIt++; + }); + p << "],"; + p.printNewline(); + ++nameIt; + } + + p << "{"; + p.printString(getAsmString()); + p << " "; + p.printString(getConstraints()); + p << "}"; + p.decreaseIndent(); + p << ')'; + if (getSideEffects()) + p << " side_effects"; + + llvm::SmallVector<::llvm::StringRef, 2> elidedAttrs; + elidedAttrs.push_back("asm_flavor"); + elidedAttrs.push_back("asm_string"); + elidedAttrs.push_back("constraints"); + elidedAttrs.push_back("operand_attrs"); + elidedAttrs.push_back("operands_segments"); + elidedAttrs.push_back("side_effects"); + p.printOptionalAttrDict(getOperation()->getAttrs(), elidedAttrs); + + if (auto v = getRes()) + p << " -> " << v.getType(); +} + +ParseResult cir::InlineAsmOp::parse(OpAsmParser &parser, + OperationState &result) { + llvm::SmallVector operand_attrs; + llvm::SmallVector operandsGroupSizes; + std::string asm_string, constraints; + Type resType; + auto *ctxt = parser.getBuilder().getContext(); + + auto error = [&](const Twine &msg) { + parser.emitError(parser.getCurrentLocation(), msg); + ; + return mlir::failure(); + }; + + auto expected = [&](const std::string &c) { + return error("expected '" + c + "'"); + }; + + if (parser.parseLParen().failed()) + return expected("("); + + auto flavor = mlir::FieldParser::parse(parser); + if (failed(flavor)) + return error("Unknown AsmFlavor"); + + if (parser.parseComma().failed()) + return expected(","); + + auto parseValue = [&](Value &v) { + OpAsmParser::UnresolvedOperand op; + + if (parser.parseOperand(op) || parser.parseColon()) + return mlir::failure(); + + Type typ; + if (parser.parseType(typ).failed()) + return error("can't parse operand type"); + llvm::SmallVector tmp; + if (parser.resolveOperand(op, typ, tmp)) + return error("can't resolve operand"); + v = tmp[0]; + return mlir::success(); + }; + + auto parseOperands = [&](llvm::StringRef name) { + if (parser.parseKeyword(name).failed()) + return error("expected " + name + " operands here"); + if 
(parser.parseEqual().failed()) + return expected("="); + if (parser.parseLSquare().failed()) + return expected("["); + + int size = 0; + if (parser.parseOptionalRSquare().succeeded()) { + operandsGroupSizes.push_back(size); + if (parser.parseComma()) + return expected(","); + return mlir::success(); + } + + if (parser.parseCommaSeparatedList([&]() { + Value val; + if (parseValue(val).succeeded()) { + result.operands.push_back(val); + size++; + + if (parser.parseOptionalLParen().failed()) { + operand_attrs.push_back(mlir::Attribute()); + return mlir::success(); + } + + if (parser.parseKeyword("maybe_memory").succeeded()) { + operand_attrs.push_back(mlir::UnitAttr::get(ctxt)); + if (parser.parseRParen()) + return expected(")"); + return mlir::success(); + } + } + return mlir::failure(); + })) + return mlir::failure(); + + if (parser.parseRSquare().failed() || parser.parseComma().failed()) + return expected("]"); + operandsGroupSizes.push_back(size); + return mlir::success(); + }; + + if (parseOperands("out").failed() || parseOperands("in").failed() || + parseOperands("in_out").failed()) + return error("failed to parse operands"); + + if (parser.parseLBrace()) + return expected("{"); + if (parser.parseString(&asm_string)) + return error("asm string parsing failed"); + if (parser.parseString(&constraints)) + return error("constraints string parsing failed"); + if (parser.parseRBrace()) + return expected("}"); + if (parser.parseRParen()) + return expected(")"); + + if (parser.parseOptionalKeyword("side_effects").succeeded()) + result.attributes.set("side_effects", UnitAttr::get(ctxt)); + + if (parser.parseOptionalArrow().failed()) + return mlir::failure(); + + if (parser.parseType(resType).failed()) + return mlir::failure(); + + if (parser.parseOptionalAttrDict(result.attributes)) + return mlir::failure(); + + result.attributes.set("asm_flavor", AsmFlavorAttr::get(ctxt, *flavor)); + result.attributes.set("asm_string", StringAttr::get(ctxt, asm_string)); + result.attributes.set("constraints", StringAttr::get(ctxt, constraints)); + result.attributes.set("operand_attrs", ArrayAttr::get(ctxt, operand_attrs)); + result.getOrAddProperties().operands_segments = + parser.getBuilder().getDenseI32ArrayAttr(operandsGroupSizes); + if (resType) + result.addTypes(TypeRange{resType}); + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// Atomic Definitions +//===----------------------------------------------------------------------===// + +LogicalResult AtomicFetch::verify() { + if (getBinop() == mlir::cir::AtomicFetchKind::Add || + getBinop() == mlir::cir::AtomicFetchKind::Sub) + return mlir::success(); + + if (!mlir::isa(getVal().getType())) + return emitError() << "only operates on integer values"; + + return mlir::success(); +} + +LogicalResult BinOp::verify() { + bool noWrap = getNoUnsignedWrap() || getNoSignedWrap(); + + if (!isa(getType()) && noWrap) + return emitError() + << "only operations on integer values may have nsw/nuw flags"; + + bool noWrapOps = getKind() == mlir::cir::BinOpKind::Add || + getKind() == mlir::cir::BinOpKind::Sub || + getKind() == mlir::cir::BinOpKind::Mul; + + if (noWrap && !noWrapOps) + return emitError() << "The nsw/nuw flags are applicable to opcodes: 'add', " + "'sub' and 'mul'"; + + bool complexOps = getKind() == mlir::cir::BinOpKind::Add || + getKind() == mlir::cir::BinOpKind::Sub; + if (isa(getType()) && !complexOps) + return emitError() + << "cir.binop can only represent 'add' and 'sub' on complex 
numbers"; + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// LabelOp Definitions +//===----------------------------------------------------------------------===// + +LogicalResult LabelOp::verify() { + auto *op = getOperation(); + auto *blk = op->getBlock(); + if (&blk->front() != op) + return emitError() << "must be the first operation in a block"; + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// EhTypeIdOp +//===----------------------------------------------------------------------===// + +LogicalResult EhTypeIdOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + auto op = symbolTable.lookupNearestSymbolFrom(*this, getTypeSymAttr()); + if (!isa(op)) + return emitOpError("'") + << getTypeSym() << "' does not reference a valid cir.global"; + return success(); +} + +//===----------------------------------------------------------------------===// +// CatchParamOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::CatchParamOp::verify() { + if (getExceptionPtr()) { + auto kind = getKind(); + if (!kind || *kind != mlir::cir::CatchParamKind::begin) + return emitOpError("needs 'begin' to work with exception pointer"); + return success(); + } + if (!getKind() && !(*this)->getParentOfType()) + return emitOpError("without 'kind' requires 'cir.try' surrounding scope"); + return success(); +} + +//===----------------------------------------------------------------------===// +// TableGen'd op method definitions +//===----------------------------------------------------------------------===// + +#define GET_OP_CLASSES +#include "clang/CIR/Dialect/IR/CIROps.cpp.inc" diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp new file mode 100644 index 000000000000..2ced31cbbad8 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -0,0 +1,184 @@ +//====- CIRMemorySlot.cpp - MemorySlot interfaces -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements MemorySlot-related interfaces for CIR dialect +// operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "mlir/Interfaces/MemorySlotInterfaces.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/TypeSwitch.h" + +using namespace mlir; + +/// Conditions the deletion of the operation to the removal of all its uses. 
+static bool forwardToUsers(Operation *op, + SmallVectorImpl &newBlockingUses) { + for (Value result : op->getResults()) + for (OpOperand &use : result.getUses()) + newBlockingUses.push_back(&use); + return true; +} + +//===----------------------------------------------------------------------===// +// Interfaces for AllocaOp +//===----------------------------------------------------------------------===// + +llvm::SmallVector cir::AllocaOp::getPromotableSlots() { + return {MemorySlot{getResult(), getAllocaType()}}; +} + +Value cir::AllocaOp::getDefaultValue(const MemorySlot &slot, + OpBuilder &builder) { + return builder.create(getLoc(), slot.elemType); +} + +void cir::AllocaOp::handleBlockArgument(const MemorySlot &slot, + BlockArgument argument, + OpBuilder &builder) {} + +std::optional +cir::AllocaOp::handlePromotionComplete(const MemorySlot &slot, + Value defaultValue, + OpBuilder &builder) { + if (defaultValue && defaultValue.use_empty()) + defaultValue.getDefiningOp()->erase(); + this->erase(); + return std::nullopt; +} + +//===----------------------------------------------------------------------===// +// Interfaces for LoadOp +//===----------------------------------------------------------------------===// + +bool cir::LoadOp::loadsFrom(const MemorySlot &slot) { + return getAddr() == slot.ptr; +} + +bool cir::LoadOp::storesTo(const MemorySlot &slot) { return false; } + +Value cir::LoadOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, const DataLayout &dataLayout) { + llvm_unreachable("getStored should not be called on LoadOp"); +} + +bool cir::LoadOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getResult().getType() == slot.elemType; +} + +DeletionKind cir::LoadOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + getResult().replaceAllUsesWith(reachingDefinition); + return DeletionKind::Delete; +} + +//===----------------------------------------------------------------------===// +// Interfaces for StoreOp +//===----------------------------------------------------------------------===// + +bool cir::StoreOp::loadsFrom(const MemorySlot &slot) { return false; } + +bool cir::StoreOp::storesTo(const MemorySlot &slot) { + return getAddr() == slot.ptr; +} + +Value cir::StoreOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, const DataLayout &dataLayout) { + return getValue(); +} + +bool cir::StoreOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + if (blockingUses.size() != 1) + return false; + Value blockingUse = (*blockingUses.begin())->get(); + return blockingUse == slot.ptr && getAddr() == slot.ptr && + getValue() != slot.ptr && slot.elemType == getValue().getType(); +} + +DeletionKind cir::StoreOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + return DeletionKind::Delete; +} + +//===----------------------------------------------------------------------===// +// Interfaces for CopyOp 
+//===----------------------------------------------------------------------===// + +bool cir::CopyOp::loadsFrom(const MemorySlot &slot) { + return getSrc() == slot.ptr; +} + +bool cir::CopyOp::storesTo(const MemorySlot &slot) { + return getDst() == slot.ptr; +} + +Value cir::CopyOp::getStored(const MemorySlot &slot, OpBuilder &builder, + Value reachingDef, const DataLayout &dataLayout) { + return builder.create(getLoc(), slot.elemType, getSrc()); +} + +DeletionKind cir::CopyOp::removeBlockingUses( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + OpBuilder &builder, Value reachingDefinition, + const DataLayout &dataLayout) { + if (loadsFrom(slot)) + builder.create(getLoc(), reachingDefinition, getDst(), false, + mlir::IntegerAttr{}, + mlir::cir::MemOrderAttr()); + return DeletionKind::Delete; +} + +bool cir::CopyOp::canUsesBeRemoved( + const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + + if (getDst() == getSrc()) + return false; + + return getLength() == dataLayout.getTypeSize(slot.elemType); +} + +//===----------------------------------------------------------------------===// +// Interfaces for CastOp +//===----------------------------------------------------------------------===// + +bool cir::CastOp::canUsesBeRemoved( + const SmallPtrSetImpl &blockingUses, + SmallVectorImpl &newBlockingUses, + const DataLayout &dataLayout) { + if (getKind() == cir::CastKind::bitcast) + return forwardToUsers(*this, newBlockingUses); + else + return false; +} + +DeletionKind cir::CastOp::removeBlockingUses( + const SmallPtrSetImpl &blockingUses, OpBuilder &builder) { + return DeletionKind::Delete; +} diff --git a/clang/lib/CIR/Dialect/IR/CIROpenCLAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIROpenCLAttrs.cpp new file mode 100644 index 000000000000..e16aad6d6867 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIROpenCLAttrs.cpp @@ -0,0 +1,116 @@ +//===- CIROpenCLAttrs.cpp - OpenCL specific attributes in CIR -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the OpenCL-specific attrs in the CIR dialect. +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/DialectImplementation.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/TypeSwitch.h" + +using namespace mlir; +using namespace mlir::cir; + +//===----------------------------------------------------------------------===// +// OpenCLKernelMetadataAttr definitions +//===----------------------------------------------------------------------===// + +LogicalResult OpenCLKernelMetadataAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ArrayAttr workGroupSizeHint, ArrayAttr reqdWorkGroupSize, + TypeAttr vecTypeHint, std::optional vecTypeHintSignedness, + IntegerAttr intelReqdSubGroupSize) { + // If no field is present, the attribute is considered invalid. 
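+  // For reference (illustrative OpenCL C, not part of this patch), these
+  // fields model kernel attributes such as:
+  //
+  //   __kernel __attribute__((reqd_work_group_size(8, 8, 1)))
+  //            __attribute__((work_group_size_hint(8, 8, 1)))
+  //            __attribute__((vec_type_hint(float4)))
+  //            __attribute__((intel_reqd_sub_group_size(16)))
+  //   void k(__global float *out) { /* ... */ }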
+ if (!workGroupSizeHint && !reqdWorkGroupSize && !vecTypeHint && + !vecTypeHintSignedness && !intelReqdSubGroupSize) { + return emitError() + << "metadata attribute without any field present is invalid"; + } + + // Check for 3-dim integer tuples + auto is3dimIntTuple = [](ArrayAttr arr) { + auto isInt = [](Attribute dim) { return mlir::isa(dim); }; + return arr.size() == 3 && llvm::all_of(arr, isInt); + }; + if (workGroupSizeHint && !is3dimIntTuple(workGroupSizeHint)) { + return emitError() + << "work_group_size_hint must have exactly 3 integer elements"; + } + if (reqdWorkGroupSize && !is3dimIntTuple(reqdWorkGroupSize)) { + return emitError() + << "reqd_work_group_size must have exactly 3 integer elements"; + } + + // Check for co-presence of vecTypeHintSignedness + if (!!vecTypeHint != vecTypeHintSignedness.has_value()) { + return emitError() << "vec_type_hint_signedness should be present if and " + "only if vec_type_hint is set"; + } + + if (vecTypeHint) { + Type vecTypeHintValue = vecTypeHint.getValue(); + if (mlir::isa(vecTypeHintValue.getDialect())) { + // Check for signedness alignment in CIR + if (isSignedHint(vecTypeHintValue) != vecTypeHintSignedness) { + return emitError() << "vec_type_hint_signedness must match the " + "signedness of the vec_type_hint type"; + } + // Check for the dialect of type hint + } else if (!LLVM::isCompatibleType(vecTypeHintValue)) { + return emitError() << "vec_type_hint must be a type from the CIR or LLVM " + "dialect"; + } + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// OpenCLKernelArgMetadataAttr definitions +//===----------------------------------------------------------------------===// + +LogicalResult OpenCLKernelArgMetadataAttr::verify( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ArrayAttr addrSpaces, ArrayAttr accessQuals, ArrayAttr types, + ArrayAttr baseTypes, ArrayAttr typeQuals, ArrayAttr argNames) { + auto isIntArray = [](ArrayAttr elt) { + return llvm::all_of( + elt, [](Attribute elt) { return mlir::isa(elt); }); + }; + auto isStrArray = [](ArrayAttr elt) { + return llvm::all_of( + elt, [](Attribute elt) { return mlir::isa(elt); }); + }; + + if (!isIntArray(addrSpaces)) + return emitError() << "addr_space must be integer arrays"; + if (!llvm::all_of>( + {accessQuals, types, baseTypes, typeQuals}, isStrArray)) + return emitError() + << "access_qual, type, base_type, type_qual must be string arrays"; + if (argNames && !isStrArray(argNames)) { + return emitError() << "name must be a string array"; + } + + if (!llvm::all_of>( + {addrSpaces, accessQuals, types, baseTypes, typeQuals, argNames}, + [&](ArrayAttr arr) { + return !arr || arr.size() == addrSpaces.size(); + })) { + return emitError() << "all arrays must have the same number of elements"; + } + return success(); +} diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp new file mode 100644 index 000000000000..5945274f515a --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -0,0 +1,1051 @@ +//===- CIRTypes.cpp - MLIR CIR Types --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the types in the CIR dialect. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypesDetails.h" +#include "clang/CIR/MissingFeatures.h" + +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/DialectImplementation.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" + +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "clang/CIR/Interfaces/CIRFPTypeInterface.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include + +using cir::MissingFeatures; + +//===----------------------------------------------------------------------===// +// CIR Custom Parser/Printer Signatures +//===----------------------------------------------------------------------===// + +static mlir::ParseResult +parseFuncTypeArgs(mlir::AsmParser &p, llvm::SmallVector ¶ms, + bool &isVarArg); +static void printFuncTypeArgs(mlir::AsmPrinter &p, + mlir::ArrayRef params, bool isVarArg); + +static mlir::ParseResult parsePointerAddrSpace(mlir::AsmParser &p, + mlir::Attribute &addrSpaceAttr); +static void printPointerAddrSpace(mlir::AsmPrinter &p, + mlir::Attribute addrSpaceAttr); + +//===----------------------------------------------------------------------===// +// Get autogenerated stuff +//===----------------------------------------------------------------------===// + +#define GET_TYPEDEF_CLASSES +#include "clang/CIR/Dialect/IR/CIROpsTypes.cpp.inc" + +using namespace mlir; +using namespace mlir::cir; + +//===----------------------------------------------------------------------===// +// General CIR parsing / printing +//===----------------------------------------------------------------------===// + +Type CIRDialect::parseType(DialectAsmParser &parser) const { + llvm::SMLoc typeLoc = parser.getCurrentLocation(); + StringRef mnemonic; + Type genType; + + // Try to parse as a tablegen'd type. + OptionalParseResult parseResult = + generatedTypeParser(parser, &mnemonic, genType); + if (parseResult.has_value()) + return genType; + + // Type is not tablegen'd: try to parse as a raw C++ type. + return StringSwitch>(mnemonic) + .Case("struct", [&] { return StructType::parse(parser); }) + .Default([&] { + parser.emitError(typeLoc) << "unknown CIR type: " << mnemonic; + return Type(); + })(); +} + +void CIRDialect::printType(Type type, DialectAsmPrinter &os) const { + // Try to print as a tablegen'd type. + if (generatedTypePrinter(type, os).succeeded()) + return; + + // Type is not tablegen'd: try printing as a raw C++ type. + TypeSwitch(type) + .Case([&](StructType type) { + os << type.getMnemonic(); + type.print(os); + }) + .Default([](Type) { + llvm::report_fatal_error("printer is missing a handler for this type"); + }); +} + +Type BoolType::parse(mlir::AsmParser &parser) { + return get(parser.getContext()); +} + +void BoolType::print(mlir::AsmPrinter &printer) const {} + +//===----------------------------------------------------------------------===// +// StructType Definitions +//===----------------------------------------------------------------------===// + +/// Return the largest member of in the type. 
+/// +/// Recurses into union members never returning a union as the largest member. +Type StructType::getLargestMember(const ::mlir::DataLayout &dataLayout) const { + if (!layoutInfo) + computeSizeAndAlignment(dataLayout); + return mlir::cast(layoutInfo).getLargestMember(); +} + +Type StructType::parse(mlir::AsmParser &parser) { + FailureOr cyclicParseGuard; + const auto loc = parser.getCurrentLocation(); + const auto eLoc = parser.getEncodedSourceLoc(loc); + bool packed = false; + RecordKind kind; + auto *context = parser.getContext(); + + if (parser.parseLess()) + return {}; + + // TODO(cir): in the future we should probably separate types for different + // source language declarations such as cir.class, cir.union, and cir.struct + if (parser.parseOptionalKeyword("struct").succeeded()) + kind = RecordKind::Struct; + else if (parser.parseOptionalKeyword("union").succeeded()) + kind = RecordKind::Union; + else if (parser.parseOptionalKeyword("class").succeeded()) + kind = RecordKind::Class; + else { + parser.emitError(loc, "unknown struct type"); + return {}; + } + + mlir::StringAttr name; + parser.parseOptionalAttribute(name); + + // Is a self reference: ensure referenced type was parsed. + if (name && parser.parseOptionalGreater().succeeded()) { + auto type = getChecked(eLoc, context, name, kind); + if (succeeded(parser.tryStartCyclicParse(type))) { + parser.emitError(loc, "invalid self-reference within record"); + return {}; + } + return type; + } + + // Is a named record definition: ensure name has not been parsed yet. + if (name) { + auto type = getChecked(eLoc, context, name, kind); + cyclicParseGuard = parser.tryStartCyclicParse(type); + if (failed(cyclicParseGuard)) { + parser.emitError(loc, "record already defined"); + return {}; + } + } + + if (parser.parseOptionalKeyword("packed").succeeded()) + packed = true; + + // Parse record members or lack thereof. + bool incomplete = true; + llvm::SmallVector members; + if (parser.parseOptionalKeyword("incomplete").failed()) { + incomplete = false; + const auto delimiter = AsmParser::Delimiter::Braces; + const auto parseElementFn = [&parser, &members]() { + return parser.parseType(members.emplace_back()); + }; + if (parser.parseCommaSeparatedList(delimiter, parseElementFn).failed()) + return {}; + } + + // Parse optional AST attribute. This is just a formality for now, since CIR + // cannot yet read serialized AST. + mlir::cir::ASTRecordDeclAttr ast = nullptr; + parser.parseOptionalAttribute(ast); + + if (parser.parseGreater()) + return {}; + + // Try to create the proper record type. + ArrayRef membersRef(members); // Needed for template deduction. + mlir::Type type = {}; + if (name && incomplete) { // Identified & incomplete + type = getChecked(eLoc, context, name, kind); + } else if (name && !incomplete) { // Identified & complete + type = getChecked(eLoc, context, membersRef, name, packed, kind); + // If the record has a self-reference, its type already exists in a + // incomplete state. In this case, we must complete it. 
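+    // Illustrative only (syntax sketched from this parser, not normative): a
+    // record that mentions itself, e.g.
+    //
+    //   !cir.struct<struct "Node" {!cir.int<s, 32>,
+    //                              !cir.ptr<!cir.struct<struct "Node">>}>
+    //
+    // is first materialized in an incomplete state when the inner
+    // self-reference is parsed, and is completed here once the member list
+    // has been read.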
+ if (mlir::cast(type).isIncomplete()) + mlir::cast(type).complete(membersRef, packed, ast); + } else if (!name && !incomplete) { // anonymous & complete + type = getChecked(eLoc, context, membersRef, packed, kind); + } else { // anonymous & incomplete + parser.emitError(loc, "anonymous structs must be complete"); + return {}; + } + + return type; +} + +void StructType::print(mlir::AsmPrinter &printer) const { + FailureOr cyclicPrintGuard; + printer << '<'; + + switch (getKind()) { + case RecordKind::Struct: + printer << "struct "; + break; + case RecordKind::Union: + printer << "union "; + break; + case RecordKind::Class: + printer << "class "; + break; + } + + if (getName()) + printer << getName(); + + // Current type has already been printed: print as self reference. + cyclicPrintGuard = printer.tryStartCyclicPrint(*this); + if (failed(cyclicPrintGuard)) { + printer << '>'; + return; + } + + // Type not yet printed: continue printing the entire record. + printer << ' '; + + if (getPacked()) + printer << "packed "; + + if (isIncomplete()) { + printer << "incomplete"; + } else { + printer << "{"; + llvm::interleaveComma(getMembers(), printer); + printer << "}"; + } + + if (getAst()) { + printer << " "; + printer.printAttribute(getAst()); + } + + printer << '>'; +} + +mlir::LogicalResult +StructType::verify(llvm::function_ref emitError, + llvm::ArrayRef members, mlir::StringAttr name, + bool incomplete, bool packed, + mlir::cir::StructType::RecordKind kind, + ASTRecordDeclInterface ast) { + if (name && name.getValue().empty()) { + emitError() << "identified structs cannot have an empty name"; + return mlir::failure(); + } + return mlir::success(); +} + +void StructType::dropAst() { getImpl()->ast = nullptr; } +StructType StructType::get(::mlir::MLIRContext *context, ArrayRef members, + StringAttr name, bool packed, RecordKind kind, + ASTRecordDeclInterface ast) { + return Base::get(context, members, name, /*incomplete=*/false, packed, kind, + ast); +} + +StructType StructType::getChecked( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::MLIRContext *context, ArrayRef members, StringAttr name, + bool packed, RecordKind kind, ASTRecordDeclInterface ast) { + return Base::getChecked(emitError, context, members, name, + /*incomplete=*/false, packed, kind, ast); +} + +StructType StructType::get(::mlir::MLIRContext *context, StringAttr name, + RecordKind kind) { + return Base::get(context, /*members=*/ArrayRef{}, name, + /*incomplete=*/true, /*packed=*/false, kind, + /*ast=*/ASTRecordDeclInterface{}); +} + +StructType StructType::getChecked( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::MLIRContext *context, StringAttr name, RecordKind kind) { + return Base::getChecked(emitError, context, ArrayRef{}, name, + /*incomplete=*/true, /*packed=*/false, kind, + ASTRecordDeclInterface{}); +} + +StructType StructType::get(::mlir::MLIRContext *context, ArrayRef members, + bool packed, RecordKind kind, + ASTRecordDeclInterface ast) { + return Base::get(context, members, StringAttr{}, /*incomplete=*/false, packed, + kind, ast); +} + +StructType StructType::getChecked( + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, + ::mlir::MLIRContext *context, ArrayRef members, bool packed, + RecordKind kind, ASTRecordDeclInterface ast) { + return Base::getChecked(emitError, context, members, StringAttr{}, + /*incomplete=*/false, packed, kind, ast); +} + +::llvm::ArrayRef StructType::getMembers() const { + return getImpl()->members; +} + +bool 
StructType::isIncomplete() const { return getImpl()->incomplete; } + +mlir::StringAttr StructType::getName() const { return getImpl()->name; } + +bool StructType::getIncomplete() const { return getImpl()->incomplete; } + +bool StructType::getPacked() const { return getImpl()->packed; } + +mlir::cir::StructType::RecordKind StructType::getKind() const { + return getImpl()->kind; +} + +ASTRecordDeclInterface StructType::getAst() const { return getImpl()->ast; } + +void StructType::complete(ArrayRef members, bool packed, + ASTRecordDeclInterface ast) { + if (mutate(members, packed, ast).failed()) + llvm_unreachable("failed to complete struct"); +} + +bool StructType::isLayoutIdentical(const StructType &other) { + if (getImpl() == other.getImpl()) + return true; + + if (getPacked() != other.getPacked()) + return false; + + return getMembers() == other.getMembers(); +} + +//===----------------------------------------------------------------------===// +// Data Layout information for types +//===----------------------------------------------------------------------===// + +llvm::TypeSize +BoolType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(8); +} + +uint64_t +BoolType::getABIAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return 1; +} + +uint64_t +BoolType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return 1; +} + +llvm::TypeSize +PointerType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: improve this in face of address spaces + return llvm::TypeSize::getFixed(64); +} + +uint64_t +PointerType::getABIAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: improve this in face of address spaces + return 8; +} + +uint64_t PointerType::getPreferredAlignment( + const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: improve this in face of address spaces + return 8; +} + +llvm::TypeSize +DataMemberType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: consider size differences under different ABIs + assert(!MissingFeatures::cxxABI()); + return llvm::TypeSize::getFixed(64); +} + +uint64_t +DataMemberType::getABIAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: consider alignment differences under different ABIs + assert(!MissingFeatures::cxxABI()); + return 8; +} + +uint64_t DataMemberType::getPreferredAlignment( + const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // FIXME: consider alignment differences under different ABIs + assert(!MissingFeatures::cxxABI()); + return 8; +} + +llvm::TypeSize +ArrayType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return getSize() * dataLayout.getTypeSizeInBits(getEltType()); +} + +uint64_t +ArrayType::getABIAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return dataLayout.getTypeABIAlignment(getEltType()); +} + +uint64_t +ArrayType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return dataLayout.getTypePreferredAlignment(getEltType()); +} + 
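+// Worked example for the array helpers above (illustrative; the type syntax
+// is sketched, not normative): an array of four 32-bit signed integers,
+// roughly !cir.array<!cir.int<s, 32> x 4>, occupies 4 * 32 = 128 bits and
+// inherits the 4-byte ABI alignment of its element type.
+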
+llvm::TypeSize mlir::cir::VectorType::getTypeSizeInBits( + const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(getSize() * + dataLayout.getTypeSizeInBits(getEltType())); +} + +uint64_t mlir::cir::VectorType::getABIAlignment( + const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return llvm::NextPowerOf2(dataLayout.getTypeSizeInBits(*this)); +} + +uint64_t mlir::cir::VectorType::getPreferredAlignment( + const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return llvm::NextPowerOf2(dataLayout.getTypeSizeInBits(*this)); +} + +llvm::TypeSize +StructType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + if (!layoutInfo) + computeSizeAndAlignment(dataLayout); + return llvm::TypeSize::getFixed( + mlir::cast(layoutInfo).getSize() * 8); +} + +uint64_t +StructType::getABIAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + if (!layoutInfo) + computeSizeAndAlignment(dataLayout); + return mlir::cast(layoutInfo).getAlignment(); +} + +uint64_t +StructType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + llvm_unreachable("NYI"); +} + +bool StructType::isPadded(const ::mlir::DataLayout &dataLayout) const { + if (!layoutInfo) + computeSizeAndAlignment(dataLayout); + return mlir::cast(layoutInfo).getPadded(); +} + +uint64_t StructType::getElementOffset(const ::mlir::DataLayout &dataLayout, + unsigned idx) const { + assert(idx < getMembers().size() && "access not valid"); + if (!layoutInfo) + computeSizeAndAlignment(dataLayout); + auto offsets = + mlir::cast(layoutInfo).getOffsets(); + auto intAttr = mlir::cast(offsets[idx]); + return intAttr.getInt(); +} + +void StructType::computeSizeAndAlignment( + const ::mlir::DataLayout &dataLayout) const { + assert(isComplete() && "Cannot get layout of incomplete structs"); + // Do not recompute. + if (layoutInfo) + return; + + // This is a similar algorithm to LLVM's StructLayout. + unsigned structSize = 0; + llvm::Align structAlignment{1}; + bool isPadded = false; + unsigned numElements = getNumElements(); + auto members = getMembers(); + mlir::Type largestMember; + unsigned largestMemberSize = 0; + SmallVector memberOffsets; + + // Loop over each of the elements, placing them in memory. + memberOffsets.reserve(numElements); + for (unsigned i = 0, e = numElements; i != e; ++i) { + auto ty = members[i]; + + // Found a nested union: recurse into it to fetch its largest member. + auto structMember = mlir::dyn_cast(ty); + if (structMember && structMember.isUnion()) { + auto candidate = structMember.getLargestMember(dataLayout); + if (dataLayout.getTypeSize(candidate) > largestMemberSize) { + largestMember = candidate; + largestMemberSize = dataLayout.getTypeSize(largestMember); + } + } else if (dataLayout.getTypeSize(ty) > largestMemberSize) { + largestMember = ty; + largestMemberSize = dataLayout.getTypeSize(largestMember); + } + + // This matches LLVM since it uses the ABI instead of preferred alignment. + const llvm::Align tyAlign = + llvm::Align(getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty)); + + // Add padding if necessary to align the data element properly. + if (!llvm::isAligned(tyAlign, structSize)) { + isPadded = true; + structSize = llvm::alignTo(structSize, tyAlign); + } + + // Keep track of maximum alignment constraint. 
+ structAlignment = std::max(tyAlign, structAlignment); + + // Struct size up to each element is the element offset. + memberOffsets.push_back(mlir::IntegerAttr::get( + mlir::IntegerType::get(getContext(), 32), structSize)); + + // Consume space for this data item + structSize += dataLayout.getTypeSize(ty); + } + + // For unions, the size and aligment is that of the largest element. + if (isUnion()) { + structSize = largestMemberSize; + isPadded = false; + } else { + // Add padding to the end of the struct so that it could be put in an array + // and all array elements would be aligned correctly. + if (!llvm::isAligned(structAlignment, structSize)) { + isPadded = true; + structSize = llvm::alignTo(structSize, structAlignment); + } + } + + auto offsets = mlir::ArrayAttr::get(getContext(), memberOffsets); + layoutInfo = mlir::cir::StructLayoutAttr::get( + getContext(), structSize, structAlignment.value(), isPadded, + largestMember, offsets); +} + +//===----------------------------------------------------------------------===// +// IntType Definitions +//===----------------------------------------------------------------------===// + +Type IntType::parse(mlir::AsmParser &parser) { + auto *context = parser.getBuilder().getContext(); + auto loc = parser.getCurrentLocation(); + bool isSigned; + unsigned width; + + if (parser.parseLess()) + return {}; + + // Fetch integer sign. + llvm::StringRef sign; + if (parser.parseKeyword(&sign)) + return {}; + if (sign == "s") + isSigned = true; + else if (sign == "u") + isSigned = false; + else { + parser.emitError(loc, "expected 's' or 'u'"); + return {}; + } + + if (parser.parseComma()) + return {}; + + // Fetch integer size. + if (parser.parseInteger(width)) + return {}; + if (width < 1 || width > 64) { + parser.emitError(loc, "expected integer width to be from 1 up to 64"); + return {}; + } + + if (parser.parseGreater()) + return {}; + + return IntType::get(context, width, isSigned); +} + +void IntType::print(mlir::AsmPrinter &printer) const { + auto sign = isSigned() ? 
's' : 'u'; + printer << '<' << sign << ", " << getWidth() << '>'; +} + +llvm::TypeSize +IntType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(getWidth()); +} + +uint64_t IntType::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +uint64_t +IntType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +mlir::LogicalResult +IntType::verify(llvm::function_ref emitError, + unsigned width, bool isSigned) { + + if (width < IntType::minBitwidth() || width > IntType::maxBitwidth()) { + emitError() << "IntType only supports widths from " + << IntType::minBitwidth() << "up to " << IntType::maxBitwidth(); + return mlir::failure(); + } + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// Floating-point type definitions +//===----------------------------------------------------------------------===// + +const llvm::fltSemantics &SingleType::getFloatSemantics() const { + return llvm::APFloat::IEEEsingle(); +} + +llvm::TypeSize +SingleType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(getWidth()); +} + +uint64_t +SingleType::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +uint64_t +SingleType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +const llvm::fltSemantics &DoubleType::getFloatSemantics() const { + return llvm::APFloat::IEEEdouble(); +} + +llvm::TypeSize +DoubleType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(getWidth()); +} + +uint64_t +DoubleType::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +uint64_t +DoubleType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +const llvm::fltSemantics &FP16Type::getFloatSemantics() const { + return llvm::APFloat::IEEEhalf(); +} + +llvm::TypeSize +FP16Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(getWidth()); +} + +uint64_t FP16Type::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +uint64_t +FP16Type::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +const llvm::fltSemantics &BF16Type::getFloatSemantics() const { + return llvm::APFloat::BFloat(); +} + +llvm::TypeSize +BF16Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(getWidth()); +} + +uint64_t BF16Type::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +uint64_t +BF16Type::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + 
::mlir::DataLayoutEntryListRef params) const { + return (uint64_t)(getWidth() / 8); +} + +const llvm::fltSemantics &FP80Type::getFloatSemantics() const { + return llvm::APFloat::x87DoubleExtended(); +} + +llvm::TypeSize +FP80Type::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return llvm::TypeSize::getFixed(16); +} + +uint64_t FP80Type::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return 16; +} + +uint64_t +FP80Type::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + return 16; +} + +const llvm::fltSemantics &LongDoubleType::getFloatSemantics() const { + return mlir::cast(getUnderlying()) + .getFloatSemantics(); +} + +llvm::TypeSize +LongDoubleType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return mlir::cast(getUnderlying()) + .getTypeSizeInBits(dataLayout, params); +} + +uint64_t +LongDoubleType::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return mlir::cast(getUnderlying()) + .getABIAlignment(dataLayout, params); +} + +uint64_t LongDoubleType::getPreferredAlignment( + const ::mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return mlir::cast(getUnderlying()) + .getPreferredAlignment(dataLayout, params); +} + +LogicalResult +LongDoubleType::verify(function_ref emitError, + mlir::Type underlying) { + if (!mlir::isa(underlying)) { + emitError() << "invalid underlying type for long double"; + return failure(); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// Floating-point type helpers +//===----------------------------------------------------------------------===// + +bool mlir::cir::isAnyFloatingPointType(mlir::Type t) { + return isa(t); +} + +//===----------------------------------------------------------------------===// +// Floating-point and Float-point Vecotr type helpers +//===----------------------------------------------------------------------===// + +bool mlir::cir::isFPOrFPVectorTy(mlir::Type t) { + + if (isa(t)) { + return isAnyFloatingPointType( + mlir::dyn_cast(t).getEltType()); + } + return isAnyFloatingPointType(t); +} + +//===----------------------------------------------------------------------===// +// ComplexType Definitions +//===----------------------------------------------------------------------===// + +mlir::LogicalResult mlir::cir::ComplexType::verify( + llvm::function_ref emitError, + mlir::Type elementTy) { + if (!mlir::isa( + elementTy)) { + emitError() << "element type of !cir.complex must be either a " + "floating-point type or an integer type"; + return failure(); + } + + return success(); +} + +llvm::TypeSize mlir::cir::ComplexType::getTypeSizeInBits( + const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + // C17 6.2.5p13: + // Each complex type has the same representation and alignment requirements + // as an array type containing exactly two elements of the corresponding + // real type. 
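+  // Illustrative example (element type spelling is a sketch, not normative):
+  // a complex of !cir.float, roughly !cir.complex<!cir.float>, is laid out
+  // like float[2]: 2 * 32 = 64 bits in size with the 4-byte alignment of the
+  // underlying float.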
+ + auto elementTy = getElementTy(); + return dataLayout.getTypeSizeInBits(elementTy) * 2; +} + +uint64_t mlir::cir::ComplexType::getABIAlignment( + const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + // C17 6.2.5p13: + // Each complex type has the same representation and alignment requirements + // as an array type containing exactly two elements of the corresponding + // real type. + + auto elementTy = getElementTy(); + return dataLayout.getTypeABIAlignment(elementTy); +} + +uint64_t mlir::cir::ComplexType::getPreferredAlignment( + const ::mlir::DataLayout &dataLayout, + ::mlir::DataLayoutEntryListRef params) const { + // C17 6.2.5p13: + // Each complex type has the same representation and alignment requirements + // as an array type containing exactly two elements of the corresponding + // real type. + + auto elementTy = getElementTy(); + return dataLayout.getTypePreferredAlignment(elementTy); +} + +//===----------------------------------------------------------------------===// +// FuncType Definitions +//===----------------------------------------------------------------------===// + +FuncType FuncType::clone(TypeRange inputs, TypeRange results) const { + assert(results.size() == 1 && "expected exactly one result type"); + return get(llvm::to_vector(inputs), results[0], isVarArg()); +} + +mlir::ParseResult parseFuncTypeArgs(mlir::AsmParser &p, + llvm::SmallVector ¶ms, + bool &isVarArg) { + isVarArg = false; + // `(` `)` + if (succeeded(p.parseOptionalRParen())) + return mlir::success(); + + // `(` `...` `)` + if (succeeded(p.parseOptionalEllipsis())) { + isVarArg = true; + return p.parseRParen(); + } + + // type (`,` type)* (`,` `...`)? + mlir::Type type; + if (p.parseType(type)) + return mlir::failure(); + params.push_back(type); + while (succeeded(p.parseOptionalComma())) { + if (succeeded(p.parseOptionalEllipsis())) { + isVarArg = true; + return p.parseRParen(); + } + if (p.parseType(type)) + return mlir::failure(); + params.push_back(type); + } + + return p.parseRParen(); +} + +void printFuncTypeArgs(mlir::AsmPrinter &p, mlir::ArrayRef params, + bool isVarArg) { + llvm::interleaveComma(params, p, + [&p](mlir::Type type) { p.printType(type); }); + if (isVarArg) { + if (!params.empty()) + p << ", "; + p << "..."; + } + p << ')'; +} + +llvm::ArrayRef FuncType::getReturnTypes() const { + return static_cast(getImpl())->returnType; +} + +bool FuncType::isVoid() const { return mlir::isa(getReturnType()); } + +//===----------------------------------------------------------------------===// +// MethodType Definitions +//===----------------------------------------------------------------------===// + +static mlir::Type getMethodLayoutType(mlir::MLIRContext *ctx) { + // With Itanium ABI, member function pointers have the same layout as the + // following struct: struct { fnptr_t, ptrdiff_t }, where fnptr_t is a + // function pointer type. 
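+  // For reference: under the Itanium C++ ABI the two words are { ptr, adj },
+  // where `ptr` is either the function address (non-virtual case) or one plus
+  // the vtable offset in bytes (virtual case), and `adj` is the offset to add
+  // to "this" before the call.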
+ // TODO: consider member function pointer layout in other ABIs + auto voidPtrTy = mlir::cir::PointerType::get(mlir::cir::VoidType::get(ctx)); + mlir::Type fields[2]{voidPtrTy, voidPtrTy}; + return mlir::cir::StructType::get(ctx, fields, /*packed=*/false, + mlir::cir::StructType::Struct); +} + +llvm::TypeSize +MethodType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return dataLayout.getTypeSizeInBits(getMethodLayoutType(getContext())); +} + +uint64_t +MethodType::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return dataLayout.getTypeSizeInBits(getMethodLayoutType(getContext())); +} + +uint64_t +MethodType::getPreferredAlignment(const ::mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + return dataLayout.getTypeSizeInBits(getMethodLayoutType(getContext())); +} + +//===----------------------------------------------------------------------===// +// PointerType Definitions +//===----------------------------------------------------------------------===// + +mlir::LogicalResult +PointerType::verify(llvm::function_ref emitError, + mlir::Type pointee, mlir::Attribute addrSpace) { + if (addrSpace && !mlir::isa(addrSpace)) { + emitError() << "unexpected addrspace attribute type"; + return mlir::failure(); + } + return mlir::success(); +} + +mlir::ParseResult parseAddrSpaceAttribute(mlir::AsmParser &p, + mlir::Attribute &addrSpaceAttr) { + using mlir::cir::AddressSpaceAttr; + auto attrLoc = p.getCurrentLocation(); + + llvm::StringRef addrSpaceKind; + if (mlir::failed(p.parseOptionalKeyword(&addrSpaceKind))) { + p.emitError(attrLoc, "expected keyword for addrspace kind"); + return mlir::failure(); + } + + if (addrSpaceKind == AddressSpaceAttr::kTargetKeyword) { + int64_t targetValue = -1; + if (p.parseLess() || p.parseInteger(targetValue) || p.parseGreater()) { + return mlir::failure(); + } + addrSpaceAttr = AddressSpaceAttr::get( + p.getContext(), AddressSpaceAttr::kFirstTargetASValue + targetValue); + } else { + std::optional value = + AddressSpaceAttr::parseValueFromString(addrSpaceKind); + // not target AS, must be wrong keyword if no value + if (!value.has_value()) { + p.emitError(attrLoc, "invalid addrspace kind keyword: " + addrSpaceKind); + return mlir::failure(); + } + + addrSpaceAttr = AddressSpaceAttr::get(p.getContext(), *value); + } + + return mlir::success(); +} + +void printAddrSpaceAttribute(mlir::AsmPrinter &p, + mlir::Attribute rawAddrSpaceAttr) { + using mlir::cir::AddressSpaceAttr; + auto addrSpaceAttr = mlir::cast(rawAddrSpaceAttr); + if (addrSpaceAttr.isTarget()) { + p << AddressSpaceAttr::kTargetKeyword << "<" + << addrSpaceAttr.getTargetValue() << ">"; + } else { + p << AddressSpaceAttr::stringifyValue(addrSpaceAttr.getValue()); + } +} + +mlir::ParseResult parsePointerAddrSpace(mlir::AsmParser &p, + mlir::Attribute &addrSpaceAttr) { + return parseAddrSpaceAttribute(p, addrSpaceAttr); +} + +void printPointerAddrSpace(mlir::AsmPrinter &p, + mlir::Attribute rawAddrSpaceAttr) { + printAddrSpaceAttribute(p, rawAddrSpaceAttr); +} + +//===----------------------------------------------------------------------===// +// CIR Dialect +//===----------------------------------------------------------------------===// + +void CIRDialect::registerTypes() { + // Register tablegen'd types. + addTypes< +#define GET_TYPEDEF_LIST +#include "clang/CIR/Dialect/IR/CIROpsTypes.cpp.inc" + >(); + + // Register raw C++ types. 
+ addTypes(); +} diff --git a/clang/lib/CIR/Dialect/IR/CMakeLists.txt b/clang/lib/CIR/Dialect/IR/CMakeLists.txt index 0d7476b55570..34e2f642cefe 100644 --- a/clang/lib/CIR/Dialect/IR/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/IR/CMakeLists.txt @@ -1,3 +1,29 @@ add_clang_library(MLIRCIR + CIRAttrs.cpp + CIROpenCLAttrs.cpp + CIRDataLayout.cpp CIRDialect.cpp + CIRMemorySlot.cpp + CIRTypes.cpp + FPEnv.cpp + + DEPENDS + MLIRBuiltinLocationAttributesIncGen + MLIRCIROpsIncGen + MLIRCIREnumsGen + MLIRSymbolInterfacesIncGen + MLIRCIRASTAttrInterfacesIncGen + MLIRCIROpInterfacesIncGen + MLIRCIRLoopOpInterfaceIncGen + + LINK_LIBS PUBLIC + MLIRIR + MLIRCIRInterfaces + MLIRDLTIDialect + MLIRDataLayoutInterfaces + MLIRFuncDialect + MLIRLoopLikeInterface + MLIRLLVMDialect + MLIRSideEffectInterfaces + clangAST ) diff --git a/clang/lib/CIR/Dialect/IR/FPEnv.cpp b/clang/lib/CIR/Dialect/IR/FPEnv.cpp new file mode 100644 index 000000000000..01dfe1e92640 --- /dev/null +++ b/clang/lib/CIR/Dialect/IR/FPEnv.cpp @@ -0,0 +1,64 @@ +//===-- FPEnv.cpp ---- FP Environment -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// @file +/// This file contains the implementations of entities that describe floating +/// point environment. +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/IR/FPEnv.h" + +namespace cir { + +std::optional +convertRoundingModeToStr(llvm::RoundingMode UseRounding) { + std::optional RoundingStr; + switch (UseRounding) { + case llvm::RoundingMode::Dynamic: + RoundingStr = "round.dynamic"; + break; + case llvm::RoundingMode::NearestTiesToEven: + RoundingStr = "round.tonearest"; + break; + case llvm::RoundingMode::NearestTiesToAway: + RoundingStr = "round.tonearestaway"; + break; + case llvm::RoundingMode::TowardNegative: + RoundingStr = "round.downward"; + break; + case llvm::RoundingMode::TowardPositive: + RoundingStr = "round.upward"; + break; + case llvm::RoundingMode::TowardZero: + RoundingStr = "round.towardZero"; + break; + default: + break; + } + return RoundingStr; +} + +std::optional +convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { + std::optional ExceptStr; + switch (UseExcept) { + case fp::ebStrict: + ExceptStr = "fpexcept.strict"; + break; + case fp::ebIgnore: + ExceptStr = "fpexcept.ignore"; + break; + case fp::ebMayTrap: + ExceptStr = "fpexcept.maytrap"; + break; + } + return ExceptStr; +} + +} // namespace cir diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp new file mode 100644 index 000000000000..5c5ba4b573ce --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp @@ -0,0 +1,187 @@ +//===- CIRSimplify.cpp - performs CIR canonicalization --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Region.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" + +using namespace mlir; +using namespace cir; + +namespace { + +/// Removes branches between two blocks if it is the only branch. +/// +/// From: +/// ^bb0: +/// cir.br ^bb1 +/// ^bb1: // pred: ^bb0 +/// cir.return +/// +/// To: +/// ^bb0: +/// cir.return +struct RemoveRedundantBranches : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(BrOp op, + PatternRewriter &rewriter) const final { + Block *block = op.getOperation()->getBlock(); + Block *dest = op.getDest(); + + if (isa(dest->front())) + return failure(); + + // Single edge between blocks: merge it. + if (block->getNumSuccessors() == 1 && + dest->getSinglePredecessor() == block) { + rewriter.eraseOp(op); + rewriter.mergeBlocks(dest, block); + return success(); + } + + return failure(); + } +}; + +struct RemoveEmptyScope : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(ScopeOp op) const final { + return success(op.getRegion().empty() || + (op.getRegion().getBlocks().size() == 1 && + op.getRegion().front().empty())); + } + + void rewrite(ScopeOp op, PatternRewriter &rewriter) const final { + rewriter.eraseOp(op); + } +}; + +struct RemoveEmptySwitch : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(SwitchOp op) const final { + return success(op.getRegions().empty()); + } + + void rewrite(SwitchOp op, PatternRewriter &rewriter) const final { + rewriter.eraseOp(op); + } +}; + +struct RemoveTrivialTry : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(TryOp op) const final { + // FIXME: also check all catch regions are empty + // return success(op.getTryRegion().hasOneBlock()); + return mlir::failure(); + } + + void rewrite(TryOp op, PatternRewriter &rewriter) const final { + // Move try body to the parent. + assert(op.getTryRegion().hasOneBlock()); + + Block *parentBlock = op.getOperation()->getBlock(); + mlir::Block *tryBody = &op.getTryRegion().getBlocks().front(); + YieldOp y = dyn_cast(tryBody->getTerminator()); + assert(y && "expected well wrapped up try block"); + y->erase(); + + rewriter.inlineBlockBefore(tryBody, parentBlock, Block::iterator(op)); + rewriter.eraseOp(op); + } +}; + +// Remove call exception with empty cleanups +struct SimplifyCallOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult match(CallOp op) const final { + // Applicable to cir.call exception ... 
clean { cir.yield } + mlir::Region *r = &op.getCleanup(); + if (r->empty() || !r->hasOneBlock()) + return failure(); + + mlir::Block *b = &r->getBlocks().back(); + if (&b->back() != &b->front()) + return failure(); + + return success(isa(&b->getOperations().back())); + } + + void rewrite(CallOp op, PatternRewriter &rewriter) const final { + mlir::Block *b = &op.getCleanup().back(); + rewriter.eraseOp(&b->back()); + rewriter.eraseBlock(b); + } +}; + +//===----------------------------------------------------------------------===// +// CIRCanonicalizePass +//===----------------------------------------------------------------------===// + +struct CIRCanonicalizePass : public CIRCanonicalizeBase { + using CIRCanonicalizeBase::CIRCanonicalizeBase; + + // The same operation rewriting done here could have been performed + // by CanonicalizerPass (adding hasCanonicalizer for target Ops and + // implementing the same from above in CIRDialects.cpp). However, it's + // currently too aggressive for static analysis purposes, since it might + // remove things where a diagnostic can be generated. + // + // FIXME: perhaps we can add one more mode to GreedyRewriteConfig to + // disable this behavior. + void runOnOperation() override; +}; + +void populateCIRCanonicalizePatterns(RewritePatternSet &patterns) { + // clang-format off + patterns.add< + RemoveRedundantBranches, + RemoveEmptyScope, + RemoveEmptySwitch, + RemoveTrivialTry, + SimplifyCallOp + >(patterns.getContext()); + // clang-format on +} + +void CIRCanonicalizePass::runOnOperation() { + // Collect rewrite patterns. + RewritePatternSet patterns(&getContext()); + populateCIRCanonicalizePatterns(patterns); + + // Collect operations to apply patterns. + SmallVector ops; + getOperation()->walk([&](Operation *op) { + // CastOp here is to perform a manual `fold` in + // applyOpPatternsAndFold + if (isa(op)) + ops.push_back(op); + }); + + // Apply patterns. + if (applyOpPatternsAndFold(ops, std::move(patterns)).failed()) + signalPassFailure(); +} + +} // namespace + +std::unique_ptr mlir::createCIRCanonicalizePass() { + return std::make_unique(); +} diff --git a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp new file mode 100644 index 000000000000..f573691b20f4 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp @@ -0,0 +1,185 @@ +//===- CIRSimplify.cpp - performs CIR simplification ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Region.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" +#include "llvm/ADT/SmallVector.h" + +using namespace mlir; +using namespace cir; + +//===----------------------------------------------------------------------===// +// Rewrite patterns +//===----------------------------------------------------------------------===// + +namespace { + +/// Simplify suitable ternary operations into select operations. 
+/// +/// For now we only simplify those ternary operations whose true and false +/// branches directly yield a value or a constant. That is, both of the true and +/// the false branch must either contain a cir.yield operation as the only +/// operation in the branch, or contain a cir.const operation followed by a +/// cir.yield operation that yields the constant value. +/// +/// For example, we will simplify the following ternary operation: +/// +/// %0 = cir.ternary (%condition, true { +/// %1 = cir.const ... +/// cir.yield %1 +/// } false { +/// cir.yield %2 +/// }) +/// +/// into the following sequence of operations: +/// +/// %1 = cir.const ... +/// %0 = cir.select if %condition then %1 else %2 +struct SimplifyTernary final : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TernaryOp op, + PatternRewriter &rewriter) const override { + if (op->getNumResults() != 1) + return mlir::failure(); + + if (!isSimpleTernaryBranch(op.getTrueRegion()) || + !isSimpleTernaryBranch(op.getFalseRegion())) + return mlir::failure(); + + mlir::cir::YieldOp trueBranchYieldOp = mlir::cast( + op.getTrueRegion().front().getTerminator()); + mlir::cir::YieldOp falseBranchYieldOp = mlir::cast( + op.getFalseRegion().front().getTerminator()); + auto trueValue = trueBranchYieldOp.getArgs()[0]; + auto falseValue = falseBranchYieldOp.getArgs()[0]; + + rewriter.inlineBlockBefore(&op.getTrueRegion().front(), op); + rewriter.inlineBlockBefore(&op.getFalseRegion().front(), op); + rewriter.eraseOp(trueBranchYieldOp); + rewriter.eraseOp(falseBranchYieldOp); + rewriter.replaceOpWithNewOp(op, op.getCond(), + trueValue, falseValue); + + return mlir::success(); + } + +private: + bool isSimpleTernaryBranch(mlir::Region ®ion) const { + if (!region.hasOneBlock()) + return false; + + mlir::Block &onlyBlock = region.front(); + auto &ops = onlyBlock.getOperations(); + + // The region/block could only contain at most 2 operations. + if (ops.size() > 2) + return false; + + if (ops.size() == 1) { + // The region/block only contain a cir.yield operation. + return true; + } + + // Check whether the region/block contains a cir.const followed by a + // cir.yield that yields the value. 
+ auto yieldOp = mlir::cast(onlyBlock.getTerminator()); + auto yieldValueDefOp = mlir::dyn_cast_if_present( + yieldOp.getArgs()[0].getDefiningOp()); + return yieldValueDefOp && yieldValueDefOp->getBlock() == &onlyBlock; + } +}; + +struct SimplifySelect : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SelectOp op, + PatternRewriter &rewriter) const final { + mlir::Operation *trueValueOp = op.getTrueValue().getDefiningOp(); + mlir::Operation *falseValueOp = op.getFalseValue().getDefiningOp(); + auto trueValueConstOp = + mlir::dyn_cast_if_present(trueValueOp); + auto falseValueConstOp = + mlir::dyn_cast_if_present(falseValueOp); + if (!trueValueConstOp || !falseValueConstOp) + return mlir::failure(); + + auto trueValue = + mlir::dyn_cast(trueValueConstOp.getValue()); + auto falseValue = + mlir::dyn_cast(falseValueConstOp.getValue()); + if (!trueValue || !falseValue) + return mlir::failure(); + + // cir.select if %0 then #true else #false -> %0 + if (trueValue.getValue() && !falseValue.getValue()) { + rewriter.replaceAllUsesWith(op, op.getCondition()); + rewriter.eraseOp(op); + return mlir::success(); + } + + // cir.select if %0 then #false else #true -> cir.unary not %0 + if (!trueValue.getValue() && falseValue.getValue()) { + rewriter.replaceOpWithNewOp( + op, mlir::cir::UnaryOpKind::Not, op.getCondition()); + return mlir::success(); + } + + return mlir::failure(); + } +}; + +//===----------------------------------------------------------------------===// +// CIRSimplifyPass +//===----------------------------------------------------------------------===// + +struct CIRSimplifyPass : public CIRSimplifyBase { + using CIRSimplifyBase::CIRSimplifyBase; + + void runOnOperation() override; +}; + +void populateMergeCleanupPatterns(RewritePatternSet &patterns) { + // clang-format off + patterns.add< + SimplifyTernary, + SimplifySelect + >(patterns.getContext()); + // clang-format on +} + +void CIRSimplifyPass::runOnOperation() { + // Collect rewrite patterns. + RewritePatternSet patterns(&getContext()); + populateMergeCleanupPatterns(patterns); + + // Collect operations to apply patterns. + SmallVector ops; + getOperation()->walk([&](Operation *op) { + if (isa(op)) + ops.push_back(op); + }); + + // Apply patterns. 
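+  // Note: applyOpPatternsAndFold only drives the greedy rewriter over the
+  // operations collected above (applying the registered patterns and folding
+  // them), rather than walking the whole module again.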
+ if (applyOpPatternsAndFold(ops, std::move(patterns)).failed()) + signalPassFailure(); +} + +} // namespace + +std::unique_ptr mlir::createCIRSimplifyPass() { + return std::make_unique(); +} diff --git a/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt new file mode 100644 index 000000000000..d675f17042b6 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt @@ -0,0 +1,32 @@ +add_subdirectory(TargetLowering) + +add_clang_library(MLIRCIRTransforms + LifetimeCheck.cpp + LoweringPrepare.cpp + CIRCanonicalize.cpp + CIRSimplify.cpp + DropAST.cpp + IdiomRecognizer.cpp + LibOpt.cpp + StdHelpers.cpp + FlattenCFG.cpp + GotoSolver.cpp + SCFPrepare.cpp + CallConvLowering.cpp + + DEPENDS + MLIRCIRPassIncGen + + LINK_LIBS PUBLIC + clangAST + clangBasic + TargetLowering + + MLIRAnalysis + MLIRIR + MLIRPass + MLIRTransformUtils + + MLIRCIR + MLIRCIRInterfaces +) diff --git a/clang/lib/CIR/Dialect/Transforms/CallConvLowering.cpp b/clang/lib/CIR/Dialect/Transforms/CallConvLowering.cpp new file mode 100644 index 000000000000..db031ae06b55 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/CallConvLowering.cpp @@ -0,0 +1,103 @@ +//===- CallConvLowering.cpp - Rewrites functions according to call convs --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +#include "TargetLowering/LowerModule.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +#define GEN_PASS_DEF_CALLCONVLOWERING +#include "clang/CIR/Dialect/Passes.h.inc" + +namespace mlir { +namespace cir { + +//===----------------------------------------------------------------------===// +// Rewrite Patterns +//===----------------------------------------------------------------------===// + +struct CallConvLoweringPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(FuncOp op, + PatternRewriter &rewriter) const final { + const auto module = op->getParentOfType(); + + if (!op.getAst()) + return op.emitError("function has no AST information"); + + auto modOp = op->getParentOfType(); + std::unique_ptr lowerModule = + createLowerModule(modOp, rewriter); + + // Rewrite function calls before definitions. This should be done before + // lowering the definition. + auto calls = op.getSymbolUses(module); + if (calls.has_value()) { + for (auto call : calls.value()) { + auto callOp = cast(call.getUser()); + if (lowerModule->rewriteFunctionCall(callOp, op).failed()) + return failure(); + } + } + + // TODO(cir): Instead of re-emmiting every load and store, bitcast arguments + // and return values to their ABI-specific counterparts when possible. 
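+    // Finally, lower the definition itself: rewriteFunctionDefinition is
+    // expected to rewrite the function's signature (and how its arguments are
+    // accessed) into the ABI-lowered form computed by the target LowerModule
+    // created above; on any unsupported case the pattern fails and the pass
+    // reports failure.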
+ if (lowerModule->rewriteFunctionDefinition(op).failed()) + return failure(); + + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// Pass +//===----------------------------------------------------------------------===// + +struct CallConvLoweringPass + : ::impl::CallConvLoweringBase { + using CallConvLoweringBase::CallConvLoweringBase; + + void runOnOperation() override; + StringRef getArgument() const override { return "cir-call-conv-lowering"; }; +}; + +void populateCallConvLoweringPassPatterns(RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); +} + +void CallConvLoweringPass::runOnOperation() { + + // Collect rewrite patterns. + RewritePatternSet patterns(&getContext()); + populateCallConvLoweringPassPatterns(patterns); + + // Collect operations to be considered by the pass. + SmallVector ops; + getOperation()->walk([&](FuncOp op) { ops.push_back(op); }); + + // Configure rewrite to ignore new ops created during the pass. + GreedyRewriteConfig config; + config.strictMode = GreedyRewriteStrictness::ExistingOps; + + // Apply patterns. + if (failed(applyOpPatternsAndFold(ops, std::move(patterns), config))) + signalPassFailure(); +} + +} // namespace cir + +std::unique_ptr createCallConvLoweringPass() { + return std::make_unique(); +} + +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/DropAST.cpp b/clang/lib/CIR/Dialect/Transforms/DropAST.cpp new file mode 100644 index 000000000000..b8745cdf0c2f --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/DropAST.cpp @@ -0,0 +1,50 @@ +//===- DropAST.cpp - emit diagnostic checks for lifetime violations -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Dialect/Passes.h" + +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "clang/AST/ASTContext.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallSet.h" + +using namespace mlir; +using namespace cir; + +namespace { +struct DropASTPass : public DropASTBase { + DropASTPass() = default; + void runOnOperation() override; +}; +} // namespace + +void DropASTPass::runOnOperation() { + Operation *op = getOperation(); + // This needs to be updated with operations that start + // carrying AST around. + op->walk([&](Operation *op) { + if (auto alloca = dyn_cast(op)) { + alloca.removeAstAttr(); + auto ty = mlir::dyn_cast(alloca.getAllocaType()); + if (!ty) + return; + ty.dropAst(); + return; + } + + if (auto funcOp = dyn_cast(op)) + funcOp.removeAstAttr(); + }); +} + +std::unique_ptr mlir::createDropASTPass() { + return std::make_unique(); +} diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp new file mode 100644 index 000000000000..122ee1fe07aa --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -0,0 +1,909 @@ +//====- FlattenCFG.cpp - Flatten CIR CFG ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements pass that inlines CIR operations regions into the parent +// function region. +// +//===----------------------------------------------------------------------===// +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" + +using namespace mlir; +using namespace mlir::cir; + +namespace { + +/// Lowers operations with the terminator trait that have a single successor. +void lowerTerminator(mlir::Operation *op, mlir::Block *dest, + mlir::PatternRewriter &rewriter) { + assert(op->hasTrait() && "not a terminator"); + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(op); + rewriter.replaceOpWithNewOp(op, dest); +} + +/// Walks a region while skipping operations of type `Ops`. This ensures the +/// callback is not applied to said operations and its children. +template +void walkRegionSkipping( + mlir::Region ®ion, + mlir::function_ref callback) { + region.walk([&](mlir::Operation *op) { + if (isa(op)) + return mlir::WalkResult::skip(); + return callback(op); + }); +} + +struct FlattenCFGPass : public FlattenCFGBase { + + FlattenCFGPass() = default; + void runOnOperation() override; +}; + +struct CIRIfFlattening : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::IfOp ifOp, + mlir::PatternRewriter &rewriter) const override { + mlir::OpBuilder::InsertionGuard guard(rewriter); + auto loc = ifOp.getLoc(); + auto emptyElse = ifOp.getElseRegion().empty(); + + auto *currentBlock = rewriter.getInsertionBlock(); + auto *remainingOpsBlock = + rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint()); + mlir::Block *continueBlock; + if (ifOp->getResults().size() == 0) + continueBlock = remainingOpsBlock; + else + llvm_unreachable("NYI"); + + // Inline then region + auto *thenBeforeBody = &ifOp.getThenRegion().front(); + auto *thenAfterBody = &ifOp.getThenRegion().back(); + rewriter.inlineRegionBefore(ifOp.getThenRegion(), continueBlock); + + rewriter.setInsertionPointToEnd(thenAfterBody); + if (auto thenYieldOp = + dyn_cast(thenAfterBody->getTerminator())) { + rewriter.replaceOpWithNewOp( + thenYieldOp, thenYieldOp.getArgs(), continueBlock); + } + + rewriter.setInsertionPointToEnd(continueBlock); + + // Has else region: inline it. 
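+    // (If there is no else region, both "else" markers below simply point at
+    // the continue block, so the false edge of the conditional branch created
+    // further down jumps straight past the if.)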
+ mlir::Block *elseBeforeBody = nullptr; + mlir::Block *elseAfterBody = nullptr; + if (!emptyElse) { + elseBeforeBody = &ifOp.getElseRegion().front(); + elseAfterBody = &ifOp.getElseRegion().back(); + rewriter.inlineRegionBefore(ifOp.getElseRegion(), thenAfterBody); + } else { + elseBeforeBody = elseAfterBody = continueBlock; + } + + rewriter.setInsertionPointToEnd(currentBlock); + rewriter.create(loc, ifOp.getCondition(), + thenBeforeBody, elseBeforeBody); + + if (!emptyElse) { + rewriter.setInsertionPointToEnd(elseAfterBody); + if (auto elseYieldOp = + dyn_cast(elseAfterBody->getTerminator())) { + rewriter.replaceOpWithNewOp( + elseYieldOp, elseYieldOp.getArgs(), continueBlock); + } + } + + rewriter.replaceOp(ifOp, continueBlock->getArguments()); + return mlir::success(); + } +}; + +class CIRScopeOpFlattening : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ScopeOp scopeOp, + mlir::PatternRewriter &rewriter) const override { + mlir::OpBuilder::InsertionGuard guard(rewriter); + auto loc = scopeOp.getLoc(); + + // Empty scope: just remove it. + if (scopeOp.getRegion().empty()) { + rewriter.eraseOp(scopeOp); + return mlir::success(); + } + + // Split the current block before the ScopeOp to create the inlining + // point. + auto *currentBlock = rewriter.getInsertionBlock(); + auto *remainingOpsBlock = + rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint()); + mlir::Block *continueBlock; + if (scopeOp.getNumResults() == 0) + continueBlock = remainingOpsBlock; + else + llvm_unreachable("NYI"); + + // Inline body region. + auto *beforeBody = &scopeOp.getRegion().front(); + auto *afterBody = &scopeOp.getRegion().back(); + rewriter.inlineRegionBefore(scopeOp.getRegion(), continueBlock); + + // Save stack and then branch into the body of the region. + rewriter.setInsertionPointToEnd(currentBlock); + // TODO(CIR): stackSaveOp + // auto stackSaveOp = rewriter.create( + // loc, mlir::LLVM::LLVMPointerType::get( + // mlir::IntegerType::get(scopeOp.getContext(), 8))); + rewriter.create(loc, mlir::ValueRange(), beforeBody); + + // Replace the scopeop return with a branch that jumps out of the body. + // Stack restore before leaving the body region. + rewriter.setInsertionPointToEnd(afterBody); + if (auto yieldOp = + dyn_cast(afterBody->getTerminator())) { + rewriter.replaceOpWithNewOp(yieldOp, yieldOp.getArgs(), + continueBlock); + } + + // TODO(cir): stackrestore? + + // Replace the op with values return from the body region. + rewriter.replaceOp(scopeOp, continueBlock->getArguments()); + + return mlir::success(); + } +}; + +class CIRTryOpFlattening : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + mlir::Block *buildTypeCase(mlir::PatternRewriter &rewriter, mlir::Region &r, + mlir::Block *afterTry, + mlir::Type exceptionPtrTy) const { + YieldOp yieldOp; + CatchParamOp paramOp; + r.walk([&](YieldOp op) { + assert(!yieldOp && "expect to only find one"); + yieldOp = op; + }); + r.walk([&](CatchParamOp op) { + assert(!paramOp && "expect to only find one"); + paramOp = op; + }); + rewriter.inlineRegionBefore(r, afterTry); + + // Rewrite `cir.catch_param` to be scope aware and instead generate: + // ``` + // cir.catch_param begin %exception_ptr + // ... + // cir.catch_param end + // cir.br ... 
+ mlir::Value catchResult = paramOp.getParam(); + assert(catchResult && "expected to be available"); + rewriter.setInsertionPointAfterValue(catchResult); + auto catchType = catchResult.getType(); + mlir::Block *entryBlock = paramOp->getBlock(); + mlir::Location catchLoc = paramOp.getLoc(); + // Catch handler only gets the exception pointer (selection not needed). + mlir::Value exceptionPtr = + entryBlock->addArgument(exceptionPtrTy, paramOp.getLoc()); + + rewriter.replaceOpWithNewOp( + paramOp, catchType, exceptionPtr, + mlir::cir::CatchParamKindAttr::get(rewriter.getContext(), + mlir::cir::CatchParamKind::begin)); + + rewriter.setInsertionPoint(yieldOp); + rewriter.create( + catchLoc, mlir::Type{}, nullptr, + mlir::cir::CatchParamKindAttr::get(rewriter.getContext(), + mlir::cir::CatchParamKind::end)); + + rewriter.setInsertionPointToEnd(yieldOp->getBlock()); + rewriter.replaceOpWithNewOp(yieldOp, afterTry); + return entryBlock; + } + + void buildUnwindCase(mlir::PatternRewriter &rewriter, mlir::Region &r, + mlir::Block *unwindBlock) const { + assert(&r.front() == &r.back() && "only one block expected"); + rewriter.mergeBlocks(&r.back(), unwindBlock); + auto resume = dyn_cast(unwindBlock->getTerminator()); + assert(resume && "expected 'cir.resume'"); + rewriter.setInsertionPointToEnd(unwindBlock); + rewriter.replaceOpWithNewOp( + resume, unwindBlock->getArgument(0), unwindBlock->getArgument(1)); + } + + void buildAllCase(mlir::PatternRewriter &rewriter, mlir::Region &r, + mlir::Block *afterTry, mlir::Block *catchAllBlock, + mlir::Value exceptionPtr) const { + YieldOp yieldOp; + CatchParamOp paramOp; + r.walk([&](YieldOp op) { + assert(!yieldOp && "expect to only find one"); + yieldOp = op; + }); + r.walk([&](CatchParamOp op) { + assert(!paramOp && "expect to only find one"); + paramOp = op; + }); + mlir::Block *catchAllStartBB = &r.front(); + rewriter.inlineRegionBefore(r, afterTry); + rewriter.mergeBlocks(catchAllStartBB, catchAllBlock); + + // Rewrite `cir.catch_param` to be scope aware and instead generate: + // ``` + // cir.catch_param begin %exception_ptr + // ... + // cir.catch_param end + // cir.br ... + mlir::Value catchResult = paramOp.getParam(); + assert(catchResult && "expected to be available"); + rewriter.setInsertionPointAfterValue(catchResult); + auto catchType = catchResult.getType(); + mlir::Location catchLoc = paramOp.getLoc(); + rewriter.replaceOpWithNewOp( + paramOp, catchType, exceptionPtr, + mlir::cir::CatchParamKindAttr::get(rewriter.getContext(), + mlir::cir::CatchParamKind::begin)); + + rewriter.setInsertionPoint(yieldOp); + rewriter.create( + catchLoc, mlir::Type{}, nullptr, + mlir::cir::CatchParamKindAttr::get(rewriter.getContext(), + mlir::cir::CatchParamKind::end)); + + rewriter.setInsertionPointToEnd(yieldOp->getBlock()); + rewriter.replaceOpWithNewOp(yieldOp, afterTry); + } + + mlir::ArrayAttr collectTypeSymbols(mlir::cir::TryOp tryOp) const { + mlir::ArrayAttr caseAttrList = tryOp.getCatchTypesAttr(); + llvm::SmallVector symbolList; + + for (mlir::Attribute caseAttr : caseAttrList) { + auto typeIdGlobal = dyn_cast(caseAttr); + if (!typeIdGlobal) + continue; + symbolList.push_back(typeIdGlobal.getSymbol()); + } + + // Return an empty attribute instead of an empty list... 
+ if (symbolList.empty()) + return {}; + return mlir::ArrayAttr::get(caseAttrList.getContext(), symbolList); + } + + void buildLandingPad(mlir::cir::TryOp tryOp, mlir::PatternRewriter &rewriter, + mlir::Block *beforeCatch, mlir::Block *landingPadBlock, + mlir::Block *catchDispatcher, + SmallVectorImpl &callsToRewrite, + unsigned callIdx, bool tryOnlyHasCatchAll, + mlir::Type exceptionPtrType, + mlir::Type typeIdType) const { + rewriter.setInsertionPointToEnd(landingPadBlock); + mlir::ArrayAttr symlist = collectTypeSymbols(tryOp); + auto inflightEh = rewriter.create( + tryOp.getLoc(), exceptionPtrType, typeIdType, + tryOp.getCleanup() ? mlir::UnitAttr::get(tryOp.getContext()) : nullptr, + symlist); + auto selector = inflightEh.getTypeId(); + auto exceptionPtr = inflightEh.getExceptionPtr(); + + // Time to emit cleanup's. + mlir::cir::CallOp callOp = callsToRewrite[callIdx]; + if (!callOp.getCleanup().empty()) { + mlir::Block *cleanupBlock = &callOp.getCleanup().getBlocks().back(); + auto cleanupYield = + cast(cleanupBlock->getTerminator()); + cleanupYield->erase(); + rewriter.mergeBlocks(cleanupBlock, landingPadBlock); + rewriter.setInsertionPointToEnd(landingPadBlock); + } + + // Branch out to the catch clauses dispatcher. + assert(catchDispatcher->getNumArguments() >= 1 && + "expected at least one argument in place"); + SmallVector dispatcherInitOps = {exceptionPtr}; + if (!tryOnlyHasCatchAll) { + assert(catchDispatcher->getNumArguments() == 2 && + "expected two arguments in place"); + dispatcherInitOps.push_back(selector); + } + rewriter.create(tryOp.getLoc(), catchDispatcher, + dispatcherInitOps); + return; + } + + mlir::Block * + buildLandingPads(mlir::cir::TryOp tryOp, mlir::PatternRewriter &rewriter, + mlir::Block *beforeCatch, mlir::Block *afterTry, + SmallVectorImpl &callsToRewrite, + SmallVectorImpl &landingPads, + bool tryOnlyHasCatchAll) const { + unsigned numCalls = callsToRewrite.size(); + // Create the first landing pad block and a placeholder for the initial + // catch dispatcher (which will be the common destination for every new + // landing pad we create). + auto *landingPadBlock = + rewriter.splitBlock(beforeCatch, rewriter.getInsertionPoint()); + + // For the dispatcher, already add the block arguments and prepare the + // proper types the landing pad should use to jump to. 
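+    // The dispatcher always receives the in-flight exception pointer as a
+    // block argument; unless the try only has a catch(...), it also receives
+    // the type-id selector, which the catch chain below compares against each
+    // handler's type symbol.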
+ mlir::Block *dispatcher = rewriter.createBlock(afterTry); + auto exceptionPtrType = mlir::cir::PointerType::get( + mlir::cir::VoidType::get(rewriter.getContext())); + auto typeIdType = mlir::cir::IntType::get(getContext(), 32, false); + dispatcher->addArgument(exceptionPtrType, tryOp.getLoc()); + if (!tryOnlyHasCatchAll) + dispatcher->addArgument(typeIdType, tryOp.getLoc()); + + for (unsigned callIdx = 0; callIdx != numCalls; ++callIdx) { + buildLandingPad(tryOp, rewriter, beforeCatch, landingPadBlock, dispatcher, + callsToRewrite, callIdx, tryOnlyHasCatchAll, + exceptionPtrType, typeIdType); + landingPads.push_back(landingPadBlock); + if (callIdx < numCalls - 1) + landingPadBlock = rewriter.createBlock(dispatcher); + } + + return dispatcher; + } + + mlir::Block *buildCatch(mlir::cir::TryOp tryOp, + mlir::PatternRewriter &rewriter, + mlir::Block *afterTry, mlir::Block *dispatcher, + SmallVectorImpl &callsToRewrite, + mlir::Attribute catchAttr, + mlir::Attribute nextCatchAttr, + mlir::Region &catchRegion) const { + mlir::Location loc = tryOp.getLoc(); + mlir::Block *nextDispatcher = nullptr; + if (auto typeIdGlobal = dyn_cast(catchAttr)) { + auto *previousDispatcher = dispatcher; + auto typeId = + rewriter.create(loc, typeIdGlobal.getSymbol()); + auto ehPtr = previousDispatcher->getArgument(0); + auto ehSel = previousDispatcher->getArgument(1); + + auto match = rewriter.create( + loc, mlir::cir::BoolType::get(rewriter.getContext()), + mlir::cir::CmpOpKind::eq, ehSel, typeId); + + mlir::Block *typeCatchBlock = + buildTypeCase(rewriter, catchRegion, afterTry, ehPtr.getType()); + nextDispatcher = rewriter.createBlock(afterTry); + rewriter.setInsertionPointToEnd(previousDispatcher); + + // Next dispatcher gets by default both exception ptr and selector info, + // but on a catch all we don't need selector info. + nextDispatcher->addArgument(ehPtr.getType(), loc); + SmallVector nextDispatchOps = {ehPtr}; + if (!isa(nextCatchAttr)) { + nextDispatcher->addArgument(ehSel.getType(), loc); + nextDispatchOps.push_back(ehSel); + } + + rewriter.create( + loc, match, typeCatchBlock, nextDispatcher, mlir::ValueRange{ehPtr}, + nextDispatchOps); + rewriter.setInsertionPointToEnd(nextDispatcher); + } else if (auto catchAll = dyn_cast(catchAttr)) { + // In case the catch(...) is all we got, `dispatcher` shall be + // non-empty. + assert(dispatcher->getArguments().size() == 1 && + "expected one block argument"); + auto ehPtr = dispatcher->getArgument(0); + buildAllCase(rewriter, catchRegion, afterTry, dispatcher, ehPtr); + // Do not update `nextDispatcher`, no more business in try/catch + } else if (auto catchUnwind = + dyn_cast(catchAttr)) { + // assert(dispatcher->empty() && "expect empty dispatcher"); + // assert(!dispatcher->args_empty() && "expected block argument"); + assert(dispatcher->getArguments().size() == 2 && + "expected two block argument"); + buildUnwindCase(rewriter, catchRegion, dispatcher); + // Do not update `nextDispatcher`, no more business in try/catch + } + return nextDispatcher; + } + + void buildCatchers(mlir::cir::TryOp tryOp, mlir::PatternRewriter &rewriter, + mlir::Block *afterBody, mlir::Block *afterTry, + SmallVectorImpl &callsToRewrite, + SmallVectorImpl &landingPads) const { + // Replace the tryOp return with a branch that jumps out of the body. 
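+    // Overall shape: the try body falls through to `afterTry`; every rewritten
+    // call gets its own landing pad that collects the in-flight exception and
+    // branches to a chain of dispatcher blocks, one comparison per typed catch
+    // clause, ending in the catch(...) or unwind handler.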
+ rewriter.setInsertionPointToEnd(afterBody); + auto tryBodyYield = cast(afterBody->getTerminator()); + + mlir::Block *beforeCatch = rewriter.getInsertionBlock(); + rewriter.setInsertionPointToEnd(beforeCatch); + rewriter.replaceOpWithNewOp(tryBodyYield, afterTry); + + // Retrieve catch list and some properties. + mlir::ArrayAttr catchAttrList = tryOp.getCatchTypesAttr(); + bool tryOnlyHasCatchAll = catchAttrList.size() == 1 && + isa(catchAttrList[0]); + + // Start the landing pad by getting the inflight exception information. + mlir::Block *nextDispatcher = + buildLandingPads(tryOp, rewriter, beforeCatch, afterTry, callsToRewrite, + landingPads, tryOnlyHasCatchAll); + + // Fill in dispatcher to all catch clauses. + rewriter.setInsertionPointToEnd(nextDispatcher); + llvm::MutableArrayRef catchRegions = tryOp.getCatchRegions(); + unsigned catchIdx = 0; + + // Build control-flow for all catch clauses. + for (mlir::Attribute catchAttr : catchAttrList) { + mlir::Attribute nextCatchAttr; + if (catchIdx + 1 < catchAttrList.size()) + nextCatchAttr = catchAttrList[catchIdx + 1]; + nextDispatcher = + buildCatch(tryOp, rewriter, afterTry, nextDispatcher, callsToRewrite, + catchAttr, nextCatchAttr, catchRegions[catchIdx]); + catchIdx++; + } + + assert(!nextDispatcher && "last dispatch expected to be nullptr"); + } + + mlir::Block *buildTryBody(mlir::cir::TryOp tryOp, + mlir::PatternRewriter &rewriter) const { + auto loc = tryOp.getLoc(); + // Split the current block before the TryOp to create the inlining + // point. + auto *beforeTryScopeBlock = rewriter.getInsertionBlock(); + mlir::Block *afterTry = + rewriter.splitBlock(beforeTryScopeBlock, rewriter.getInsertionPoint()); + + // Inline body region. + auto *beforeBody = &tryOp.getTryRegion().front(); + rewriter.inlineRegionBefore(tryOp.getTryRegion(), afterTry); + + // Branch into the body of the region. + rewriter.setInsertionPointToEnd(beforeTryScopeBlock); + rewriter.create(loc, mlir::ValueRange(), beforeBody); + return afterTry; + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::TryOp tryOp, + mlir::PatternRewriter &rewriter) const override { + mlir::OpBuilder::InsertionGuard guard(rewriter); + auto *afterBody = &tryOp.getTryRegion().back(); + + // Empty scope: just remove it. + if (tryOp.getTryRegion().empty()) { + rewriter.eraseOp(tryOp); + return mlir::success(); + } + + // Grab the collection of `cir.call exception`s to rewrite to + // `cir.try_call`. + SmallVector callsToRewrite; + tryOp.getTryRegion().walk([&](CallOp op) { + // Only grab calls within immediate closest TryOp scope. + if (op->getParentOfType() != tryOp) + return; + if (!op.getException()) + return; + callsToRewrite.push_back(op); + }); + + // Build try body. + mlir::Block *afterTry = buildTryBody(tryOp, rewriter); + + // Build catchers. + SmallVector landingPads; + buildCatchers(tryOp, rewriter, afterBody, afterTry, callsToRewrite, + landingPads); + rewriter.eraseOp(tryOp); + assert((landingPads.size() == callsToRewrite.size()) && + "expected matching number of entries"); + + // Rewrite calls. 
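+    // Each `cir.call exception` inside the try body becomes a `cir.try_call`
+    // that branches to `cont` (the split-off continuation) on the normal path
+    // and to its matching landing pad on unwind.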
+ unsigned callIdx = 0; + for (CallOp callOp : callsToRewrite) { + mlir::Block *callBlock = callOp->getBlock(); + mlir::Block *cont = + rewriter.splitBlock(callBlock, mlir::Block::iterator(callOp)); + mlir::cir::ExtraFuncAttributesAttr extraAttrs = callOp.getExtraAttrs(); + std::optional ast = callOp.getAst(); + + mlir::FlatSymbolRefAttr symbol; + if (!callOp.isIndirect()) + symbol = callOp.getCalleeAttr(); + rewriter.setInsertionPointToEnd(callBlock); + mlir::Type resTy = nullptr; + if (callOp.getNumResults() > 0) + resTy = callOp.getResult().getType(); + auto tryCall = rewriter.replaceOpWithNewOp( + callOp, symbol, resTy, cont, landingPads[callIdx], + callOp.getOperands()); + tryCall.setExtraAttrsAttr(extraAttrs); + if (ast) + tryCall.setAstAttr(*ast); + callIdx++; + } + + // Quick block cleanup: no indirection to the post try block. + auto brOp = dyn_cast(afterTry->getTerminator()); + if (brOp) { + mlir::Block *srcBlock = brOp.getDest(); + rewriter.eraseOp(brOp); + rewriter.mergeBlocks(srcBlock, afterTry); + } + return mlir::success(); + } +}; + +class CIRLoopOpInterfaceFlattening + : public mlir::OpInterfaceRewritePattern { +public: + using mlir::OpInterfaceRewritePattern< + mlir::cir::LoopOpInterface>::OpInterfaceRewritePattern; + + inline void lowerConditionOp(mlir::cir::ConditionOp op, mlir::Block *body, + mlir::Block *exit, + mlir::PatternRewriter &rewriter) const { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(op); + rewriter.replaceOpWithNewOp(op, op.getCondition(), + body, exit); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::LoopOpInterface op, + mlir::PatternRewriter &rewriter) const final { + // Setup CFG blocks. + auto *entry = rewriter.getInsertionBlock(); + auto *exit = rewriter.splitBlock(entry, rewriter.getInsertionPoint()); + auto *cond = &op.getCond().front(); + auto *body = &op.getBody().front(); + auto *step = (op.maybeGetStep() ? &op.maybeGetStep()->front() : nullptr); + + // Setup loop entry branch. + rewriter.setInsertionPointToEnd(entry); + rewriter.create(op.getLoc(), &op.getEntry().front()); + + // Branch from condition region to body or exit. + auto conditionOp = cast(cond->getTerminator()); + lowerConditionOp(conditionOp, body, exit, rewriter); + + // TODO(cir): Remove the walks below. It visits operations unnecessarily, + // however, to solve this we would likely need a custom DialecConversion + // driver to customize the order that operations are visited. + + // Lower continue statements. + mlir::Block *dest = (step ? step : cond); + op.walkBodySkippingNestedLoops([&](mlir::Operation *op) { + if (!isa(op)) + return mlir::WalkResult::advance(); + + lowerTerminator(op, dest, rewriter); + return mlir::WalkResult::skip(); + }); + + // Lower break statements. + walkRegionSkipping( + op.getBody(), [&](mlir::Operation *op) { + if (!isa(op)) + return mlir::WalkResult::advance(); + + lowerTerminator(op, exit, rewriter); + return mlir::WalkResult::skip(); + }); + + // Lower optional body region yield. + for (auto &blk : op.getBody().getBlocks()) { + auto bodyYield = dyn_cast(blk.getTerminator()); + if (bodyYield) + lowerTerminator(bodyYield, (step ? step : cond), rewriter); + } + + // Lower mandatory step region yield. + if (step) + lowerTerminator(cast(step->getTerminator()), cond, + rewriter); + + // Move region contents out of the loop op. 
+ rewriter.inlineRegionBefore(op.getCond(), exit); + rewriter.inlineRegionBefore(op.getBody(), exit); + if (step) + rewriter.inlineRegionBefore(*op.maybeGetStep(), exit); + + rewriter.eraseOp(op); + return mlir::success(); + } +}; + +class CIRSwitchOpFlattening + : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + inline void rewriteYieldOp(mlir::PatternRewriter &rewriter, + mlir::cir::YieldOp yieldOp, + mlir::Block *destination) const { + rewriter.setInsertionPoint(yieldOp); + rewriter.replaceOpWithNewOp(yieldOp, yieldOp.getOperands(), + destination); + } + + // Return the new defaultDestination block. + Block *condBrToRangeDestination(mlir::cir::SwitchOp op, + mlir::PatternRewriter &rewriter, + mlir::Block *rangeDestination, + mlir::Block *defaultDestination, + APInt lowerBound, APInt upperBound) const { + assert(lowerBound.sle(upperBound) && "Invalid range"); + auto resBlock = rewriter.createBlock(defaultDestination); + auto sIntType = mlir::cir::IntType::get(op.getContext(), 32, true); + auto uIntType = mlir::cir::IntType::get(op.getContext(), 32, false); + + auto rangeLength = rewriter.create( + op.getLoc(), sIntType, + mlir::cir::IntAttr::get(op.getContext(), sIntType, + upperBound - lowerBound)); + + auto lowerBoundValue = rewriter.create( + op.getLoc(), sIntType, + mlir::cir::IntAttr::get(op.getContext(), sIntType, lowerBound)); + auto diffValue = rewriter.create( + op.getLoc(), sIntType, mlir::cir::BinOpKind::Sub, op.getCondition(), + lowerBoundValue); + + // Use unsigned comparison to check if the condition is in the range. + auto uDiffValue = rewriter.create( + op.getLoc(), uIntType, CastKind::integral, diffValue); + auto uRangeLength = rewriter.create( + op.getLoc(), uIntType, CastKind::integral, rangeLength); + + auto cmpResult = rewriter.create( + op.getLoc(), mlir::cir::BoolType::get(op.getContext()), + mlir::cir::CmpOpKind::le, uDiffValue, uRangeLength); + rewriter.create(op.getLoc(), cmpResult, + rangeDestination, defaultDestination); + return resBlock; + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::SwitchOp op, + mlir::PatternRewriter &rewriter) const override { + // Empty switch statement: just erase it. + if (!op.getCases().has_value() || op.getCases()->empty()) { + rewriter.eraseOp(op); + return mlir::success(); + } + + // Create exit block. + rewriter.setInsertionPointAfter(op); + auto *exitBlock = + rewriter.splitBlock(rewriter.getBlock(), rewriter.getInsertionPoint()); + + // Allocate required data structures (disconsider default case in + // vectors). + llvm::SmallVector caseValues; + llvm::SmallVector caseDestinations; + llvm::SmallVector caseOperands; + + llvm::SmallVector> rangeValues; + llvm::SmallVector rangeDestinations; + llvm::SmallVector rangeOperands; + + // Initialize default case as optional. + mlir::Block *defaultDestination = exitBlock; + mlir::ValueRange defaultOperands = exitBlock->getArguments(); + + // Track fallthrough between cases. + mlir::cir::YieldOp fallthroughYieldOp = nullptr; + + // Digest the case statements values and bodies. + for (size_t i = 0; i < op.getCases()->size(); ++i) { + auto ®ion = op.getRegion(i); + auto caseAttr = cast(op.getCases()->getValue()[i]); + + // Found default case: save destination and operands. 
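+      // (Equal/Anyof cases are appended to the flat caseValues and
+      // caseDestinations vectors; Range cases are collected separately and,
+      // after this loop, either expanded into individual cases or turned into
+      // a comparison-based dispatch.)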
+ switch (caseAttr.getKind().getValue()) { + case mlir::cir::CaseOpKind::Default: + defaultDestination = ®ion.front(); + defaultOperands = region.getArguments(); + break; + case mlir::cir::CaseOpKind::Range: + assert(caseAttr.getValue().size() == 2 && + "Case range should have 2 case value"); + rangeValues.push_back( + {cast(caseAttr.getValue()[0]).getValue(), + cast(caseAttr.getValue()[1]).getValue()}); + rangeDestinations.push_back(®ion.front()); + rangeOperands.push_back(region.getArguments()); + break; + case mlir::cir::CaseOpKind::Anyof: + case mlir::cir::CaseOpKind::Equal: + // AnyOf cases kind can have multiple values, hence the loop below. + for (auto &value : caseAttr.getValue()) { + caseValues.push_back(cast(value).getValue()); + caseOperands.push_back(region.getArguments()); + caseDestinations.push_back(®ion.front()); + } + break; + } + + // Previous case is a fallthrough: branch it to this case. + if (fallthroughYieldOp) { + rewriteYieldOp(rewriter, fallthroughYieldOp, ®ion.front()); + fallthroughYieldOp = nullptr; + } + + for (auto &blk : region.getBlocks()) { + if (blk.getNumSuccessors()) + continue; + + // Handle switch-case yields. + if (auto yieldOp = dyn_cast(blk.getTerminator())) + fallthroughYieldOp = yieldOp; + } + + // Handle break statements. + walkRegionSkipping( + region, [&](mlir::Operation *op) { + if (!isa(op)) + return mlir::WalkResult::advance(); + + lowerTerminator(op, exitBlock, rewriter); + return mlir::WalkResult::skip(); + }); + + // Extract region contents before erasing the switch op. + rewriter.inlineRegionBefore(region, exitBlock); + } + + // Last case is a fallthrough: branch it to exit. + if (fallthroughYieldOp) { + rewriteYieldOp(rewriter, fallthroughYieldOp, exitBlock); + fallthroughYieldOp = nullptr; + } + + for (size_t index = 0; index < rangeValues.size(); ++index) { + auto lowerBound = rangeValues[index].first; + auto upperBound = rangeValues[index].second; + + // The case range is unreachable, skip it. + if (lowerBound.sgt(upperBound)) + continue; + + // If range is small, add multiple switch instruction cases. + // This magical number is from the original CGStmt code. + constexpr int kSmallRangeThreshold = 64; + if ((upperBound - lowerBound) + .ult(llvm::APInt(32, kSmallRangeThreshold))) { + for (auto iValue = lowerBound; iValue.sle(upperBound); (void)iValue++) { + caseValues.push_back(iValue); + caseOperands.push_back(rangeOperands[index]); + caseDestinations.push_back(rangeDestinations[index]); + } + continue; + } + + defaultDestination = + condBrToRangeDestination(op, rewriter, rangeDestinations[index], + defaultDestination, lowerBound, upperBound); + defaultOperands = rangeOperands[index]; + } + + // Set switch op to branch to the newly created blocks. + rewriter.setInsertionPoint(op); + rewriter.replaceOpWithNewOp( + op, op.getCondition(), defaultDestination, defaultOperands, caseValues, + caseDestinations, caseOperands); + + return mlir::success(); + } +}; +class CIRTernaryOpFlattening + : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::TernaryOp op, + mlir::PatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + auto *condBlock = rewriter.getInsertionBlock(); + auto opPosition = rewriter.getInsertionPoint(); + auto *remainingOpsBlock = rewriter.splitBlock(condBlock, opPosition); + SmallVector locs; + // Ternary result is optional, make sure to populate the location only + // when relevant. 
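+    // Rough sketch of the result (not exact CIR syntax):
+    //   ^cond:   cir.brcond %c, ^true, ^false
+    //   ^true:   ...; cir.br ^cont(%a)
+    //   ^false:  ...; cir.br ^cont(%b)
+    //   ^cont(%r): ...
+    // with the ternary's result replaced by the continue block's argument.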
+ if (op->getResultTypes().size()) + locs.push_back(loc); + auto *continueBlock = + rewriter.createBlock(remainingOpsBlock, op->getResultTypes(), locs); + rewriter.create(loc, remainingOpsBlock); + + auto &trueRegion = op.getTrueRegion(); + auto *trueBlock = &trueRegion.front(); + mlir::Operation *trueTerminator = trueRegion.back().getTerminator(); + rewriter.setInsertionPointToEnd(&trueRegion.back()); + auto trueYieldOp = dyn_cast(trueTerminator); + + rewriter.replaceOpWithNewOp( + trueYieldOp, trueYieldOp.getArgs(), continueBlock); + rewriter.inlineRegionBefore(trueRegion, continueBlock); + + auto *falseBlock = continueBlock; + auto &falseRegion = op.getFalseRegion(); + + falseBlock = &falseRegion.front(); + mlir::Operation *falseTerminator = falseRegion.back().getTerminator(); + rewriter.setInsertionPointToEnd(&falseRegion.back()); + auto falseYieldOp = dyn_cast(falseTerminator); + rewriter.replaceOpWithNewOp( + falseYieldOp, falseYieldOp.getArgs(), continueBlock); + rewriter.inlineRegionBefore(falseRegion, continueBlock); + + rewriter.setInsertionPointToEnd(condBlock); + rewriter.create(loc, op.getCond(), trueBlock, + falseBlock); + + rewriter.replaceOp(op, continueBlock->getArguments()); + + // Ok, we're done! + return mlir::success(); + } +}; + +void populateFlattenCFGPatterns(RewritePatternSet &patterns) { + patterns + .add( + patterns.getContext()); +} + +void FlattenCFGPass::runOnOperation() { + RewritePatternSet patterns(&getContext()); + populateFlattenCFGPatterns(patterns); + + // Collect operations to apply patterns. + SmallVector ops; + getOperation()->walk([&](Operation *op) { + if (isa(op)) + ops.push_back(op); + }); + + // Apply patterns. + if (applyOpPatternsAndFold(ops, std::move(patterns)).failed()) + signalPassFailure(); +} + +} // namespace + +namespace mlir { + +std::unique_ptr createFlattenCFGPass() { + return std::make_unique(); +} + +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp new file mode 100644 index 000000000000..34eb488b732c --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp @@ -0,0 +1,54 @@ +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" + +using namespace mlir; +using namespace mlir::cir; + +namespace { + +struct GotoSolverPass : public GotoSolverBase { + + GotoSolverPass() = default; + void runOnOperation() override; +}; + +static void process(mlir::cir::FuncOp func) { + + mlir::OpBuilder rewriter(func.getContext()); + std::map labels; + std::vector gotos; + + func.getBody().walk([&](mlir::Operation *op) { + if (auto lab = dyn_cast(op)) { + labels.emplace(lab.getLabel().str(), lab->getBlock()); + lab.erase(); + } else if (auto goTo = dyn_cast(op)) { + gotos.push_back(goTo); + } + }); + + for (auto goTo : gotos) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(goTo); + auto dest = labels[goTo.getLabel().str()]; + rewriter.create(goTo.getLoc(), dest); + goTo.erase(); + } +} + +void GotoSolverPass::runOnOperation() { + SmallVector ops; + getOperation()->walk([&](mlir::cir::FuncOp op) { process(op); }); +} + +} // namespace + +std::unique_ptr mlir::createGotoSolverPass() { + return std::make_unique(); +} \ No newline at end of file diff --git 
a/clang/lib/CIR/Dialect/Transforms/IdiomRecognizer.cpp b/clang/lib/CIR/Dialect/Transforms/IdiomRecognizer.cpp new file mode 100644 index 000000000000..f160239d460d --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/IdiomRecognizer.cpp @@ -0,0 +1,213 @@ +//===- IdiomRecognizer.cpp - Recognize and raise C/C++ library calls ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Region.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Mangle.h" +#include "clang/Basic/Module.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" + +#include "StdHelpers.h" + +using cir::CIRBaseBuilderTy; +using namespace mlir; +using namespace mlir::cir; + +namespace { + +struct IdiomRecognizerPass : public IdiomRecognizerBase { + IdiomRecognizerPass() = default; + void runOnOperation() override; + void recognizeCall(CallOp call); + bool raiseStdFind(CallOp call); + bool raiseIteratorBeginEnd(CallOp call); + + // Handle pass options + struct Options { + enum : unsigned { + None = 0, + RemarkFoundCalls = 1, + RemarkAll = 1 << 1, + }; + unsigned val = None; + bool isOptionsParsed = false; + + void parseOptions(ArrayRef remarks) { + if (isOptionsParsed) + return; + + for (auto &remark : remarks) { + val |= StringSwitch(remark) + .Case("found-calls", RemarkFoundCalls) + .Case("all", RemarkAll) + .Default(None); + } + isOptionsParsed = true; + } + + void parseOptions(IdiomRecognizerPass &pass) { + SmallVector remarks; + + for (auto &r : pass.remarksList) + remarks.push_back(r); + + parseOptions(remarks); + } + + bool emitRemarkAll() { return val & RemarkAll; } + bool emitRemarkFoundCalls() { + return emitRemarkAll() || val & RemarkFoundCalls; + } + } opts; + + /// + /// AST related + /// ----------- + clang::ASTContext *astCtx; + void setASTContext(clang::ASTContext *c) { astCtx = c; } + + /// Tracks current module. + ModuleOp theModule; +}; +} // namespace + +bool IdiomRecognizerPass::raiseStdFind(CallOp call) { + // FIXME: tablegen all of this function. + if (call.getNumOperands() != 3) + return false; + + auto callExprAttr = call.getAstAttr(); + if (!callExprAttr || !callExprAttr.isStdFunctionCall("find")) { + return false; + } + + if (opts.emitRemarkFoundCalls()) + emitRemark(call.getLoc()) << "found call to std::find()"; + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(call.getOperation()); + auto findOp = builder.create( + call.getLoc(), call.getResult().getType(), call.getCalleeAttr(), + call.getOperand(0), call.getOperand(1), call.getOperand(2)); + + call.replaceAllUsesWith(findOp); + call.erase(); + return true; +} + +static bool isIteratorLikeType(mlir::Type t) { + // TODO: some iterators are going to be represented with structs, + // in which case we could look at ASTRecordDeclInterface for more + // information. 
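+  // For now an iterator is only recognized when it is lowered to a plain CIR
+  // pointer (struct-wrapped iterator representations are the TODO above); the
+  // pointee kind is also checked before raising.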
+ auto pTy = dyn_cast(t); + if (!pTy || !mlir::isa(pTy.getPointee())) + return false; + return true; +} + +static bool isIteratorInStdContainter(mlir::Type t) { + // TODO: only std::array supported for now, generalize and + // use tablegen. CallDescription.cpp in the static analyzer + // could be a good inspiration source too. + return isStdArrayType(t); +} + +bool IdiomRecognizerPass::raiseIteratorBeginEnd(CallOp call) { + // FIXME: tablegen all of this function. + CIRBaseBuilderTy builder(getContext()); + + if (call.getNumOperands() != 1 || call.getNumResults() != 1) + return false; + + auto callExprAttr = call.getAstAttr(); + if (!callExprAttr) + return false; + + if (!isIteratorLikeType(call.getResult().getType())) + return false; + + // First argument is the container "this" pointer. + auto thisPtr = dyn_cast(call.getOperand(0).getType()); + if (!thisPtr || !isIteratorInStdContainter(thisPtr.getPointee())) + return false; + + builder.setInsertionPointAfter(call.getOperation()); + mlir::Operation *iterOp; + if (callExprAttr.isIteratorBeginCall()) { + if (opts.emitRemarkFoundCalls()) + emitRemark(call.getLoc()) << "found call to begin() iterator"; + iterOp = builder.create( + call.getLoc(), call.getResult().getType(), call.getCalleeAttr(), + call.getOperand(0)); + } else if (callExprAttr.isIteratorEndCall()) { + if (opts.emitRemarkFoundCalls()) + emitRemark(call.getLoc()) << "found call to end() iterator"; + iterOp = builder.create( + call.getLoc(), call.getResult().getType(), call.getCalleeAttr(), + call.getOperand(0)); + } else { + return false; + } + + call.replaceAllUsesWith(iterOp); + call.erase(); + return true; +} + +void IdiomRecognizerPass::recognizeCall(CallOp call) { + if (raiseIteratorBeginEnd(call)) + return; + + if (raiseStdFind(call)) + return; +} + +void IdiomRecognizerPass::runOnOperation() { + assert(astCtx && "Missing ASTContext, please construct with the right ctor"); + opts.parseOptions(*this); + auto *op = getOperation(); + if (isa<::mlir::ModuleOp>(op)) + theModule = cast<::mlir::ModuleOp>(op); + + SmallVector callsToTransform; + op->walk([&](CallOp callOp) { + // Process call operations + + // Skip indirect calls. + auto c = callOp.getCallee(); + if (!c) + return; + callsToTransform.push_back(callOp); + }); + + for (auto c : callsToTransform) + recognizeCall(c); +} + +std::unique_ptr mlir::createIdiomRecognizerPass() { + return std::make_unique(); +} + +std::unique_ptr +mlir::createIdiomRecognizerPass(clang::ASTContext *astCtx) { + auto pass = std::make_unique(); + pass->setASTContext(astCtx); + return std::move(pass); +} diff --git a/clang/lib/CIR/Dialect/Transforms/LibOpt.cpp b/clang/lib/CIR/Dialect/Transforms/LibOpt.cpp new file mode 100644 index 000000000000..b936157a1e9f --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/LibOpt.cpp @@ -0,0 +1,246 @@ +//===- LibOpt.cpp - Optimize CIR raised C/C++ library idioms --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Region.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Mangle.h" +#include "clang/Basic/Module.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" + +#include "StdHelpers.h" + +using cir::CIRBaseBuilderTy; +using namespace mlir; +using namespace mlir::cir; + +namespace { + +struct LibOptPass : public LibOptBase { + LibOptPass() = default; + void runOnOperation() override; + void xformStdFindIntoMemchr(StdFindOp findOp); + + // Handle pass options + struct Options { + enum : unsigned { + None = 0, + RemarkTransforms = 1, + RemarkAll = 1 << 1, + }; + unsigned val = None; + bool isOptionsParsed = false; + + void parseOptions(ArrayRef remarks) { + if (isOptionsParsed) + return; + + for (auto &remark : remarks) { + val |= StringSwitch(remark) + .Case("transforms", RemarkTransforms) + .Case("all", RemarkAll) + .Default(None); + } + isOptionsParsed = true; + } + + void parseOptions(LibOptPass &pass) { + SmallVector remarks; + + for (auto &r : pass.remarksList) + remarks.push_back(r); + + parseOptions(remarks); + } + + bool emitRemarkAll() { return val & RemarkAll; } + bool emitRemarkTransforms() { + return emitRemarkAll() || val & RemarkTransforms; + } + } opts; + + /// + /// AST related + /// ----------- + clang::ASTContext *astCtx; + void setASTContext(clang::ASTContext *c) { astCtx = c; } + + /// Tracks current module. + ModuleOp theModule; +}; +} // namespace + +static bool isSequentialContainer(mlir::Type t) { + // TODO: other sequential ones, vector, dequeue, list, forward_list. + return isStdArrayType(t); +} + +static bool getIntegralNTTPAt(StructType t, size_t pos, unsigned &size) { + auto *d = + dyn_cast(t.getAst().getRawDecl()); + if (!d) + return false; + + auto &templArgs = d->getTemplateArgs(); + if (pos >= templArgs.size()) + return false; + + auto arraySizeTemplateArg = templArgs[pos]; + if (arraySizeTemplateArg.getKind() != clang::TemplateArgument::Integral) + return false; + + size = arraySizeTemplateArg.getAsIntegral().getSExtValue(); + return true; +} + +static bool containerHasStaticSize(StructType t, unsigned &size) { + // TODO: add others. + if (!isStdArrayType(t)) + return false; + + // Get "size" from std::array + unsigned sizeNTTPPos = 1; + return getIntegralNTTPAt(t, sizeNTTPPos, size); +} + +void LibOptPass::xformStdFindIntoMemchr(StdFindOp findOp) { + // template + // requires (sizeof(T) == 1 && is_integral_v) + // T* find(T* first, T* last, T value) { + // if (auto result = __builtin_memchr(first, value, last - first)) + // return result; + // return last; + // } + + auto first = findOp.getOperand(0); + auto last = findOp.getOperand(1); + auto value = findOp->getOperand(2); + if (!isa(first.getType()) || !isa(last.getType())) + return; + + // Transformation: + // - 1st arg: the data pointer + // - Assert the Iterator is a pointer to primitive type. + // - Check IterBeginOp is char sized. TODO: add other types that map to + // char size. 
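+  // C-level sketch of the intended rewrite (names illustrative only):
+  //   std::find(first, last, value)
+  //     -->  res = memchr(first, (int)value, last - first);
+  //          return res ? res : last;
+  // where the size operand becomes a compile-time constant whenever the
+  // container's static size is known (e.g. std::array<T, N>).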
+ auto iterResTy = dyn_cast(findOp.getType()); + assert(iterResTy && "expected pointer type for iterator"); + auto underlyingDataTy = dyn_cast(iterResTy.getPointee()); + if (!underlyingDataTy || underlyingDataTy.getWidth() != 8) + return; + + // - 2nd arg: the pattern + // - Check it's a pointer type. + // - Load the pattern from memory + // - cast it to `int`. + auto patternAddrTy = dyn_cast(value.getType()); + if (!patternAddrTy || patternAddrTy.getPointee() != underlyingDataTy) + return; + + // - 3rd arg: the size + // - Create and pass a cir.const with NTTP value + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(findOp.getOperation()); + auto memchrOp0 = + builder.createBitcast(first.getLoc(), first, builder.getVoidPtrTy()); + + // FIXME: get datalayout based "int" instead of fixed size 4. + auto loadPattern = + builder.create(value.getLoc(), underlyingDataTy, value); + auto memchrOp1 = builder.createIntCast( + loadPattern, IntType::get(builder.getContext(), 32, true)); + + const auto uInt64Ty = IntType::get(builder.getContext(), 64, false); + + // Build memchr op: + // void *memchr(const void *s, int c, size_t n); + auto memChr = [&] { + if (auto iterBegin = dyn_cast(first.getDefiningOp()); + iterBegin && isa(last.getDefiningOp())) { + // Both operands have the same type, use iterBegin. + + // Look at this pointer to retrieve container information. + auto thisPtr = + cast(iterBegin.getOperand().getType()).getPointee(); + auto containerTy = dyn_cast(thisPtr); + + unsigned staticSize = 0; + if (containerTy && isSequentialContainer(containerTy) && + containerHasStaticSize(containerTy, staticSize)) { + return builder.create( + findOp.getLoc(), memchrOp0, memchrOp1, + builder.create( + findOp.getLoc(), uInt64Ty, + mlir::cir::IntAttr::get(uInt64Ty, staticSize))); + } + } + return builder.create( + findOp.getLoc(), memchrOp0, memchrOp1, + builder.create(findOp.getLoc(), uInt64Ty, last, first)); + }(); + + auto MemChrResult = + builder.createBitcast(findOp.getLoc(), memChr.getResult(), iterResTy); + + // if (result) + // return result; + // else + // return last; + auto NullPtr = builder.getNullPtr(first.getType(), findOp.getLoc()); + auto CmpResult = builder.create( + findOp.getLoc(), BoolType::get(builder.getContext()), CmpOpKind::eq, + NullPtr.getRes(), MemChrResult); + + auto result = builder.create( + findOp.getLoc(), CmpResult.getResult(), + [&](mlir::OpBuilder &ob, mlir::Location Loc) { + ob.create(Loc, last); + }, + [&](mlir::OpBuilder &ob, mlir::Location Loc) { + ob.create(Loc, MemChrResult); + }); + + findOp.replaceAllUsesWith(result); + findOp.erase(); +} + +void LibOptPass::runOnOperation() { + assert(astCtx && "Missing ASTContext, please construct with the right ctor"); + opts.parseOptions(*this); + auto *op = getOperation(); + if (isa<::mlir::ModuleOp>(op)) + theModule = cast<::mlir::ModuleOp>(op); + + SmallVector stdFindToTransform; + op->walk([&](StdFindOp findOp) { stdFindToTransform.push_back(findOp); }); + + for (auto c : stdFindToTransform) + xformStdFindIntoMemchr(c); +} + +std::unique_ptr mlir::createLibOptPass() { + return std::make_unique(); +} + +std::unique_ptr mlir::createLibOptPass(clang::ASTContext *astCtx) { + auto pass = std::make_unique(); + pass->setASTContext(astCtx); + return std::move(pass); +} diff --git a/clang/lib/CIR/Dialect/Transforms/LifetimeCheck.cpp b/clang/lib/CIR/Dialect/Transforms/LifetimeCheck.cpp new file mode 100644 index 000000000000..99398bba908f --- /dev/null +++ 
b/clang/lib/CIR/Dialect/Transforms/LifetimeCheck.cpp @@ -0,0 +1,1961 @@ +//===- Lifetimecheck.cpp - emit diagnostic checks for lifetime violations -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" + +#include "clang/CIR/Interfaces/CIRLoopOpInterface.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallSet.h" + +#include + +using namespace mlir; +using namespace cir; + +namespace { + +struct LocOrdering { + bool operator()(mlir::Location L1, mlir::Location L2) const { + return std::less()(L1.getAsOpaquePointer(), + L2.getAsOpaquePointer()); + } +}; + +struct LifetimeCheckPass : public LifetimeCheckBase { + LifetimeCheckPass() = default; + void runOnOperation() override; + + void checkOperation(Operation *op); + void checkFunc(cir::FuncOp fnOp); + void checkBlock(Block &block); + + void checkRegionWithScope(Region ®ion); + void checkRegion(Region ®ion); + + void checkIf(IfOp op); + void checkSwitch(SwitchOp op); + void checkLoop(LoopOpInterface op); + void checkAlloca(AllocaOp op); + void checkStore(StoreOp op); + void checkLoad(LoadOp op); + void checkCall(CallOp callOp); + void checkAwait(AwaitOp awaitOp); + void checkReturn(ReturnOp retOp); + + void classifyAndInitTypeCategories(mlir::Value addr, mlir::Type t, + mlir::Location loc, unsigned nestLevel); + void updatePointsTo(mlir::Value addr, mlir::Value data, mlir::Location loc); + void updatePointsToForConstStruct(mlir::Value addr, + mlir::cir::ConstStructAttr value, + mlir::Location loc); + void updatePointsToForZeroStruct(mlir::Value addr, StructType sTy, + mlir::Location loc); + + enum DerefStyle { + Direct, + RetLambda, + CallParam, + IndirectCallParam, + }; + void checkPointerDeref(mlir::Value addr, mlir::Location loc, + DerefStyle derefStyle = DerefStyle::Direct); + void checkCoroTaskStore(StoreOp storeOp); + void checkLambdaCaptureStore(StoreOp storeOp); + void trackCallToCoroutine(CallOp callOp); + + void checkCtor(CallOp callOp, ASTCXXConstructorDeclInterface ctor); + void checkMoveAssignment(CallOp callOp, ASTCXXMethodDeclInterface m); + void checkCopyAssignment(CallOp callOp, ASTCXXMethodDeclInterface m); + void checkNonConstUseOfOwner(mlir::Value ownerAddr, mlir::Location loc); + void checkOperators(CallOp callOp, ASTCXXMethodDeclInterface m); + void checkOtherMethodsAndFunctions(CallOp callOp, + ASTCXXMethodDeclInterface m); + void checkForOwnerAndPointerArguments(CallOp callOp, unsigned firstArgIdx); + + // TODO: merge both methods below and pass down an enum. + // + // Check if a method's 'this' pointer (first arg) is tracked as + // a pointer category. Assumes the CallOp in question represents a method + // and returns the actual value associated with the tracked 'this' or an + // empty value if none is found. + mlir::Value getThisParamPointerCategory(CallOp callOp); + // Check if a method's 'this' pointer (first arg) is tracked as + // a owner category. 
Assumes the CallOp in question represents a method + // and returns the actual value associated with the tracked 'this' or an + // empty value if none is found. + mlir::Value getThisParamOwnerCategory(CallOp callOp); + + // Tracks current module. + ModuleOp theModule; + // Track current function under analysis + std::optional currFunc; + + // Common helpers. + bool isCtorInitPointerFromOwner(CallOp callOp); + mlir::Value getNonConstUseOfOwner(CallOp callOp, ASTCXXMethodDeclInterface m); + bool isOwnerOrPointerClassMethod(CallOp callOp, ASTCXXMethodDeclInterface m); + + // Diagnostic helpers. + void emitInvalidHistory(mlir::InFlightDiagnostic &D, mlir::Value histKey, + mlir::Location warningLoc, + DerefStyle derefStyle = DerefStyle::Direct); + + /// + /// Pass options handling + /// --------------------- + + struct Options { + enum : unsigned { + None = 0, + // Emit pset remarks only detecting invalid derefs + RemarkPsetInvalid = 1, + // Emit pset remarks for all derefs + RemarkPsetAlways = 1 << 1, + RemarkAll = 1 << 2, + HistoryNull = 1 << 3, + HistoryInvalid = 1 << 4, + HistoryAll = 1 << 5, + }; + unsigned val = None; + unsigned histLimit = 1; + bool isOptionsParsed = false; + + void parseOptions(ArrayRef remarks, ArrayRef hist, + unsigned hist_limit) { + if (isOptionsParsed) + return; + + for (auto &remark : remarks) { + val |= StringSwitch(remark) + .Case("pset-invalid", RemarkPsetInvalid) + .Case("pset-always", RemarkPsetAlways) + .Case("all", RemarkAll) + .Default(None); + } + for (auto &h : hist) { + val |= StringSwitch(h) + .Case("invalid", HistoryInvalid) + .Case("null", HistoryNull) + .Case("all", HistoryAll) + .Default(None); + } + histLimit = hist_limit; + isOptionsParsed = true; + } + + void parseOptions(LifetimeCheckPass &pass) { + SmallVector remarks; + SmallVector hists; + + for (auto &r : pass.remarksList) + remarks.push_back(r); + + for (auto &h : pass.historyList) + hists.push_back(h); + + parseOptions(remarks, hists, pass.historyLimit); + } + + bool emitRemarkAll() { return val & RemarkAll; } + bool emitRemarkPsetInvalid() { + return emitRemarkAll() || val & RemarkPsetInvalid; + } + bool emitRemarkPsetAlways() { + return emitRemarkAll() || val & RemarkPsetAlways; + } + + bool emitHistoryAll() { return val & HistoryAll; } + bool emitHistoryNull() { return emitHistoryAll() || val & HistoryNull; } + bool emitHistoryInvalid() { + return emitHistoryAll() || val & HistoryInvalid; + } + } opts; + + /// + /// State + /// ----- + + // Represents the state of an element in a pointer set (pset) + struct State { + using DataTy = enum { + Invalid, + NullPtr, + Global, + // FIXME: currently only supports one level of OwnedBy! + OwnedBy, + LocalValue, + NumKindsMinusOne = LocalValue + }; + State() { val.setInt(Invalid); } + State(DataTy d) { val.setInt(d); } + State(mlir::Value v, DataTy d = LocalValue) { + assert((d == LocalValue || d == OwnedBy) && "expected value or owned"); + val.setPointerAndInt(v, d); + } + + static constexpr int KindBits = 3; + static_assert((1 << KindBits) > NumKindsMinusOne, + "Not enough room for kind!"); + llvm::PointerIntPair val; + + /// Provide less/equal than operator for sorting / set ops. + bool operator<(const State &RHS) const { + // FIXME: note that this makes the ordering non-deterministic, do + // we really care? 
+ if (hasValue() && RHS.hasValue()) + return val.getPointer().getAsOpaquePointer() < + RHS.val.getPointer().getAsOpaquePointer(); + return val.getInt() < RHS.val.getInt(); + } + bool operator==(const State &RHS) const { + if (hasValue() && RHS.hasValue()) + return val.getPointer() == RHS.val.getPointer(); + return val.getInt() == RHS.val.getInt(); + } + + bool isLocalValue() const { return val.getInt() == LocalValue; } + bool isOwnedBy() const { return val.getInt() == OwnedBy; } + bool hasValue() const { return isLocalValue() || isOwnedBy(); } + + mlir::Value getData() const { + assert(hasValue() && "data type does not hold a mlir::Value"); + return val.getPointer(); + } + + void dump(llvm::raw_ostream &OS = llvm::errs(), int ownedGen = 0); + + static State getInvalid() { return {Invalid}; } + static State getNullPtr() { return {NullPtr}; } + static State getLocalValue(mlir::Value v) { return {v, LocalValue}; } + static State getOwnedBy(mlir::Value v) { return {v, State::OwnedBy}; } + }; + + /// + /// Invalid and null history tracking + /// --------------------------------- + enum InvalidStyle { + Unknown, + EndOfScope, + NotInitialized, + MovedFrom, + NonConstUseOfOwner, + }; + + struct InvalidHistEntry { + InvalidStyle style = Unknown; + std::optional loc; + std::optional val; + InvalidHistEntry() = default; + InvalidHistEntry(InvalidStyle s, std::optional l, + std::optional v) + : style(s), loc(l), val(v) {} + }; + + struct InvalidHist { + llvm::SmallVector entries; + void add(mlir::Value ptr, InvalidStyle invalidStyle, mlir::Location loc, + std::optional val = {}) { + entries.emplace_back(InvalidHistEntry(invalidStyle, loc, val)); + } + }; + + llvm::DenseMap invalidHist; + + using PMapNullHistType = + llvm::DenseMap>; + PMapNullHistType pmapNullHist; + + // Track emitted diagnostics, and do not repeat them. + llvm::SmallSet emittedDiagnostics; + + /// + /// Pointer Map and Pointer Set + /// --------------------------- + + using PSetType = llvm::SmallSet; + // FIXME: this should be a ScopedHashTable for consistency. + using PMapType = llvm::DenseMap; + + // FIXME: we probably don't need to track it at this level, perhaps + // just tracking at the scope level should be enough? + PMapType *currPmap = nullptr; + PMapType &getPmap() { return *currPmap; } + void markPsetInvalid(mlir::Value ptr, InvalidStyle invalidStyle, + mlir::Location loc, + std::optional extraVal = {}) { + auto &pset = getPmap()[ptr]; + + // If pset is already invalid, don't bother. + if (pset.count(State::getInvalid())) + return; + + // 2.3 - putting invalid into pset(x) is said to invalidate it + pset.insert(State::getInvalid()); + invalidHist[ptr].add(ptr, invalidStyle, loc, extraVal); + } + + void markPsetNull(mlir::Value addr, mlir::Location loc) { + getPmap()[addr].clear(); + getPmap()[addr].insert(State::getNullPtr()); + pmapNullHist[addr] = loc; + } + + void joinPmaps(SmallVectorImpl &pmaps); + + // Provides p1179's 'KILL' functionality. See implementation for more + // information. + void kill(const State &s, InvalidStyle invalidStyle, mlir::Location loc); + void killInPset(mlir::Value ptrKey, const State &s, InvalidStyle invalidStyle, + mlir::Location loc, std::optional extraVal); + + // Local pointers + SmallPtrSet ptrs; + + // Local owners. We use a map instead of a set to track the current generation + // for this owner type internal pointee's. For instance, this allows tracking + // subsequent reuse of owner storage when a non-const use happens. 
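Illustrative aside, not part of the patch: the State kinds and the markPsetInvalid/markPsetNull helpers above can be read against a small C++ snippet. The pset annotations below are a hand-written sketch following the P1179 rules quoted throughout this file, not verified tool output.

```cpp
// Hypothetical input, annotated with the psets the pass would maintain.
void pset_sketch() {
  int *p;              // pset(p) = {invalid}   (declared, not initialized)
  int *q = nullptr;    // pset(q) = {null}      (location recorded in pmapNullHist)
  {
    int x = 0;         // pset(x) = {x}         (a local Value)
    p = &x;            // pset(p) = {x}
  }                    // end of scope: KILL(x) => pset(p) = {invalid},
                       // and invalidHist[p] gains an EndOfScope entry
  (void)q;
}
```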
+ DenseMap owners; + void addOwner(mlir::Value o) { + assert(!owners.count(o) && "already tracked"); + owners[o] = 0; + } + void incOwner(mlir::Value o) { + assert(owners.count(o) && "entry expected"); + owners[o]++; + } + + // Aggregates and exploded fields. + using ExplodedFieldsTy = llvm::SmallVector; + DenseMap aggregates; + void addAggregate(mlir::Value a, SmallVectorImpl &fields) { + assert(!aggregates.count(a) && "already tracked"); + aggregates[a].swap(fields); + } + + // Useful helpers for debugging + void printPset(PSetType &pset, llvm::raw_ostream &OS = llvm::errs()); + LLVM_DUMP_METHOD void dumpPmap(PMapType &pmap); + LLVM_DUMP_METHOD void dumpCurrentPmap(); + + /// + /// Coroutine tasks (promise_type) + /// ------------------------------ + + // Track types we already know to be a coroutine task (promise_type) + llvm::DenseMap IsTaskTyCache; + // Is the type associated with taskVal a coroutine task? Uses IsTaskTyCache + // or compute it from associated AST node. + bool isTaskType(mlir::Value taskVal); + // Addresses of coroutine Tasks found in the current function. + SmallPtrSet tasks; + + /// + /// Lambdas + /// ------- + + // Track types we already know to be a lambda + llvm::DenseMap IsLambdaTyCache; + // Check if a given cir type is a struct containing a lambda + bool isLambdaType(mlir::Type ty); + // Get the lambda struct from a member access to it. + mlir::Value getLambdaFromMemberAccess(mlir::Value addr); + + /// + /// Scope, context and guards + /// ------------------------- + + // Represents the scope context for IR operations (cir.scope, cir.if, + // then/else regions, etc). Tracks the declaration of variables in the current + // local scope. + struct LexicalScopeContext { + unsigned Depth = 0; + LexicalScopeContext() = delete; + + llvm::PointerUnion parent; + LexicalScopeContext(mlir::Region *R) : parent(R) {} + LexicalScopeContext(mlir::Operation *Op) : parent(Op) {} + ~LexicalScopeContext() = default; + + // Track all local values added in this scope + SmallPtrSet localValues; + + // Track the result of temporaries with coroutine call results, + // they are used to initialize a task. + // + // Value must come directly out of a cir.call to a cir.func which + // is a coroutine. + SmallPtrSet localTempTasks; + + // Track seen lambdas that escape out of the current scope + // (e.g. lambdas returned out of functions). 
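Aside: localRetLambdas is what later lets checkReturn and the RetLambda deref style flag a lambda that leaves its function while still holding a by-reference capture. A minimal sketch of that shape (hypothetical code, shown only to illustrate what gets tracked):

```cpp
// The returned closure keeps a reference to `count`, whose storage ends
// when makeCounter returns; this is the case localRetLambdas exists for.
auto makeCounter() {
  int count = 0;
  return [&count]() { return ++count; };
}
```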
+ DenseMap localRetLambdas; + + LLVM_DUMP_METHOD void dumpLocalValues(); + }; + + class LexicalScopeGuard { + LifetimeCheckPass &Pass; + LexicalScopeContext *OldVal = nullptr; + + public: + LexicalScopeGuard(LifetimeCheckPass &p, LexicalScopeContext *L) : Pass(p) { + if (Pass.currScope) { + OldVal = Pass.currScope; + L->Depth++; + } + Pass.currScope = L; + } + + LexicalScopeGuard(const LexicalScopeGuard &) = delete; + LexicalScopeGuard &operator=(const LexicalScopeGuard &) = delete; + LexicalScopeGuard &operator=(LexicalScopeGuard &&other) = delete; + + void cleanup(); + void restore() { Pass.currScope = OldVal; } + ~LexicalScopeGuard() { + cleanup(); + restore(); + } + }; + + class PmapGuard { + LifetimeCheckPass &Pass; + PMapType *OldVal = nullptr; + + public: + PmapGuard(LifetimeCheckPass &lcp, PMapType *L) : Pass(lcp) { + if (Pass.currPmap) { + OldVal = Pass.currPmap; + } + Pass.currPmap = L; + } + + PmapGuard(const PmapGuard &) = delete; + PmapGuard &operator=(const PmapGuard &) = delete; + PmapGuard &operator=(PmapGuard &&other) = delete; + + void restore() { Pass.currPmap = OldVal; } + ~PmapGuard() { restore(); } + }; + + LexicalScopeContext *currScope = nullptr; + + /// + /// AST related + /// ----------- + + std::optional astCtx; + void setASTContext(clang::ASTContext *c) { astCtx = c; } +}; +} // namespace + +static std::string getVarNameFromValue(mlir::Value v) { + + auto srcOp = v.getDefiningOp(); + if (!srcOp) { + auto blockArg = cast(v); + assert(blockArg.getOwner()->isEntryBlock() && "random block args NYI"); + llvm::SmallString<128> finalName; + llvm::raw_svector_ostream Out(finalName); + Out << "fn_arg:" << blockArg.getArgNumber(); + return Out.str().str(); + } + + if (auto allocaOp = dyn_cast(srcOp)) + return allocaOp.getName().str(); + if (auto getElemOp = dyn_cast(srcOp)) { + auto parent = dyn_cast(getElemOp.getAddr().getDefiningOp()); + if (parent) { + llvm::SmallString<128> finalName; + llvm::raw_svector_ostream Out(finalName); + Out << parent.getName() << "." 
<< getElemOp.getName(); + return Out.str().str(); + } + } + if (auto callOp = dyn_cast(srcOp)) { + if (callOp.getCallee()) { + llvm::SmallString<128> finalName; + llvm::raw_svector_ostream Out(finalName); + Out << "call:" << callOp.getCallee()->str(); + return Out.str().str(); + } + } + assert(0 && "how did it get here?"); + return ""; +} + +static Location getEndLoc(Location loc, int idx = 1) { + auto fusedLoc = dyn_cast(loc); + if (!fusedLoc) + return loc; + return fusedLoc.getLocations()[idx]; +} + +static Location getEndLocForHist(Operation *Op) { + return getEndLoc(Op->getLoc()); +} + +static Location getEndLocIf(IfOp ifOp, Region *R) { + assert(ifOp && "what other regions create their own scope?"); + if (&ifOp.getThenRegion() == R) + return getEndLoc(ifOp.getLoc()); + return getEndLoc(ifOp.getLoc(), /*idx=*/3); +} + +static Location getEndLocForHist(Region *R) { + auto parentOp = R->getParentOp(); + if (isa(parentOp)) + return getEndLocIf(cast(parentOp), R); + if (isa(parentOp)) + return getEndLoc(parentOp->getLoc()); + llvm_unreachable("what other regions create their own scope?"); +} + +static Location getEndLocForHist(LifetimeCheckPass::LexicalScopeContext &lsc) { + assert(!lsc.parent.isNull() && "shouldn't be null"); + if (lsc.parent.is()) + return getEndLocForHist(lsc.parent.get()); + assert(lsc.parent.is() && + "Only support operation beyond this point"); + return getEndLocForHist(lsc.parent.get()); +} + +void LifetimeCheckPass::killInPset(mlir::Value ptrKey, const State &s, + InvalidStyle invalidStyle, + mlir::Location loc, + std::optional extraVal) { + auto &pset = getPmap()[ptrKey]; + if (pset.contains(s)) { + pset.erase(s); + markPsetInvalid(ptrKey, invalidStyle, loc, extraVal); + } +} + +// 2.3 - KILL(x) means to replace all occurrences of x and x' and x'' (etc.) +// in the pmap with invalid. For example, if pmap is {(p1,{a}), (p2,{a'})}, +// KILL(a') would invalidate only p2, and KILL(a) would invalidate both p1 and +// p2. +void LifetimeCheckPass::kill(const State &s, InvalidStyle invalidStyle, + mlir::Location loc) { + assert(s.hasValue() && "does not know how to kill other data types"); + mlir::Value v = s.getData(); + std::optional extraVal; + if (invalidStyle == InvalidStyle::EndOfScope) + extraVal = v; + + for (auto &mapEntry : getPmap()) { + auto ptr = mapEntry.first; + + // We are deleting this entry anyways, nothing to do here. + if (v == ptr) + continue; + + // ... replace all occurrences of x and x' and x''. Start with the primes + // so we first remove uses and then users. + // + // FIXME: add x'', x''', etc... + if (s.isLocalValue() && owners.count(v)) + killInPset(ptr, State::getOwnedBy(v), invalidStyle, loc, extraVal); + killInPset(ptr, s, invalidStyle, loc, extraVal); + } + + // Delete the local value from pmap, since its scope has ended. + if (invalidStyle == InvalidStyle::EndOfScope) { + owners.erase(v); + ptrs.erase(v); + tasks.erase(v); + aggregates.erase(v); + } +} + +void LifetimeCheckPass::LexicalScopeGuard::cleanup() { + auto *localScope = Pass.currScope; + for (auto pointee : localScope->localValues) + Pass.kill(State::getLocalValue(pointee), InvalidStyle::EndOfScope, + getEndLocForHist(*localScope)); + + // Catch interesting dangling references out of returns. + for (auto l : localScope->localRetLambdas) + Pass.checkPointerDeref(l.first, l.second, DerefStyle::RetLambda); +} + +void LifetimeCheckPass::checkBlock(Block &block) { + // Block main role is to hold a list of Operations. 
+ for (Operation &op : block.getOperations()) + checkOperation(&op); +} + +void LifetimeCheckPass::checkRegion(Region ®ion) { + for (Block &block : region.getBlocks()) + checkBlock(block); +} + +void LifetimeCheckPass::checkRegionWithScope(Region ®ion) { + // Add a new scope. Note that as part of the scope cleanup process + // we apply section 2.3 KILL(x) functionality, turning relevant + // references invalid. + LexicalScopeContext lexScope{®ion}; + LexicalScopeGuard scopeGuard{*this, &lexScope}; + for (Block &block : region.getBlocks()) + checkBlock(block); +} + +void LifetimeCheckPass::checkFunc(cir::FuncOp fnOp) { + currFunc = fnOp; + // FIXME: perhaps this should be a function pass, but for now make + // sure we reset the state before looking at other functions. + if (currPmap) + getPmap().clear(); + pmapNullHist.clear(); + invalidHist.clear(); + + // Create a new pmap for this function. + PMapType localPmap{}; + PmapGuard pmapGuard{*this, &localPmap}; + + // Add a new scope. Note that as part of the scope cleanup process + // we apply section 2.3 KILL(x) functionality, turning relevant + // references invalid. + for (Region ®ion : fnOp->getRegions()) + checkRegionWithScope(region); + + // FIXME: store the pmap result for this function, we + // could do some interesting IPA stuff using this info. + currFunc.reset(); +} + +// The join operation between pmap as described in section 2.3. +// +// JOIN({pmap1,...,pmapN}) => +// { (p, pset1(p) U ... U psetN(p) | (p,*) U pmap1 U ... U pmapN }. +// +void LifetimeCheckPass::joinPmaps(SmallVectorImpl &pmaps) { + for (auto &mapEntry : getPmap()) { + auto &val = mapEntry.first; + + PSetType joinPset; + for (auto &pmapOp : pmaps) + llvm::set_union(joinPset, pmapOp[val]); + + getPmap()[val] = joinPset; + } +} + +void LifetimeCheckPass::checkLoop(LoopOpInterface loopOp) { + // 2.4.9. Loops + // + // A loop is treated as if it were the first two loop iterations unrolled + // using an if. For example: + // + // for (/*init*/; /*cond*/; /*incr*/) + // { /*body*/ } + // + // is treated as: + // + // if (/*init*/; /*cond*/) + // { /*body*/; /*incr*/ } + // if (/*cond*/) + // { /*body*/ } + // + // See checkIf for additional explanations. + SmallVector pmapOps; + SmallVector regionsToCheck; + + auto setupLoopRegionsToCheck = [&](bool isSubsequentTaken = false) { + regionsToCheck = loopOp.getRegionsInExecutionOrder(); + // Drop step if it exists and we are not checking the subsequent taken. + if (loopOp.maybeGetStep() && !isSubsequentTaken) + regionsToCheck.pop_back(); + }; + + // From 2.4.9 "Note": + // + // There are only three paths to analyze: + // (1) never taken (the loop body was not entered) + pmapOps.push_back(getPmap()); + + // (2) first taken (the first pass through the loop body, which begins + // with the loop entry pmap) + PMapType loopExitPmap; + { + // Intentional copy from loop entry map + loopExitPmap = getPmap(); + PmapGuard pmapGuard{*this, &loopExitPmap}; + setupLoopRegionsToCheck(); + for (auto *r : regionsToCheck) + checkRegion(*r); + pmapOps.push_back(loopExitPmap); + } + + // (3) and subsequent taken (second or later iteration, which begins with the + // loop body exit pmap and so takes into account any invalidations performed + // in the loop body on any path that could affect the next loop). + // + // This ensures that a subsequent loop iteration does not use a Pointer that + // was invalidated during a previous loop iteration. 
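Aside: a sketch of the situation the second, "subsequent taken" pass exists for. On the first pass through the body the guarded dereference is harmless, and only the body-exit pmap exposes the dangling pointer (hypothetical code; annotations follow the rules quoted here):

```cpp
void loop_sketch() {
  int *p = nullptr;       // pset(p) = {null} at loop entry
  for (int i = 0; i < 4; ++i) {
    if (p)
      (void)*p;           // on iteration 2+, p still refers to the previous
                          // iteration's `local`, which is already dead
    int local = i;
    p = &local;           // pset(p) = {local}; local dies at the end of the body
  }
}
```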
+ // + // Because this analysis gives the same answer for each block of code (always + // converges), all loop iterations after the first get the same answer and + // so we only need to consider the second iteration, and so the analysis + // algorithm remains linear, single-pass. As an optimization, if the loop + // entry pmap is the same as the first loop body exit pmap, there is no need + // to perform the analysis on the second loop iteration; the answer will be + // the same. + if (getPmap() != loopExitPmap) { + // Intentional copy from first taken loop exit pmap + PMapType otherTakenPmap = loopExitPmap; + PmapGuard pmapGuard{*this, &otherTakenPmap}; + setupLoopRegionsToCheck(/*isSubsequentTaken=*/true); + for (auto *r : regionsToCheck) + checkRegion(*r); + pmapOps.push_back(otherTakenPmap); + } + + joinPmaps(pmapOps); +} + +void LifetimeCheckPass::checkAwait(AwaitOp awaitOp) { + // Pretty conservative: assume all regions execute + // sequencially. + // + // FIXME: use branch interface here and only tackle + // the necessary regions. + SmallVector pmapOps; + + for (auto r : awaitOp.getRegions()) { + PMapType regionPmap = getPmap(); + PmapGuard pmapGuard{*this, ®ionPmap}; + checkRegion(*r); + pmapOps.push_back(regionPmap); + } + + joinPmaps(pmapOps); +} + +void LifetimeCheckPass::checkReturn(ReturnOp retOp) { + // Upon return invalidate all local values. Since some return + // values might depend on other local address, check for the + // dangling aspects for this. + if (retOp.getNumOperands() == 0) + return; + + auto retTy = retOp.getOperand(0).getType(); + // FIXME: this can be extended to cover more leaking/dandling + // semantics out of functions. + if (!isLambdaType(retTy)) + return; + + // The return value is loaded from the return slot before + // returning. + auto loadOp = dyn_cast(retOp.getOperand(0).getDefiningOp()); + assert(loadOp && "expected cir.load"); + if (!isa(loadOp.getAddr().getDefiningOp())) + return; + + // Keep track of interesting lambda. + assert(!currScope->localRetLambdas.count(loadOp.getAddr()) && + "lambda already returned?"); + currScope->localRetLambdas.insert( + std::make_pair(loadOp.getAddr(), loadOp.getLoc())); +} + +void LifetimeCheckPass::checkSwitch(SwitchOp switchOp) { + // 2.4.7. A switch(cond) is treated as if it were an equivalent series of + // non-nested if statements with single evaluation of cond; for example: + // + // switch (a) { + // case 1:/*1*/ + // case 2:/*2*/ break; + // default:/*3*/ + // } + // + // is treated as: + // + // if (auto& a=a; a==1) {/*1*/} + // else if (a==1 || a==2) {/*2*/} + // else {/*3*/}. + // + // See checkIf for additional explanations. + SmallVector pmapOps; + + // If there are no regions, pmap is the same. + if (switchOp.getRegions().empty()) + return; + + auto isCaseFallthroughTerminated = [&](Region &r) { + assert(r.getBlocks().size() == 1 && "cannot yet handle branches"); + Block &block = r.back(); + assert(!block.empty() && "case regions cannot be empty"); + + // FIXME: do something special about return terminated? + YieldOp y = dyn_cast(block.back()); + if (!y) + return false; + return true; + }; + + auto regions = switchOp.getRegions(); + for (unsigned regionCurrent = 0, regionPastEnd = regions.size(); + regionCurrent != regionPastEnd; ++regionCurrent) { + // Intentional pmap copy, basis to start new path. + PMapType locaCasePmap = getPmap(); + PmapGuard pmapGuard{*this, &locaCasePmap}; + + // At any given point, fallbacks (if not empty) will increase the + // number of control-flow possibilities. 
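Aside: a sketch of why fall-through paths are accumulated the way the region loop below does it; the analysis has to cover both the direct path to case 2 and the path that went through case 1, and the resulting pmaps are joined (hypothetical code):

```cpp
void switch_sketch(int a) {
  int x = 0;
  int *p = nullptr;
  switch (a) {
  case 1:
    p = &x;          // pset(p) = {x} on the path through case 1
    [[fallthrough]];
  case 2:
    if (p)
      (void)*p;      // reachable both directly (pset(p) = {null}) and via the
                     // fall-through from case 1; both pmaps are computed and joined
    break;
  default:
    break;
  }
}
```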
For each region ending up + // with a fallback, keep computing the pmap until we hit a region + // that has a non-fallback terminator for the region. + unsigned idx = regionCurrent; + while (idx < regionPastEnd) { + // Note that for 'if' regions we use checkRegionWithScope, since + // there are lexical scopes associated with each region, this is + // not the case for switch's. + checkRegion(regions[idx]); + if (!isCaseFallthroughTerminated(regions[idx])) + break; + idx++; + } + pmapOps.push_back(locaCasePmap); + } + + joinPmaps(pmapOps); +} + +void LifetimeCheckPass::checkIf(IfOp ifOp) { + // Both then and else create their own lexical scopes, take that into account + // while checking then/else. + // + // This is also the moment where pmaps are joined because flow forks: + // pmap(ifOp) = JOIN( pmap(then), pmap(else) ) + // + // To that intent the pmap is copied out before checking each region and + // pmap(ifOp) computed after analysing both paths. + SmallVector pmapOps; + + { + PMapType localThenPmap = getPmap(); + PmapGuard pmapGuard{*this, &localThenPmap}; + checkRegionWithScope(ifOp.getThenRegion()); + pmapOps.push_back(localThenPmap); + } + + // In case there's no 'else' branch, the 'else' pmap is the same as + // prior to the if condition. + if (!ifOp.getElseRegion().empty()) { + PMapType localElsePmap = getPmap(); + PmapGuard pmapGuard{*this, &localElsePmap}; + checkRegionWithScope(ifOp.getElseRegion()); + pmapOps.push_back(localElsePmap); + } else { + pmapOps.push_back(getPmap()); + } + + joinPmaps(pmapOps); +} + +template bool isStructAndHasAttr(mlir::Type ty) { + if (!mlir::isa(ty)) + return false; + return hasAttr(mlir::cast(ty).getAst()); +} + +static bool isOwnerType(mlir::Type ty) { + // From 2.1: + // + // An Owner uniquely owns another object (cannot dangle). An Owner type is + // expressed using the annotation [[gsl::Owner(DerefType)]] where DerefType is + // the owned type (and (DerefType) may be omitted and deduced as below). For + // example: + // + // template class [[gsl::Owner(T)]] my_unique_smart_pointer; + // + // TODO: The following standard or other types are treated as-if annotated as + // Owners, if not otherwise annotated and if not SharedOwners: + // + // - Every type that satisfies the standard Container requirements and has a + // user-provided destructor. (Example: vector.) DerefType is ::value_type. + // - Every type that provides unary * and has a user-provided destructor. + // (Example: unique_ptr.) DerefType is the ref-unqualified return type of + // operator*. + // - Every type that has a data member or public base class of an Owner type. + // Additionally, for convenient adoption without modifying existing standard + // library headers, the following well known standard types are treated as-if + // annotated as Owners: stack, queue, priority_queue, optional, variant, any, + // and regex. + return isStructAndHasAttr(ty); +} + +static bool containsPointerElts(mlir::cir::StructType s) { + auto members = s.getMembers(); + return std::any_of(members.begin(), members.end(), [](mlir::Type t) { + return mlir::isa(t); + }); +} + +static bool isAggregateType(LifetimeCheckPass *pass, mlir::Type agg) { + auto t = mlir::dyn_cast(agg); + if (!t) + return false; + // Lambdas have their special handling, and shall not be considered as + // aggregate types. + if (pass->isLambdaType(agg)) + return false; + // FIXME: For now we handle this in a more naive way: any pointer + // element we find is enough to consider this an aggregate. 
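Aside: under the naive rule just described, any class with a pointer-typed member is treated as an Aggregate and "exploded" so each used pointer field gets its own pset entry (only the first nesting level, per classifyAndInitTypeCategories further down). A hypothetical example:

```cpp
struct Pair {
  int id;
  int *ptr;   // the pointer element that makes Pair an Aggregate here
};

void aggregate_sketch() {
  Pair pr;            // exploded: pset(pr.ptr) = {invalid}
  {
    int x = 0;
    pr.ptr = &x;      // pset(pr.ptr) = {x}
  }                   // end of scope: pset(pr.ptr) = {invalid} again
  if (pr.ptr)
    (void)*pr.ptr;    // deref of an exploded field: checked like any Pointer
}
```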
But in + // reality it should be as defined in 2.1: + // + // An Aggregate is a type that is not an Indirection and is a class type with + // public data members none of which are references (& or &&) and no + // user-provided copy or move operations, and no base class that is not also + // an Aggregate. The elements of an Aggregate are its public data members. + return containsPointerElts(t); +} + +static bool isPointerType(mlir::Type t) { + // From 2.1: + // + // A Pointer is not an Owner and provides indirect access to an object it does + // not own (can dangle). A Pointer type is expressed using the annotation + // [[gsl::Pointer(DerefType)]] where DerefType is the pointed-to type (and + // (Dereftype) may be omitted and deduced as below). For example: + // + // template class [[gsl::Pointer(T)]] my_span; + // + // TODO: The following standard or other types are treated as-if annotated as + // Pointer, if not otherwise annotated and if not Owners: + // + // - Every type that satisfies the standard Iterator requirements. (Example: + // regex_iterator.) DerefType is the ref-unqualified return type of operator*. + // - Every type that satisfies the Ranges TS Range concept. (Example: + // basic_string_view.) DerefType is the ref-unqualified type of *begin(). + // - Every type that satisfies the following concept. DerefType is the + // ref-unqualified return type of operator*. + // + // template concept + // TriviallyCopyableAndNonOwningAndDereferenceable = + // std::is_trivially_copyable_v && std::is_copy_constructible_v && + // std::is_copy_assignable_v && requires(T t) { *t; }; + // + // - Every closure type of a lambda that captures by reference or captures a + // Pointer by value. DerefType is void. + // - Every type that has a data member or public base class of a Pointer type. + // Additionally, for convenient adoption without modifying existing standard + // library headers, the following well- known standard types are treated as-if + // annotated as Pointers, in addition to raw pointers and references: ref- + // erence_wrapper, and vector::reference. + if (mlir::isa(t)) + return true; + return isStructAndHasAttr(t); +} + +void LifetimeCheckPass::classifyAndInitTypeCategories(mlir::Value addr, + mlir::Type t, + mlir::Location loc, + unsigned nestLevel) { + // The same alloca can be hit more than once when checking for dangling + // pointers out of subsequent loop iterations (e.g. second iteraton using + // pointer invalidated in the first run). Since we copy the pmap out to + // start those subsequent checks, make sure sure we skip existing alloca + // tracking. + if (getPmap().count(addr)) + return; + getPmap()[addr] = {}; + + enum TypeCategory { + Unknown = 0, + SharedOwner = 1, + Owner = 1 << 2, + Pointer = 1 << 3, + Indirection = 1 << 4, + Aggregate = 1 << 5, + Value = 1 << 6, + }; + + auto localStyle = [&]() { + if (isPointerType(t)) + return TypeCategory::Pointer; + if (isOwnerType(t)) + return TypeCategory::Owner; + if (isAggregateType(this, t)) + return TypeCategory::Aggregate; + return TypeCategory::Value; + }(); + + switch (localStyle) { + case TypeCategory::Pointer: + // 2.4.2 - When a non-parameter non-member Pointer p is declared, add + // (p, {invalid}) to pmap. + ptrs.insert(addr); + markPsetInvalid(addr, InvalidStyle::NotInitialized, loc); + break; + case TypeCategory::Owner: + // 2.4.2 - When a local Owner x is declared, add (x, {x__1'}) to pmap. 
+ addOwner(addr); + getPmap()[addr].insert(State::getOwnedBy(addr)); + currScope->localValues.insert(addr); + break; + case TypeCategory::Aggregate: { + // 2.1 - Aggregates are types we will “explode” (consider memberwise) at + // local scopes, because the function can operate on the members directly. + + // TODO: only track first level of aggregates subobjects for now, get some + // data before we increase this. + if (nestLevel > 1) + break; + + // Map values for members to it's index in the aggregate. + auto members = mlir::cast(t).getMembers(); + SmallVector fieldVals; + fieldVals.assign(members.size(), {}); + + // Go through uses of the alloca via `cir.struct_element_addr`, and + // track only the fields that are actually used. + std::for_each(addr.use_begin(), addr.use_end(), [&](mlir::OpOperand &use) { + auto op = dyn_cast(use.getOwner()); + if (!op) + return; + + auto eltAddr = op.getResult(); + // If nothing is using this GetMemberOp, don't bother since + // it could lead to even more noisy outcomes. + if (eltAddr.use_empty()) + return; + + auto eltTy = + mlir::cast(eltAddr.getType()).getPointee(); + + // Classify exploded types. Keep alloca original location. + classifyAndInitTypeCategories(eltAddr, eltTy, loc, ++nestLevel); + fieldVals[op.getIndex()] = eltAddr; + }); + + // In case this aggregate gets initialized at once, the fields need + // to be mapped to the elements values. + addAggregate(addr, fieldVals); + + // There might be pointers to this aggregate, so also make a value + // for it. + LLVM_FALLTHROUGH; + } + case TypeCategory::Value: { + // 2.4.2 - When a local Value x is declared, add (x, {x}) to pmap. + getPmap()[addr].insert(State::getLocalValue(addr)); + currScope->localValues.insert(addr); + break; + } + default: + llvm_unreachable("NYI"); + } +} + +void LifetimeCheckPass::checkAlloca(AllocaOp allocaOp) { + classifyAndInitTypeCategories(allocaOp.getAddr(), allocaOp.getAllocaType(), + allocaOp.getLoc(), /*nestLevel=*/0); +} + +void LifetimeCheckPass::checkCoroTaskStore(StoreOp storeOp) { + // Given: + // auto task = [init task]; + // Extend pset(task) such that: + // pset(task) = pset(task) U {any local values used to init task} + auto taskTmp = storeOp.getValue(); + // FIXME: check it's initialization 'init' attr. + auto taskAddr = storeOp.getAddr(); + + // Take the following coroutine creation pattern: + // + // %task = cir.alloca ... + // cir.scope { + // %arg0 = cir.alloca ... + // ... + // %tmp_task = cir.call @corotine_call(%arg0, %arg1, ...) + // cir.store %tmp_task, %task + // ... + // } + // + // Bind values that are coming from alloca's (like %arg0 above) to the + // pset of %task - this effectively leads to some invalidation of %task + // when %arg0 finishes its lifetime at the end of the enclosing cir.scope. + if (auto call = dyn_cast(taskTmp.getDefiningOp())) { + bool potentialTaintedTask = false; + for (auto arg : call.getArgOperands()) { + auto alloca = dyn_cast(arg.getDefiningOp()); + if (alloca && currScope->localValues.count(alloca)) { + getPmap()[taskAddr].insert(State::getLocalValue(alloca)); + potentialTaintedTask = true; + } + } + + // Task are only interesting when there are local addresses leaking + // via the coroutine creation, only track those. 
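Aside: in source terms, the CIR pattern above corresponds to something like the following. The Task type here is a hypothetical stand-in defined only to keep the sketch self-contained; what matters is that a local's address flows into the coroutine call that initializes the task.

```cpp
#include <coroutine>

// Minimal coroutine task type (hypothetical, for illustration only).
struct Task {
  struct promise_type {
    Task get_return_object() { return {}; }
    std::suspend_always initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

Task consume(const int &v);   // coroutine declaration holding on to `v`
void schedule(Task &t);       // some later use of the task

void task_sketch() {
  Task t;
  {
    int arg = 42;
    t = consume(arg);  // pset(t) gains {arg}: a local leaks into the task
  }                    // arg's scope ends, tainting the task
  schedule(t);         // candidate for the "use of coroutine ... with
                       // dangling reference" diagnostic emitted below
}
```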
+ if (potentialTaintedTask) + tasks.insert(taskAddr); + return; + } + llvm_unreachable("expecting cir.call defining op"); +} + +mlir::Value LifetimeCheckPass::getLambdaFromMemberAccess(mlir::Value addr) { + auto op = addr.getDefiningOp(); + // FIXME: we likely want to consider more indirections here... + if (!isa(op)) + return nullptr; + auto allocaOp = + dyn_cast(op->getOperand(0).getDefiningOp()); + if (!allocaOp || !isLambdaType(allocaOp.getAllocaType())) + return nullptr; + return allocaOp; +} + +void LifetimeCheckPass::checkLambdaCaptureStore(StoreOp storeOp) { + auto localByRefAddr = storeOp.getValue(); + auto lambdaCaptureAddr = storeOp.getAddr(); + + if (!isa_and_nonnull(localByRefAddr.getDefiningOp())) + return; + auto lambdaAddr = getLambdaFromMemberAccess(lambdaCaptureAddr); + if (!lambdaAddr) + return; + + if (currScope->localValues.count(localByRefAddr)) + getPmap()[lambdaAddr].insert(State::getLocalValue(localByRefAddr)); +} + +void LifetimeCheckPass::updatePointsToForConstStruct( + mlir::Value addr, mlir::cir::ConstStructAttr value, mlir::Location loc) { + assert(aggregates.count(addr) && "expected association with aggregate"); + int memberIdx = 0; + for (auto &attr : value.getMembers()) { + auto ta = mlir::dyn_cast(attr); + assert(ta && "expected typed attribute"); + auto fieldAddr = aggregates[addr][memberIdx]; + // Unseen fields are not tracked. + if (fieldAddr && mlir::isa(ta.getType())) { + assert(mlir::isa(ta) && + "other than null not implemented"); + markPsetNull(fieldAddr, loc); + } + memberIdx++; + } +} + +void LifetimeCheckPass::updatePointsToForZeroStruct(mlir::Value addr, + StructType sTy, + mlir::Location loc) { + assert(aggregates.count(addr) && "expected association with aggregate"); + int memberIdx = 0; + for (auto &t : sTy.getMembers()) { + auto fieldAddr = aggregates[addr][memberIdx]; + // Unseen fields are not tracked. + if (fieldAddr && mlir::isa(t)) { + markPsetNull(fieldAddr, loc); + } + memberIdx++; + } +} + +static mlir::Operation *ignoreBitcasts(mlir::Operation *op) { + while (auto bitcast = dyn_cast(op)) { + if (bitcast.getKind() != CastKind::bitcast) + return op; + auto b = bitcast.getSrc().getDefiningOp(); + // Do not handle block arguments just yet. + if (!b) + return op; + op = b; + } + return op; +} + +void LifetimeCheckPass::updatePointsTo(mlir::Value addr, mlir::Value data, + mlir::Location loc) { + + auto getArrayFromSubscript = [&](PtrStrideOp strideOp) -> mlir::Value { + auto castOp = dyn_cast(strideOp.getBase().getDefiningOp()); + if (!castOp) + return {}; + if (castOp.getKind() != cir::CastKind::array_to_ptrdecay) + return {}; + return castOp.getSrc(); + }; + + auto dataSrcOp = data.getDefiningOp(); + + // Handle function arguments but not all block arguments just yet. + if (!dataSrcOp) { + auto blockArg = cast(data); + if (!blockArg.getOwner()->isEntryBlock()) + return; + getPmap()[addr].clear(); + getPmap()[addr].insert(State::getLocalValue(data)); + return; + } + + // Ignore chains of bitcasts and update data source. Note that when + // dataSrcOp gets updated, `data` might not be the most updated resource + // to use, so avoid using it directly, and instead get things from newer + // dataSrcOp. + dataSrcOp = ignoreBitcasts(dataSrcOp); + + // 2.4.2 - If the declaration includes an initialization, the + // initialization is treated as a separate operation + if (auto cstOp = dyn_cast(dataSrcOp)) { + // Aggregates can be bulk materialized in CIR, handle proper update of + // individual exploded fields. 
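Aside: this bulk-materialization path is what makes zero or constant initialization of an aggregate line up with rule 2.4.2 for each exploded pointer field (hypothetical example):

```cpp
struct Wrapper {
  int n;
  int *ptr;
};

void zero_init_sketch() {
  Wrapper w = {};     // lowered as a single zero/const value; the exploded
                      // field is updated to pset(w.ptr) = {null}
  if (w.ptr)
    (void)*w.ptr;     // guarded, so no invalid dereference on this path
}
```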
+ if (aggregates.count(addr)) { + if (auto constStruct = + mlir::dyn_cast(cstOp.getValue())) { + updatePointsToForConstStruct(addr, constStruct, loc); + return; + } + + if (auto zero = mlir::dyn_cast(cstOp.getValue())) { + if (auto zeroStructTy = dyn_cast(zero.getType())) { + updatePointsToForZeroStruct(addr, zeroStructTy, loc); + return; + } + } + return; + } + + assert(cstOp.isNullPtr() && "other than null not implemented"); + assert(getPmap().count(addr) && "address should always be valid"); + // 2.4.2 - If the initialization is default initialization or zero + // initialization, set pset(p) = {null}; for example: + // + // int* p; => pset(p) == {invalid} + // int* p{}; or string_view p; => pset(p) == {null}. + // int *p = nullptr; => pset(p) == {nullptr} => pset(p) == {null} + markPsetNull(addr, loc); + return; + } + + if (auto allocaOp = dyn_cast(dataSrcOp)) { + // p = &x; + getPmap()[addr].clear(); + getPmap()[addr].insert(State::getLocalValue(allocaOp.getAddr())); + return; + } + + if (auto ptrStrideOp = dyn_cast(dataSrcOp)) { + // p = &a[0]; + auto array = getArrayFromSubscript(ptrStrideOp); + if (array) { + getPmap()[addr].clear(); + getPmap()[addr].insert(State::getLocalValue(array)); + } + return; + } + + // Initializes ptr types out of known lib calls marked with pointer + // attributes. TODO: find a better way to tag this. + if (auto callOp = dyn_cast(dataSrcOp)) { + // iter = vector::begin() + getPmap()[addr].clear(); + getPmap()[addr].insert(State::getLocalValue(callOp.getResult())); + } + + if (auto loadOp = dyn_cast(dataSrcOp)) { + // handle indirections through a load, a common example are temporaries + // copying the 'this' param to a subsequent call. + updatePointsTo(addr, loadOp.getAddr(), loc); + return; + } + + // What should we add next? +} + +void LifetimeCheckPass::checkStore(StoreOp storeOp) { + auto addr = storeOp.getAddr(); + + // Decompose store's to aggregates into multiple updates to individual fields. + if (aggregates.count(addr)) { + auto data = storeOp.getValue(); + auto dataSrcOp = data.getDefiningOp(); + // Only interested in updating and tracking fields, anything besides + // constants isn't really relevant. + if (dataSrcOp && isa(dataSrcOp)) + updatePointsTo(addr, data, data.getLoc()); + return; + } + + // The bulk of the check is done on top of store to pointer categories, + // which usually represent the most common case. + // + // We handle some special local values, like coroutine tasks and lambdas, + // which could be holding references to things with dangling lifetime. + if (!ptrs.count(addr)) { + if (currScope->localTempTasks.count(storeOp.getValue())) + checkCoroTaskStore(storeOp); + else + checkLambdaCaptureStore(storeOp); + return; + } + + // Only handle ptrs from here on. + updatePointsTo(addr, storeOp.getValue(), storeOp.getValue().getLoc()); +} + +void LifetimeCheckPass::checkLoad(LoadOp loadOp) { + auto addr = loadOp.getAddr(); + // Only interested in checking deference on top of pointer types. + // Note that usually the use of the invalid address happens at the + // load or store using the result of this loadOp. 
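Aside: in source terms, the initializer shapes handled by updatePointsTo above look roughly like this (hand-written pset annotations, following the 2.4.2 comments):

```cpp
void points_to_sketch() {
  int x = 0;
  int a[4] = {0, 1, 2, 3};

  int *p0;            // pset(p0) = {invalid}  (no initializer)
  int *p1 = nullptr;  // pset(p1) = {null}     (constant null)
  int *p2 = &x;       // pset(p2) = {x}        (address of a local alloca)
  int *p3 = &a[0];    // pset(p3) = {a}        (array decay + pointer stride)

  (void)p0; (void)p1; (void)p2; (void)p3;
}
```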
+ if (!getPmap().count(addr) || !ptrs.count(addr)) + return; + + if (!loadOp.getIsDeref()) + return; + + checkPointerDeref(addr, loadOp.getLoc()); +} + +void LifetimeCheckPass::emitInvalidHistory(mlir::InFlightDiagnostic &D, + mlir::Value histKey, + mlir::Location warningLoc, + DerefStyle derefStyle) { + assert(invalidHist.count(histKey) && "expected invalid hist"); + auto &hist = invalidHist[histKey]; + unsigned limit = opts.histLimit; + + for (int lastIdx = hist.entries.size() - 1; limit > 0 && lastIdx >= 0; + lastIdx--, limit--) { + auto &info = hist.entries[lastIdx]; + + switch (info.style) { + case InvalidStyle::NotInitialized: { + D.attachNote(info.loc) << "uninitialized here"; + break; + } + case InvalidStyle::EndOfScope: { + if (tasks.count(histKey)) { + StringRef resource = "resource"; + if (auto allocaOp = dyn_cast(info.val->getDefiningOp())) { + if (isLambdaType(allocaOp.getAllocaType())) + resource = "lambda"; + } + D.attachNote((*info.val).getLoc()) + << "coroutine bound to " << resource << " with expired lifetime"; + D.attachNote(info.loc) << "at the end of scope or full-expression"; + } else if (derefStyle == DerefStyle::RetLambda) { + assert(currFunc && "expected function"); + StringRef parent = currFunc->getLambda() ? "lambda" : "function"; + D.attachNote(info.val->getLoc()) + << "declared here but invalid after enclosing " << parent + << " ends"; + } else { + auto outOfScopeVarName = getVarNameFromValue(*info.val); + D.attachNote(info.loc) << "pointee '" << outOfScopeVarName + << "' invalidated at end of scope"; + } + break; + } + case InvalidStyle::NonConstUseOfOwner: { + D.attachNote(info.loc) << "invalidated by non-const use of owner type"; + break; + } + default: + llvm_unreachable("unknown history style"); + } + } +} + +void LifetimeCheckPass::checkPointerDeref(mlir::Value addr, mlir::Location loc, + DerefStyle derefStyle) { + bool hasInvalid = getPmap()[addr].count(State::getInvalid()); + bool hasNullptr = getPmap()[addr].count(State::getNullPtr()); + + auto emitPsetRemark = [&] { + llvm::SmallString<128> psetStr; + llvm::raw_svector_ostream Out(psetStr); + printPset(getPmap()[addr], Out); + emitRemark(loc) << "pset => " << Out.str(); + }; + + // Do not emit the same warning twice or more. + if (emittedDiagnostics.count(loc)) + return; + + bool psetRemarkEmitted = false; + if (opts.emitRemarkPsetAlways()) { + emitPsetRemark(); + psetRemarkEmitted = true; + } + + // 2.4.2 - On every dereference of a Pointer p, enforce that p is valid. + if (!hasInvalid && !hasNullptr) + return; + + // TODO: create verbosity/accuracy levels, for now use deref styles directly + // to decide when not to emit a warning. + + // For indirect calls, do not relly on blunt nullptr passing, require some + // invalidation to have happened in a path. + if (derefStyle == DerefStyle::IndirectCallParam && !hasInvalid) + return; + + // Ok, filtered out questionable warnings, take the bad path leading to this + // deference point and diagnose it. 
+ auto varName = getVarNameFromValue(addr); + auto D = emitWarning(loc); + emittedDiagnostics.insert(loc); + + if (tasks.count(addr)) + D << "use of coroutine '" << varName << "' with dangling reference"; + else if (derefStyle == DerefStyle::RetLambda) + D << "returned lambda captures local variable"; + else if (derefStyle == DerefStyle::CallParam || + derefStyle == DerefStyle::IndirectCallParam) { + bool isAgg = isa_and_nonnull(addr.getDefiningOp()); + D << "passing "; + if (!isAgg) + D << "invalid pointer"; + else + D << "aggregate containing invalid pointer member"; + D << " '" << varName << "'"; + } else + D << "use of invalid pointer '" << varName << "'"; + + // TODO: add accuracy levels, different combinations of invalid and null + // could have different ratios of false positives. + if (hasInvalid && opts.emitHistoryInvalid()) + emitInvalidHistory(D, addr, loc, derefStyle); + + if (hasNullptr && opts.emitHistoryNull()) { + assert(pmapNullHist.count(addr) && "expected nullptr hist"); + auto ¬e = pmapNullHist[addr]; + D.attachNote(*note) << "'nullptr' invalidated here"; + } + + if (!psetRemarkEmitted && opts.emitRemarkPsetInvalid()) + emitPsetRemark(); +} + +static FuncOp getCalleeFromSymbol(ModuleOp mod, StringRef name) { + auto global = mlir::SymbolTable::lookupSymbolIn(mod, name); + assert(global && "expected to find symbol for function"); + return dyn_cast(global); +} + +static const ASTCXXMethodDeclInterface getMethod(ModuleOp mod, CallOp callOp) { + if (!callOp.getCallee()) + return nullptr; + StringRef name = *callOp.getCallee(); + auto method = getCalleeFromSymbol(mod, name); + if (!method || method.getBuiltin()) + return nullptr; + return dyn_cast(method.getAstAttr()); +} + +mlir::Value LifetimeCheckPass::getThisParamPointerCategory(CallOp callOp) { + auto thisptr = callOp.getArgOperand(0); + if (ptrs.count(thisptr)) + return thisptr; + if (auto loadOp = dyn_cast_or_null(thisptr.getDefiningOp())) { + if (ptrs.count(loadOp.getAddr())) + return loadOp.getAddr(); + } + // TODO: add a remark to spot 'this' indirections we currently not track. + return {}; +} + +mlir::Value LifetimeCheckPass::getThisParamOwnerCategory(CallOp callOp) { + auto thisptr = callOp.getArgOperand(0); + if (owners.count(thisptr)) + return thisptr; + if (auto loadOp = dyn_cast_or_null(thisptr.getDefiningOp())) { + if (owners.count(loadOp.getAddr())) + return loadOp.getAddr(); + } + // TODO: add a remark to spot 'this' indirections we currently not track. + return {}; +} + +void LifetimeCheckPass::checkMoveAssignment(CallOp callOp, + ASTCXXMethodDeclInterface m) { + // MyPointer::operator=(MyPointer&&)(%dst, %src) + // or + // MyOwner::operator=(MyOwner&&)(%dst, %src) + auto dst = getThisParamPointerCategory(callOp); + auto src = callOp.getArgOperand(1); + + // Move assignments between pointer categories. + if (dst && ptrs.count(src)) { + // Note that the current pattern here usually comes from a xvalue in src + // where all the initialization is done, and this move assignment is + // where we finally materialize it back to the original pointer category. + getPmap()[dst] = getPmap()[src]; + + // 2.4.2 - It is an error to use a moved-from object. + // To that intent we mark src's pset with invalid. + markPsetInvalid(src, InvalidStyle::MovedFrom, callOp.getLoc()); + return; + } + + // Copy assignments between owner categories. + dst = getThisParamOwnerCategory(callOp); + if (dst && owners.count(src)) { + // Handle as a non const use of owner, invalidating pointers. 
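Aside: a sketch of the moved-from rule being enforced here, using a hypothetical [[gsl::Pointer]] type in the spirit of the MyPointer examples in these comments; the flagged line is the later use of the moved-from object:

```cpp
#include <utility>

struct [[gsl::Pointer(int)]] IntRef {
  int *p = nullptr;
  IntRef() = default;
  IntRef(int &v) : p(&v) {}
  IntRef &operator=(IntRef &&other) {
    p = other.p;
    other.p = nullptr;
    return *this;
  }
  int read() const { return *p; }
};

void moved_from_sketch() {
  int x = 1;
  IntRef a(x), b;
  b = std::move(a);   // pset(b) = pset(a); pset(a) = {invalid} (MovedFrom)
  (void)b.read();     // fine
  (void)a.read();     // use of the moved-from pointer: candidate diagnostic
}
```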
+ checkNonConstUseOfOwner(dst, callOp.getLoc()); + + // 2.4.2 - It is an error to use a moved-from object. + // To that intent we mark src's pset with invalid. + markPsetInvalid(src, InvalidStyle::MovedFrom, callOp.getLoc()); + } +} + +void LifetimeCheckPass::checkCopyAssignment(CallOp callOp, + ASTCXXMethodDeclInterface m) { + // MyIntOwner::operator=(MyIntOwner&)(%dst, %src) + auto dst = getThisParamOwnerCategory(callOp); + auto src = callOp.getArgOperand(1); + + // Copy assignment between owner categories. + if (dst && owners.count(src)) + return checkNonConstUseOfOwner(dst, callOp.getLoc()); + + // Copy assignment between pointer categories. + dst = getThisParamPointerCategory(callOp); + if (dst && ptrs.count(src)) { + getPmap()[dst] = getPmap()[src]; + return; + } +} + +// User defined ctors that initialize from owner types is one +// way of tracking owned pointers. +// +// Example: +// MyIntPointer::MyIntPointer(MyIntOwner const&)(%5, %4) +// +bool LifetimeCheckPass::isCtorInitPointerFromOwner(CallOp callOp) { + if (callOp.getNumArgOperands() < 2) + return false; + + // FIXME: should we scan all arguments past first to look for an owner? + auto ptr = getThisParamPointerCategory(callOp); + auto owner = callOp.getArgOperand(1); + + if (ptr && owners.count(owner)) + return true; + + return false; +} + +void LifetimeCheckPass::checkCtor(CallOp callOp, + ASTCXXConstructorDeclInterface ctor) { + // TODO: zero init + // 2.4.2 if the initialization is default initialization or zero + // initialization, example: + // + // int* p{}; + // string_view p; + // + // both results in pset(p) == {null} + if (ctor.isDefaultConstructor()) { + // First argument passed is always the alloca for the 'this' ptr. + + // Currently two possible actions: + // 1. Skip Owner category initialization. + // 2. Initialize Pointer categories. + auto addr = getThisParamOwnerCategory(callOp); + if (addr) + return; + + addr = getThisParamPointerCategory(callOp); + if (!addr) + return; + + // Not interested in block/function arguments or any indirect + // provided alloca address. + if (!dyn_cast_or_null(addr.getDefiningOp())) + return; + + markPsetNull(addr, callOp.getLoc()); + return; + } + + // User defined copy ctor calls ... + if (ctor.isCopyConstructor()) { + llvm_unreachable("NYI"); + } + + if (isCtorInitPointerFromOwner(callOp)) { + auto addr = getThisParamPointerCategory(callOp); + assert(addr && "expected pointer category"); + auto owner = callOp.getArgOperand(1); + getPmap()[addr].clear(); + getPmap()[addr].insert(State::getOwnedBy(owner)); + return; + } +} + +void LifetimeCheckPass::checkOperators(CallOp callOp, + ASTCXXMethodDeclInterface m) { + auto addr = getThisParamOwnerCategory(callOp); + if (addr) { + // const access to the owner is fine. + if (m.isConst()) + return; + // TODO: this is a place where we can hook in some idiom recocgnition + // so we don't need to use actual source code annotation to make assumptions + // on methods we understand and know to behave nicely. + // + // In P1179, section 2.5.7.12, the use of [[gsl::lifetime_const]] is + // suggested, but it's not part of clang (will it ever?) + return checkNonConstUseOfOwner(addr, callOp.getLoc()); + } + + addr = getThisParamPointerCategory(callOp); + if (addr) { + // The assumption is that method calls on pointer types should trigger + // deref checking. 
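Aside: the owner generation rule quoted in checkNonConstUseOfOwner just below can be seen on a hypothetical [[gsl::Owner]]/[[gsl::Pointer]] pair. Note the rule is conservative: the flag fires because the owner's generation changed, not because anything necessarily dangles at runtime.

```cpp
struct [[gsl::Owner(int)]] IntBox {
  int v = 0;
  void reset() { v = 0; }   // any non-const member use of the Owner
};

struct [[gsl::Pointer(int)]] IntView {
  const int *p = nullptr;
  IntView(IntBox const &b) : p(&b.v) {}
  int read() const { return *p; }
};

void owner_generation_sketch() {
  IntBox box;          // pset(box) = {box__1'}
  IntView view(box);   // ctor init from an Owner: pset(view) = {box__1'}
  box.reset();         // non-const use: pset(box) = {box__2'}, KILL(box__1')
  (void)view.read();   // method call on a Pointer triggers the deref check;
                       // view still holds the old generation, so it is flagged
}
```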
+ checkPointerDeref(addr, callOp.getLoc()); + return; + } + + // FIXME: we also need to look at operators from non owner or pointer + // types that could be using Owner/Pointer types as parameters. +} + +mlir::Value +LifetimeCheckPass::getNonConstUseOfOwner(CallOp callOp, + ASTCXXMethodDeclInterface m) { + if (m.isConst()) + return {}; + return getThisParamOwnerCategory(callOp); +} + +void LifetimeCheckPass::checkNonConstUseOfOwner(mlir::Value ownerAddr, + mlir::Location loc) { + // 2.4.2 - On every non-const use of a local Owner o: + // + // - For each entry e in pset(s): Remove e from pset(s), and if no other + // Owner’s pset contains only e, then KILL(e). + kill(State::getOwnedBy(ownerAddr), InvalidStyle::NonConstUseOfOwner, loc); + + // - Set pset(o) = {o__N'}, where N is one higher than the highest + // previously used suffix. For example, initially pset(o) is {o__1'}, on + // o’s first non-const use pset(o) becomes {o__2'}, on o’s second non-const + // use pset(o) becomes {o__3'}, and so on. + incOwner(ownerAddr); + return; +} + +void LifetimeCheckPass::checkForOwnerAndPointerArguments(CallOp callOp, + unsigned firstArgIdx) { + auto numOperands = callOp.getNumArgOperands(); + if (firstArgIdx >= numOperands) + return; + + llvm::SmallSetVector ownersToInvalidate, ptrsToDeref; + for (unsigned i = firstArgIdx, e = numOperands; i != e; ++i) { + auto arg = callOp.getArgOperand(i); + // FIXME: apply p1179 rules as described in 2.5. Very conservative for now: + // + // - Owners: always invalidate. + // - Pointers: always check for deref. + // - Coroutine tasks: check the task for deref when calling methods of + // the task, but also when the passing the task around to other functions. + // - Aggregates: check ptr subelements for deref. + // + // FIXME: even before 2.5 we should only invalidate non-const param types. + if (owners.count(arg)) + ownersToInvalidate.insert(arg); + if (ptrs.count(arg)) + ptrsToDeref.insert(arg); + if (tasks.count(arg)) + ptrsToDeref.insert(arg); + if (aggregates.count(arg)) { + int memberIdx = 0; + auto sTy = + dyn_cast(cast(arg.getType()).getPointee()); + assert(sTy && "expected struct type"); + for (auto m : sTy.getMembers()) { + auto ptrMemberAddr = aggregates[arg][memberIdx]; + if (isa(m) && ptrMemberAddr) { + ptrsToDeref.insert(ptrMemberAddr); + } + memberIdx++; + } + } + } + + // FIXME: CIR should track source info on the passed args, so we can get + // accurate location for why the invalidation happens. + for (auto o : ownersToInvalidate) + checkNonConstUseOfOwner(o, callOp.getLoc()); + for (auto p : ptrsToDeref) + checkPointerDeref(p, callOp.getLoc(), + callOp.getCallee() ? DerefStyle::CallParam + : DerefStyle::IndirectCallParam); +} + +void LifetimeCheckPass::checkOtherMethodsAndFunctions( + CallOp callOp, ASTCXXMethodDeclInterface m) { + unsigned firstArgIdx = 0; + + // Looks at a method 'this' pointer: + // - If a method call to a class we consider interesting, like a method + // call on a coroutine task (promise_type). + // - Skip the 'this' for any other method. + if (m && !tasks.count(callOp.getArgOperand(firstArgIdx))) + firstArgIdx++; + checkForOwnerAndPointerArguments(callOp, firstArgIdx); +} + +bool LifetimeCheckPass::isOwnerOrPointerClassMethod( + CallOp callOp, ASTCXXMethodDeclInterface m) { + // For the sake of analysis, these behave like regular functions + if (!m || m.isStatic()) + return false; + // Check the object for owner/pointer by looking at the 'this' pointer. 
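Aside: a sketch of the argument checks done in checkForOwnerAndPointerArguments above, with the wording of the candidate diagnostics taken from checkPointerDeref; whether the aggregate case fires for a particular call depends on how the argument is materialized in CIR, so treat this as illustrative only:

```cpp
void consume_ptr(int *p);
struct Holder { int *q; };
void consume_holder(const Holder &h);

void call_param_sketch() {
  int *p;
  Holder h;
  {
    int x = 0;
    p = &x;
    h.q = &x;
  }                    // x dies: pset(p) and pset(h.q) become {invalid}
  consume_ptr(p);      // candidate: "passing invalid pointer 'p'"
  consume_holder(h);   // candidate: "passing aggregate containing invalid
                       // pointer member", named via the parent.field scheme
                       // used by getVarNameFromValue
}
```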
+ return getThisParamPointerCategory(callOp) || + getThisParamOwnerCategory(callOp); +} + +bool LifetimeCheckPass::isLambdaType(mlir::Type ty) { + if (IsLambdaTyCache.count(ty)) + return IsLambdaTyCache[ty]; + + IsLambdaTyCache[ty] = false; + auto taskTy = mlir::dyn_cast(ty); + if (!taskTy) + return false; + if (taskTy.getAst().isLambda()) + IsLambdaTyCache[ty] = true; + + return IsLambdaTyCache[ty]; +} + +bool LifetimeCheckPass::isTaskType(mlir::Value taskVal) { + auto ty = taskVal.getType(); + if (IsTaskTyCache.count(ty)) + return IsTaskTyCache[ty]; + + bool result = [&] { + auto taskTy = mlir::dyn_cast(taskVal.getType()); + if (!taskTy) + return false; + return taskTy.getAst().hasPromiseType(); + }(); + + IsTaskTyCache[ty] = result; + return result; +} + +void LifetimeCheckPass::trackCallToCoroutine(CallOp callOp) { + if (auto fnName = callOp.getCallee()) { + auto calleeFuncOp = getCalleeFromSymbol(theModule, *fnName); + if (calleeFuncOp && + (calleeFuncOp.getCoroutine() || + (calleeFuncOp.isDeclaration() && callOp->getNumResults() > 0 && + isTaskType(callOp->getResult(0))))) { + currScope->localTempTasks.insert(callOp->getResult(0)); + } + return; + } + // Handle indirect calls to coroutines, for instance when + // lambda coroutines are involved with invokers. + if (callOp->getNumResults() > 0 && isTaskType(callOp->getResult(0))) { + // FIXME: get more guarantees to prevent false positives (perhaps + // apply some tracking analysis before this pass and check for lambda + // idioms). + currScope->localTempTasks.insert(callOp->getResult(0)); + } +} + +void LifetimeCheckPass::checkCall(CallOp callOp) { + if (callOp.getNumArgOperands() == 0) + return; + + // Identify calls to coroutines and track returning temporary task types. + // + // Note that we can't reliably know if a function is a coroutine only as + // part of declaration + trackCallToCoroutine(callOp); + + auto methodDecl = getMethod(theModule, callOp); + if (!isOwnerOrPointerClassMethod(callOp, methodDecl)) + return checkOtherMethodsAndFunctions(callOp, methodDecl); + + // From this point on only owner and pointer class methods handling, + // starting from special methods. + if (auto ctor = dyn_cast(methodDecl)) + return checkCtor(callOp, ctor); + if (methodDecl.isMoveAssignmentOperator()) + return checkMoveAssignment(callOp, methodDecl); + if (methodDecl.isCopyAssignmentOperator()) + return checkCopyAssignment(callOp, methodDecl); + if (methodDecl.isOverloadedOperator()) + return checkOperators(callOp, methodDecl); + + // For any other methods... + + // Non-const member call to a Owner invalidates any of its users. + if (auto owner = getNonConstUseOfOwner(callOp, methodDecl)) { + return checkNonConstUseOfOwner(owner, callOp.getLoc()); + } + + // Take a pset(Ptr) = { Ownr' } where Own got invalidated, this will become + // invalid access to Ptr if any of its methods are used. + auto addr = getThisParamPointerCategory(callOp); + if (addr) + return checkPointerDeref(addr, callOp.getLoc()); +} + +void LifetimeCheckPass::checkOperation(Operation *op) { + if (isa<::mlir::ModuleOp>(op)) { + theModule = cast<::mlir::ModuleOp>(op); + for (Region ®ion : op->getRegions()) + checkRegion(region); + return; + } + + if (isa(op)) { + // Add a new scope. Note that as part of the scope cleanup process + // we apply section 2.3 KILL(x) functionality, turning relevant + // references invalid. 
+ // + // No need to create a new pmap when entering a new scope since it + // doesn't cause control flow to diverge (as it does in presence + // of cir::IfOp or cir::SwitchOp). + // + // Also note that for dangling pointers coming from if init stmts + // should be caught just fine, given that a ScopeOp embraces a IfOp. + LexicalScopeContext lexScope{op}; + LexicalScopeGuard scopeGuard{*this, &lexScope}; + for (Region ®ion : op->getRegions()) + checkRegion(region); + return; + } + + // FIXME: we can do better than sequence of dyn_casts. + if (auto fnOp = dyn_cast(op)) + return checkFunc(fnOp); + if (auto ifOp = dyn_cast(op)) + return checkIf(ifOp); + if (auto switchOp = dyn_cast(op)) + return checkSwitch(switchOp); + if (auto loopOp = dyn_cast(op)) + return checkLoop(loopOp); + if (auto allocaOp = dyn_cast(op)) + return checkAlloca(allocaOp); + if (auto storeOp = dyn_cast(op)) + return checkStore(storeOp); + if (auto loadOp = dyn_cast(op)) + return checkLoad(loadOp); + if (auto callOp = dyn_cast(op)) + return checkCall(callOp); + if (auto awaitOp = dyn_cast(op)) + return checkAwait(awaitOp); + if (auto returnOp = dyn_cast(op)) + return checkReturn(returnOp); +} + +void LifetimeCheckPass::runOnOperation() { + assert(astCtx && "Missing ASTContext, please construct with the right ctor"); + opts.parseOptions(*this); + Operation *op = getOperation(); + checkOperation(op); +} + +std::unique_ptr mlir::createLifetimeCheckPass() { + return std::make_unique(); +} + +std::unique_ptr mlir::createLifetimeCheckPass(clang::ASTContext *astCtx) { + auto lifetime = std::make_unique(); + lifetime->setASTContext(astCtx); + return std::move(lifetime); +} + +std::unique_ptr mlir::createLifetimeCheckPass(ArrayRef remark, + ArrayRef hist, + unsigned hist_limit, + clang::ASTContext *astCtx) { + auto lifetime = std::make_unique(); + lifetime->setASTContext(astCtx); + lifetime->opts.parseOptions(remark, hist, hist_limit); + return std::move(lifetime); +} + +//===----------------------------------------------------------------------===// +// Dump & print helpers +//===----------------------------------------------------------------------===// + +void LifetimeCheckPass::LexicalScopeContext::dumpLocalValues() { + llvm::errs() << "Local values: { "; + for (auto value : localValues) { + llvm::errs() << getVarNameFromValue(value); + llvm::errs() << ", "; + } + llvm::errs() << "}\n"; +} + +void LifetimeCheckPass::State::dump(llvm::raw_ostream &OS, int ownedGen) { + switch (val.getInt()) { + case Invalid: + OS << "invalid"; + break; + case NullPtr: + OS << "nullptr"; + break; + case Global: + OS << "global"; + break; + case LocalValue: + OS << getVarNameFromValue(val.getPointer()); + break; + case OwnedBy: + ownedGen++; // Start from 1. 
+ OS << getVarNameFromValue(val.getPointer()) << "__" << ownedGen << "'"; + break; + default: + llvm_unreachable("Not handled"); + } +} + +void LifetimeCheckPass::printPset(PSetType &pset, llvm::raw_ostream &OS) { + OS << "{ "; + auto size = pset.size(); + for (auto s : pset) { + int ownerGen = 0; + if (s.isOwnedBy()) + ownerGen = owners[s.getData()]; + s.dump(OS, ownerGen); + size--; + if (size > 0) + OS << ", "; + } + OS << " }"; +} + +void LifetimeCheckPass::dumpCurrentPmap() { dumpPmap(*currPmap); } + +void LifetimeCheckPass::dumpPmap(PMapType &pmap) { + llvm::errs() << "pmap {\n"; + int entry = 0; + for (auto &mapEntry : pmap) { + llvm::errs() << " " << entry << ": " << getVarNameFromValue(mapEntry.first) + << " " + << "=> "; + printPset(mapEntry.second); + llvm::errs() << "\n"; + entry++; + } + llvm::errs() << "}\n"; +} diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp new file mode 100644 index 000000000000..ba19c6ec4069 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -0,0 +1,1169 @@ +//===- LoweringPrepare.cpp - pareparation work for LLVM lowering ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoweringPrepareCXXABI.h" +#include "PassDetail.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Region.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/Mangle.h" +#include "clang/Basic/Module.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" + +#include + +using cir::CIRBaseBuilderTy; +using namespace mlir; +using namespace mlir::cir; + +static SmallString<128> getTransformedFileName(ModuleOp theModule) { + SmallString<128> FileName; + + if (theModule.getSymName()) { + FileName = llvm::sys::path::filename(theModule.getSymName()->str()); + } + + if (FileName.empty()) + FileName = ""; + + for (size_t i = 0; i < FileName.size(); ++i) { + // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens + // to be the set of C preprocessing numbers. + if (!clang::isPreprocessingNumberBody(FileName[i])) + FileName[i] = '_'; + } + + return FileName; +} + +/// Return the FuncOp called by `callOp`. 
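+/// Returns nullptr for indirect calls (no callee symbol attached to the op)
+/// or when the symbol does not resolve to a cir.func.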
+static FuncOp getCalledFunction(CallOp callOp) { + SymbolRefAttr sym = + llvm::dyn_cast_if_present(callOp.getCallableForCallee()); + if (!sym) + return nullptr; + return dyn_cast_or_null( + SymbolTable::lookupNearestSymbolFrom(callOp, sym)); +} + +namespace { + +struct LoweringPreparePass : public LoweringPrepareBase { + LoweringPreparePass() = default; + void runOnOperation() override; + + void runOnOp(Operation *op); + void lowerUnaryOp(UnaryOp op); + void lowerBinOp(BinOp op); + void lowerCastOp(CastOp op); + void lowerComplexBinOp(ComplexBinOp op); + void lowerThreeWayCmpOp(CmpThreeWayOp op); + void lowerVAArgOp(VAArgOp op); + void lowerGlobalOp(GlobalOp op); + void lowerDynamicCastOp(DynamicCastOp op); + void lowerStdFindOp(StdFindOp op); + void lowerIterBeginOp(IterBeginOp op); + void lowerIterEndOp(IterEndOp op); + void lowerArrayDtor(ArrayDtor op); + void lowerArrayCtor(ArrayCtor op); + + /// Collect annotations of global values in the module + void addGlobalAnnotations(mlir::Operation *op, mlir::ArrayAttr annotations); + + /// Build the function that initializes the specified global + FuncOp buildCXXGlobalVarDeclInitFunc(GlobalOp op); + + /// Build a module init function that calls all the dynamic initializers. + void buildCXXGlobalInitFunc(); + + /// Materialize global ctor/dtor list + void buildGlobalCtorDtorList(); + + /// Build attribute of global annotation values + void buildGlobalAnnotationValues(); + + FuncOp + buildRuntimeFunction(mlir::OpBuilder &builder, llvm::StringRef name, + mlir::Location loc, mlir::cir::FuncType type, + mlir::cir::GlobalLinkageKind linkage = + mlir::cir::GlobalLinkageKind::ExternalLinkage); + + GlobalOp + buildRuntimeVariable(mlir::OpBuilder &Builder, llvm::StringRef Name, + mlir::Location Loc, mlir::Type type, + mlir::cir::GlobalLinkageKind Linkage = + mlir::cir::GlobalLinkageKind::ExternalLinkage); + + /// + /// AST related + /// ----------- + + clang::ASTContext *astCtx; + std::shared_ptr<::cir::LoweringPrepareCXXABI> cxxABI; + + void setASTContext(clang::ASTContext *c) { + astCtx = c; + auto abiStr = c->getTargetInfo().getABI(); + switch (c->getCXXABIKind()) { + case clang::TargetCXXABI::GenericItanium: + cxxABI.reset(::cir::LoweringPrepareCXXABI::createItaniumABI()); + break; + case clang::TargetCXXABI::GenericAArch64: + case clang::TargetCXXABI::AppleARM64: + // TODO: This is temporary solution. ABIKind info should be + // propagated from the targetInfo managed by ABI lowering + // query system. + assert(abiStr == "aapcs" || abiStr == "darwinpcs" || + abiStr == "aapcs-soft"); + cxxABI.reset(::cir::LoweringPrepareCXXABI::createAArch64ABI( + abiStr == "aapcs" + ? ::cir::AArch64ABIKind::AAPCS + : (abiStr == "darwinpccs" ? ::cir::AArch64ABIKind::DarwinPCS + : ::cir::AArch64ABIKind::AAPCSSoft))); + break; + default: + llvm_unreachable("NYI"); + } + } + + /// Tracks current module. + ModuleOp theModule; + + /// Tracks existing dynamic initializers. + llvm::StringMap dynamicInitializerNames; + llvm::SmallVector dynamicInitializers; + + /// List of ctors to be called before main() + SmallVector globalCtorList; + /// List of dtors to be called when unloading module. 
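+  /// (Materialized as the cir.global_dtors module attribute by
+  /// buildGlobalCtorDtorList().)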
+ SmallVector globalDtorList; + /// List of annotations in the module + SmallVector globalAnnotations; +}; +} // namespace + +GlobalOp LoweringPreparePass::buildRuntimeVariable( + mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc, + mlir::Type type, mlir::cir::GlobalLinkageKind linkage) { + GlobalOp g = dyn_cast_or_null(SymbolTable::lookupNearestSymbolFrom( + theModule, StringAttr::get(theModule->getContext(), name))); + if (!g) { + g = builder.create(loc, name, type); + g.setLinkageAttr( + mlir::cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage)); + mlir::SymbolTable::setSymbolVisibility( + g, mlir::SymbolTable::Visibility::Private); + } + return g; +} + +FuncOp LoweringPreparePass::buildRuntimeFunction( + mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc, + mlir::cir::FuncType type, mlir::cir::GlobalLinkageKind linkage) { + FuncOp f = dyn_cast_or_null(SymbolTable::lookupNearestSymbolFrom( + theModule, StringAttr::get(theModule->getContext(), name))); + if (!f) { + f = builder.create(loc, name, type); + f.setLinkageAttr( + mlir::cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage)); + mlir::SymbolTable::setSymbolVisibility( + f, mlir::SymbolTable::Visibility::Private); + mlir::NamedAttrList attrs; + f.setExtraAttrsAttr(mlir::cir::ExtraFuncAttributesAttr::get( + builder.getContext(), attrs.getDictionary(builder.getContext()))); + } + return f; +} + +FuncOp LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(GlobalOp op) { + SmallString<256> fnName; + { + llvm::raw_svector_ostream Out(fnName); + op.getAst()->mangleDynamicInitializer(Out); + // Name numbering + uint32_t cnt = dynamicInitializerNames[fnName]++; + if (cnt) + fnName += "." + llvm::Twine(cnt).str(); + } + + // Create a variable initialization function. + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + auto voidTy = ::mlir::cir::VoidType::get(builder.getContext()); + auto fnType = mlir::cir::FuncType::get({}, voidTy); + FuncOp f = + buildRuntimeFunction(builder, fnName, op.getLoc(), fnType, + mlir::cir::GlobalLinkageKind::InternalLinkage); + + // Move over the initialzation code of the ctor region. + auto &block = op.getCtorRegion().front(); + mlir::Block *entryBB = f.addEntryBlock(); + entryBB->getOperations().splice(entryBB->begin(), block.getOperations(), + block.begin(), std::prev(block.end())); + + // Register the destructor call with __cxa_atexit + auto &dtorRegion = op.getDtorRegion(); + if (!dtorRegion.empty()) { + assert(op.getAst() && + op.getAst()->getTLSKind() == clang::VarDecl::TLS_None && " TLS NYI"); + // Create a variable that binds the atexit to this shared object. 
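+    // (__dso_handle is the Itanium C++ ABI's per-DSO token; the registration
+    // built below is roughly equivalent to the C call
+    //   __cxa_atexit((void (*)(void *))&dtor, (void *)&var, &__dso_handle);
+    // -- a sketch of the intent, not the exact CIR produced.)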
+ builder.setInsertionPointToStart(&theModule.getBodyRegion().front()); + auto Handle = buildRuntimeVariable(builder, "__dso_handle", op.getLoc(), + builder.getI8Type()); + + // Look for the destructor call in dtorBlock + auto &dtorBlock = dtorRegion.front(); + mlir::cir::CallOp dtorCall; + for (auto op : reverse(dtorBlock.getOps())) { + dtorCall = op; + break; + } + assert(dtorCall && "Expected a dtor call"); + FuncOp dtorFunc = getCalledFunction(dtorCall); + assert(dtorFunc && + mlir::isa(*dtorFunc.getAst()) && + "Expected a dtor call"); + + // Create a runtime helper function: + // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); + auto voidPtrTy = + ::mlir::cir::PointerType::get(builder.getContext(), voidTy); + auto voidFnTy = mlir::cir::FuncType::get({voidPtrTy}, voidTy); + auto voidFnPtrTy = + ::mlir::cir::PointerType::get(builder.getContext(), voidFnTy); + auto HandlePtrTy = + mlir::cir::PointerType::get(builder.getContext(), Handle.getSymType()); + auto fnAtExitType = mlir::cir::FuncType::get( + {voidFnPtrTy, voidPtrTy, HandlePtrTy}, + mlir::cir::VoidType::get(builder.getContext())); + const char *nameAtExit = "__cxa_atexit"; + FuncOp fnAtExit = + buildRuntimeFunction(builder, nameAtExit, op.getLoc(), fnAtExitType); + + // Replace the dtor call with a call to __cxa_atexit(&dtor, &var, + // &__dso_handle) + builder.setInsertionPointAfter(dtorCall); + mlir::Value args[3]; + auto dtorPtrTy = mlir::cir::PointerType::get(builder.getContext(), + dtorFunc.getFunctionType()); + // dtorPtrTy + args[0] = builder.create( + dtorCall.getLoc(), dtorPtrTy, dtorFunc.getSymName()); + args[0] = builder.create( + dtorCall.getLoc(), voidFnPtrTy, mlir::cir::CastKind::bitcast, args[0]); + args[1] = builder.create(dtorCall.getLoc(), voidPtrTy, + mlir::cir::CastKind::bitcast, + dtorCall.getArgOperand(0)); + args[2] = builder.create( + Handle.getLoc(), HandlePtrTy, Handle.getSymName()); + builder.createCallOp(dtorCall.getLoc(), fnAtExit, args); + dtorCall->erase(); + entryBB->getOperations().splice(entryBB->end(), dtorBlock.getOperations(), + dtorBlock.begin(), + std::prev(dtorBlock.end())); + } + + // Replace cir.yield with cir.return + builder.setInsertionPointToEnd(entryBB); + auto &yieldOp = block.getOperations().back(); + assert(isa(yieldOp)); + builder.create(yieldOp.getLoc()); + return f; +} + +static void canonicalizeIntrinsicThreeWayCmp(CIRBaseBuilderTy &builder, + CmpThreeWayOp op) { + auto loc = op->getLoc(); + auto cmpInfo = op.getInfo(); + + if (cmpInfo.getLt() == -1 && cmpInfo.getEq() == 0 && cmpInfo.getGt() == 1) { + // The comparison is already in canonicalized form. + return; + } + + auto canonicalizedCmpInfo = + mlir::cir::CmpThreeWayInfoAttr::get(builder.getContext(), -1, 0, 1); + mlir::Value result = + builder + .create(loc, op.getType(), op.getLhs(), + op.getRhs(), canonicalizedCmpInfo) + .getResult(); + + auto compareAndYield = [&](mlir::Value input, int64_t test, + int64_t yield) -> mlir::Value { + // Create a conditional branch that tests whether `input` is equal to + // `test`. If `input` is equal to `test`, yield `yield`. Otherwise, yield + // `input` as is. 
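+    // For instance, compareAndYield(result, -1, cmpInfo.getLt()) builds
+    // select(result == -1, getLt(), result).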
+ auto testValue = builder.getConstant( + loc, mlir::cir::IntAttr::get(input.getType(), test)); + auto yieldValue = builder.getConstant( + loc, mlir::cir::IntAttr::get(input.getType(), yield)); + auto eqToTest = + builder.createCompare(loc, mlir::cir::CmpOpKind::eq, input, testValue); + return builder.createSelect(loc, eqToTest, yieldValue, input); + }; + + if (cmpInfo.getLt() != -1) + result = compareAndYield(result, -1, cmpInfo.getLt()); + + if (cmpInfo.getEq() != 0) + result = compareAndYield(result, 0, cmpInfo.getEq()); + + if (cmpInfo.getGt() != 1) + result = compareAndYield(result, 1, cmpInfo.getGt()); + + op.replaceAllUsesWith(result); + op.erase(); +} + +void LoweringPreparePass::lowerVAArgOp(VAArgOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPoint(op); + ::cir::CIRDataLayout datalayout(theModule); + + auto res = cxxABI->lowerVAArg(builder, op, datalayout); + if (res) { + op.replaceAllUsesWith(res); + op.erase(); + } + return; +} + +void LoweringPreparePass::lowerUnaryOp(UnaryOp op) { + auto ty = op.getType(); + if (!mlir::isa(ty)) + return; + + auto loc = op.getLoc(); + auto opKind = op.getKind(); + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + + auto operand = op.getInput(); + + auto operandReal = builder.createComplexReal(loc, operand); + auto operandImag = builder.createComplexImag(loc, operand); + + mlir::Value resultReal; + mlir::Value resultImag; + switch (opKind) { + case mlir::cir::UnaryOpKind::Inc: + case mlir::cir::UnaryOpKind::Dec: + resultReal = builder.createUnaryOp(loc, opKind, operandReal); + resultImag = operandImag; + break; + + case mlir::cir::UnaryOpKind::Plus: + case mlir::cir::UnaryOpKind::Minus: + resultReal = builder.createUnaryOp(loc, opKind, operandReal); + resultImag = builder.createUnaryOp(loc, opKind, operandImag); + break; + + case mlir::cir::UnaryOpKind::Not: + resultReal = operandReal; + resultImag = + builder.createUnaryOp(loc, mlir::cir::UnaryOpKind::Minus, operandImag); + break; + } + + auto result = builder.createComplexCreate(loc, resultReal, resultImag); + op.replaceAllUsesWith(result); + op.erase(); +} + +void LoweringPreparePass::lowerBinOp(BinOp op) { + auto ty = op.getType(); + if (!mlir::isa(ty)) + return; + + auto loc = op.getLoc(); + auto opKind = op.getKind(); + assert((opKind == mlir::cir::BinOpKind::Add || + opKind == mlir::cir::BinOpKind::Sub) && + "invalid binary op kind on complex numbers"); + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + + auto lhs = op.getLhs(); + auto rhs = op.getRhs(); + + // (a+bi) + (c+di) = (a+c) + (b+d)i + // (a+bi) - (c+di) = (a-c) + (b-d)i + auto lhsReal = builder.createComplexReal(loc, lhs); + auto lhsImag = builder.createComplexImag(loc, lhs); + auto rhsReal = builder.createComplexReal(loc, rhs); + auto rhsImag = builder.createComplexImag(loc, rhs); + auto resultReal = builder.createBinop(lhsReal, opKind, rhsReal); + auto resultImag = builder.createBinop(lhsImag, opKind, rhsImag); + auto result = builder.createComplexCreate(loc, resultReal, resultImag); + + op.replaceAllUsesWith(result); + op.erase(); +} + +static mlir::Value lowerScalarToComplexCast(MLIRContext &ctx, CastOp op) { + CIRBaseBuilderTy builder(ctx); + builder.setInsertionPoint(op); + + auto src = op.getSrc(); + auto imag = builder.getNullValue(src.getType(), op.getLoc()); + return builder.createComplexCreate(op.getLoc(), src, imag); +} + +static mlir::Value lowerComplexToScalarCast(MLIRContext &ctx, CastOp op) { + CIRBaseBuilderTy 
builder(ctx); + builder.setInsertionPoint(op); + + auto src = op.getSrc(); + + if (!mlir::isa(op.getType())) + return builder.createComplexReal(op.getLoc(), src); + + // Complex cast to bool: (bool)(a+bi) => (bool)a || (bool)b + auto srcReal = builder.createComplexReal(op.getLoc(), src); + auto srcImag = builder.createComplexImag(op.getLoc(), src); + + mlir::cir::CastKind elemToBoolKind; + if (op.getKind() == mlir::cir::CastKind::float_complex_to_bool) + elemToBoolKind = mlir::cir::CastKind::float_to_bool; + else if (op.getKind() == mlir::cir::CastKind::int_complex_to_bool) + elemToBoolKind = mlir::cir::CastKind::int_to_bool; + else + llvm_unreachable("invalid complex to bool cast kind"); + + auto boolTy = builder.getBoolTy(); + auto srcRealToBool = + builder.createCast(op.getLoc(), elemToBoolKind, srcReal, boolTy); + auto srcImagToBool = + builder.createCast(op.getLoc(), elemToBoolKind, srcImag, boolTy); + + // srcRealToBool || srcImagToBool + return builder.createLogicalOr(op.getLoc(), srcRealToBool, srcImagToBool); +} + +static mlir::Value lowerComplexToComplexCast(MLIRContext &ctx, CastOp op) { + CIRBaseBuilderTy builder(ctx); + builder.setInsertionPoint(op); + + auto src = op.getSrc(); + auto dstComplexElemTy = + mlir::cast(op.getType()).getElementTy(); + + auto srcReal = builder.createComplexReal(op.getLoc(), src); + auto srcImag = builder.createComplexReal(op.getLoc(), src); + + mlir::cir::CastKind scalarCastKind; + switch (op.getKind()) { + case mlir::cir::CastKind::float_complex: + scalarCastKind = mlir::cir::CastKind::floating; + break; + case mlir::cir::CastKind::float_complex_to_int_complex: + scalarCastKind = mlir::cir::CastKind::float_to_int; + break; + case mlir::cir::CastKind::int_complex: + scalarCastKind = mlir::cir::CastKind::integral; + break; + case mlir::cir::CastKind::int_complex_to_float_complex: + scalarCastKind = mlir::cir::CastKind::int_to_float; + break; + default: + llvm_unreachable("invalid complex to complex cast kind"); + } + + auto dstReal = builder.createCast(op.getLoc(), scalarCastKind, srcReal, + dstComplexElemTy); + auto dstImag = builder.createCast(op.getLoc(), scalarCastKind, srcImag, + dstComplexElemTy); + return builder.createComplexCreate(op.getLoc(), dstReal, dstImag); +} + +void LoweringPreparePass::lowerCastOp(CastOp op) { + mlir::Value loweredValue; + switch (op.getKind()) { + case mlir::cir::CastKind::float_to_complex: + case mlir::cir::CastKind::int_to_complex: + loweredValue = lowerScalarToComplexCast(getContext(), op); + break; + + case mlir::cir::CastKind::float_complex_to_real: + case mlir::cir::CastKind::int_complex_to_real: + case mlir::cir::CastKind::float_complex_to_bool: + case mlir::cir::CastKind::int_complex_to_bool: + loweredValue = lowerComplexToScalarCast(getContext(), op); + break; + + case mlir::cir::CastKind::float_complex: + case mlir::cir::CastKind::float_complex_to_int_complex: + case mlir::cir::CastKind::int_complex: + case mlir::cir::CastKind::int_complex_to_float_complex: + loweredValue = lowerComplexToComplexCast(getContext(), op); + break; + + default: + return; + } + + op.replaceAllUsesWith(loweredValue); + op.erase(); +} + +static mlir::Value buildComplexBinOpLibCall( + LoweringPreparePass &pass, CIRBaseBuilderTy &builder, + llvm::StringRef (*libFuncNameGetter)(llvm::APFloat::Semantics), + mlir::Location loc, mlir::cir::ComplexType ty, mlir::Value lhsReal, + mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag) { + auto elementTy = mlir::cast(ty.getElementTy()); + + auto libFuncName = libFuncNameGetter( + 
llvm::APFloat::SemanticsToEnum(elementTy.getFloatSemantics())); + llvm::SmallVector libFuncInputTypes(4, elementTy); + auto libFuncTy = mlir::cir::FuncType::get(libFuncInputTypes, ty); + + mlir::cir::FuncOp libFunc; + { + mlir::OpBuilder::InsertionGuard ipGuard{builder}; + builder.setInsertionPointToStart(pass.theModule.getBody()); + libFunc = pass.buildRuntimeFunction(builder, libFuncName, loc, libFuncTy); + } + + auto call = + builder.createCallOp(loc, libFunc, {lhsReal, lhsImag, rhsReal, rhsImag}); + return call.getResult(); +} + +static llvm::StringRef +getComplexMulLibCallName(llvm::APFloat::Semantics semantics) { + switch (semantics) { + case llvm::APFloat::S_IEEEhalf: + return "__mulhc3"; + case llvm::APFloat::S_IEEEsingle: + return "__mulsc3"; + case llvm::APFloat::S_IEEEdouble: + return "__muldc3"; + case llvm::APFloat::S_PPCDoubleDouble: + return "__multc3"; + case llvm::APFloat::S_x87DoubleExtended: + return "__mulxc3"; + case llvm::APFloat::S_IEEEquad: + return "__multc3"; + default: + llvm_unreachable("unsupported floating point type"); + } +} + +static llvm::StringRef +getComplexDivLibCallName(llvm::APFloat::Semantics semantics) { + switch (semantics) { + case llvm::APFloat::S_IEEEhalf: + return "__divhc3"; + case llvm::APFloat::S_IEEEsingle: + return "__divsc3"; + case llvm::APFloat::S_IEEEdouble: + return "__divdc3"; + case llvm::APFloat::S_PPCDoubleDouble: + return "__divtc3"; + case llvm::APFloat::S_x87DoubleExtended: + return "__divxc3"; + case llvm::APFloat::S_IEEEquad: + return "__divtc3"; + default: + llvm_unreachable("unsupported floating point type"); + } +} + +static mlir::Value lowerComplexMul(LoweringPreparePass &pass, + CIRBaseBuilderTy &builder, + mlir::Location loc, + mlir::cir::ComplexBinOp op, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + // (a+bi) * (c+di) = (ac-bd) + (ad+bc)i + auto resultRealLhs = + builder.createBinop(lhsReal, mlir::cir::BinOpKind::Mul, rhsReal); + auto resultRealRhs = + builder.createBinop(lhsImag, mlir::cir::BinOpKind::Mul, rhsImag); + auto resultImagLhs = + builder.createBinop(lhsReal, mlir::cir::BinOpKind::Mul, rhsImag); + auto resultImagRhs = + builder.createBinop(lhsImag, mlir::cir::BinOpKind::Mul, rhsReal); + auto resultReal = builder.createBinop( + resultRealLhs, mlir::cir::BinOpKind::Sub, resultRealRhs); + auto resultImag = builder.createBinop( + resultImagLhs, mlir::cir::BinOpKind::Add, resultImagRhs); + auto algebraicResult = + builder.createComplexCreate(loc, resultReal, resultImag); + + auto ty = op.getType(); + auto range = op.getRange(); + if (mlir::isa(ty.getElementTy()) || + range == mlir::cir::ComplexRangeKind::Basic || + range == mlir::cir::ComplexRangeKind::Improved || + range == mlir::cir::ComplexRangeKind::Promoted) + return algebraicResult; + + // Check whether the real part and the imaginary part of the result are both + // NaN. If so, emit a library call to compute the multiplication instead. + // We check a value against NaN by comparing the value against itself. 
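+  // Under IEEE 754 only NaN compares unequal to itself, which is why a
+  // self-comparison suffices as a NaN test; the __mul*c3 library call is then
+  // expected to recover the infinity-related cases required by C Annex G.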
+ auto resultRealIsNaN = builder.createIsNaN(loc, resultReal); + auto resultImagIsNaN = builder.createIsNaN(loc, resultImag); + auto resultRealAndImagAreNaN = + builder.createLogicalAnd(loc, resultRealIsNaN, resultImagIsNaN); + return builder + .create( + loc, resultRealAndImagAreNaN, + [&](mlir::OpBuilder &, mlir::Location) { + auto libCallResult = buildComplexBinOpLibCall( + pass, builder, &getComplexMulLibCallName, loc, ty, lhsReal, + lhsImag, rhsReal, rhsImag); + builder.createYield(loc, libCallResult); + }, + [&](mlir::OpBuilder &, mlir::Location) { + builder.createYield(loc, algebraicResult); + }) + .getResult(); +} + +static mlir::Value +buildAlgebraicComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + // (a+bi) / (c+di) = ((ac+bd)/(cc+dd)) + ((bc-ad)/(cc+dd))i + auto &a = lhsReal; + auto &b = lhsImag; + auto &c = rhsReal; + auto &d = rhsImag; + + auto ac = builder.createBinop(loc, a, mlir::cir::BinOpKind::Mul, c); // a*c + auto bd = builder.createBinop(loc, b, mlir::cir::BinOpKind::Mul, d); // b*d + auto cc = builder.createBinop(loc, c, mlir::cir::BinOpKind::Mul, c); // c*c + auto dd = builder.createBinop(loc, d, mlir::cir::BinOpKind::Mul, d); // d*d + auto acbd = + builder.createBinop(loc, ac, mlir::cir::BinOpKind::Add, bd); // ac+bd + auto ccdd = + builder.createBinop(loc, cc, mlir::cir::BinOpKind::Add, dd); // cc+dd + auto resultReal = + builder.createBinop(loc, acbd, mlir::cir::BinOpKind::Div, ccdd); + + auto bc = builder.createBinop(loc, b, mlir::cir::BinOpKind::Mul, c); // b*c + auto ad = builder.createBinop(loc, a, mlir::cir::BinOpKind::Mul, d); // a*d + auto bcad = + builder.createBinop(loc, bc, mlir::cir::BinOpKind::Sub, ad); // bc-ad + auto resultImag = + builder.createBinop(loc, bcad, mlir::cir::BinOpKind::Div, ccdd); + + return builder.createComplexCreate(loc, resultReal, resultImag); +} + +static mlir::Value +buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + // Implements Smith's algorithm for complex division. + // SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). 
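+  // Compared with the plain formula (ac+bd)/(cc+dd), Smith's method scales by
+  // r = smaller/larger of |c| and |d|, which avoids spurious overflow or
+  // underflow in the cc+dd term when c and d differ greatly in magnitude.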
+ + // Let: + // - lhs := a+bi + // - rhs := c+di + // - result := lhs / rhs = e+fi + // + // The algorithm psudocode looks like follows: + // if fabs(c) >= fabs(d): + // r := d / c + // tmp := c + r*d + // e = (a + b*r) / tmp + // f = (b - a*r) / tmp + // else: + // r := c / d + // tmp := d + r*c + // e = (a*r + b) / tmp + // f = (b*r - a) / tmp + + auto &a = lhsReal; + auto &b = lhsImag; + auto &c = rhsReal; + auto &d = rhsImag; + + auto trueBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) { + auto r = builder.createBinop(loc, d, mlir::cir::BinOpKind::Div, + c); // r := d / c + auto rd = builder.createBinop(loc, r, mlir::cir::BinOpKind::Mul, d); // r*d + auto tmp = builder.createBinop(loc, c, mlir::cir::BinOpKind::Add, + rd); // tmp := c + r*d + + auto br = builder.createBinop(loc, b, mlir::cir::BinOpKind::Mul, r); // b*r + auto abr = + builder.createBinop(loc, a, mlir::cir::BinOpKind::Add, br); // a + b*r + auto e = builder.createBinop(loc, abr, mlir::cir::BinOpKind::Div, tmp); + + auto ar = builder.createBinop(loc, a, mlir::cir::BinOpKind::Mul, r); // a*r + auto bar = + builder.createBinop(loc, b, mlir::cir::BinOpKind::Sub, ar); // b - a*r + auto f = builder.createBinop(loc, bar, mlir::cir::BinOpKind::Div, tmp); + + auto result = builder.createComplexCreate(loc, e, f); + builder.createYield(loc, result); + }; + + auto falseBranchBuilder = [&](mlir::OpBuilder &, mlir::Location) { + auto r = builder.createBinop(loc, c, mlir::cir::BinOpKind::Div, + d); // r := c / d + auto rc = builder.createBinop(loc, r, mlir::cir::BinOpKind::Mul, c); // r*c + auto tmp = builder.createBinop(loc, d, mlir::cir::BinOpKind::Add, + rc); // tmp := d + r*c + + auto ar = builder.createBinop(loc, a, mlir::cir::BinOpKind::Mul, r); // a*r + auto arb = + builder.createBinop(loc, ar, mlir::cir::BinOpKind::Add, b); // a*r + b + auto e = builder.createBinop(loc, arb, mlir::cir::BinOpKind::Div, tmp); + + auto br = builder.createBinop(loc, b, mlir::cir::BinOpKind::Mul, r); // b*r + auto bra = + builder.createBinop(loc, br, mlir::cir::BinOpKind::Sub, a); // b*r - a + auto f = builder.createBinop(loc, bra, mlir::cir::BinOpKind::Div, tmp); + + auto result = builder.createComplexCreate(loc, e, f); + builder.createYield(loc, result); + }; + + auto cFabs = builder.create(loc, c); + auto dFabs = builder.create(loc, d); + auto cmpResult = + builder.createCompare(loc, mlir::cir::CmpOpKind::ge, cFabs, dFabs); + auto ternary = builder.create( + loc, cmpResult, trueBranchBuilder, falseBranchBuilder); + + return ternary.getResult(); +} + +static mlir::Value lowerComplexDiv(LoweringPreparePass &pass, + CIRBaseBuilderTy &builder, + mlir::Location loc, + mlir::cir::ComplexBinOp op, + mlir::Value lhsReal, mlir::Value lhsImag, + mlir::Value rhsReal, mlir::Value rhsImag) { + auto ty = op.getType(); + if (mlir::isa(ty.getElementTy())) { + auto range = op.getRange(); + if (range == mlir::cir::ComplexRangeKind::Improved || + (range == mlir::cir::ComplexRangeKind::Promoted && !op.getPromoted())) + return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag, + rhsReal, rhsImag); + if (range == mlir::cir::ComplexRangeKind::Full) + return buildComplexBinOpLibCall(pass, builder, &getComplexDivLibCallName, + loc, ty, lhsReal, lhsImag, rhsReal, + rhsImag); + } + + return buildAlgebraicComplexDiv(builder, loc, lhsReal, lhsImag, rhsReal, + rhsImag); +} + +void LoweringPreparePass::lowerComplexBinOp(ComplexBinOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + + auto loc = op.getLoc(); + auto lhs = 
op.getLhs(); + auto rhs = op.getRhs(); + auto lhsReal = builder.createComplexReal(loc, lhs); + auto lhsImag = builder.createComplexImag(loc, lhs); + auto rhsReal = builder.createComplexReal(loc, rhs); + auto rhsImag = builder.createComplexImag(loc, rhs); + + mlir::Value loweredResult; + if (op.getKind() == mlir::cir::ComplexBinOpKind::Mul) + loweredResult = lowerComplexMul(*this, builder, loc, op, lhsReal, lhsImag, + rhsReal, rhsImag); + else + loweredResult = lowerComplexDiv(*this, builder, loc, op, lhsReal, lhsImag, + rhsReal, rhsImag); + + op.replaceAllUsesWith(loweredResult); + op.erase(); +} + +void LoweringPreparePass::lowerThreeWayCmpOp(CmpThreeWayOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + + if (op.isIntegralComparison() && op.isStrongOrdering()) { + // For three-way comparisons on integral operands that produce strong + // ordering, we can generate potentially better code with the `llvm.scmp.*` + // and `llvm.ucmp.*` intrinsics. Thus we don't replace these comparisons + // here. They will be lowered directly to LLVMIR during the LLVM lowering + // pass. + // + // But we still need to take a step here. `llvm.scmp.*` and `llvm.ucmp.*` + // returns -1, 0, or 1 to represent lt, eq, and gt, which are the + // "canonicalized" result values of three-way comparisons. However, + // `cir.cmp3way` may not produce canonicalized result. We need to + // canonicalize the comparison if necessary. This is what we're doing in + // this special branch. + canonicalizeIntrinsicThreeWayCmp(builder, op); + return; + } + + auto loc = op->getLoc(); + auto cmpInfo = op.getInfo(); + + auto buildCmpRes = [&](int64_t value) -> mlir::Value { + return builder.create( + loc, op.getType(), mlir::cir::IntAttr::get(op.getType(), value)); + }; + auto ltRes = buildCmpRes(cmpInfo.getLt()); + auto eqRes = buildCmpRes(cmpInfo.getEq()); + auto gtRes = buildCmpRes(cmpInfo.getGt()); + + auto buildCmp = [&](CmpOpKind kind) -> mlir::Value { + auto ty = BoolType::get(&getContext()); + return builder.create(loc, ty, kind, op.getLhs(), + op.getRhs()); + }; + auto buildSelect = [&](mlir::Value condition, mlir::Value trueResult, + mlir::Value falseResult) -> mlir::Value { + return builder.createSelect(loc, condition, trueResult, falseResult); + }; + + mlir::Value transformedResult; + if (cmpInfo.getOrdering() == CmpOrdering::Strong) { + // Strong ordering. + auto lt = buildCmp(CmpOpKind::lt); + auto eq = buildCmp(CmpOpKind::eq); + auto selectOnEq = buildSelect(eq, eqRes, gtRes); + transformedResult = buildSelect(lt, ltRes, selectOnEq); + } else { + // Partial ordering. + auto unorderedRes = buildCmpRes(cmpInfo.getUnordered().value()); + + auto lt = buildCmp(CmpOpKind::lt); + auto eq = buildCmp(CmpOpKind::eq); + auto gt = buildCmp(CmpOpKind::gt); + auto selectOnEq = buildSelect(eq, eqRes, unorderedRes); + auto selectOnGt = buildSelect(gt, gtRes, selectOnEq); + transformedResult = buildSelect(lt, ltRes, selectOnGt); + } + + op.replaceAllUsesWith(transformedResult); + op.erase(); +} + +void LoweringPreparePass::lowerGlobalOp(GlobalOp op) { + auto &ctorRegion = op.getCtorRegion(); + auto &dtorRegion = op.getDtorRegion(); + + if (!ctorRegion.empty() || !dtorRegion.empty()) { + // Build a variable initialization function and move the initialzation code + // in the ctor region over. 
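+    // (Each such function is also recorded in cir.global_ctors and called from
+    // the synthesized _GLOBAL__sub_I_<file> initializer; see
+    // buildCXXGlobalInitFunc below.)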
+ auto f = buildCXXGlobalVarDeclInitFunc(op); + + // Clear the ctor and dtor region + ctorRegion.getBlocks().clear(); + dtorRegion.getBlocks().clear(); + + // Add a function call to the variable initialization function. + assert(!hasAttr( + mlir::cast(*op.getAst())) && + "custom initialization priority NYI"); + dynamicInitializers.push_back(f); + } + + std::optional annotations = op.getAnnotations(); + if (annotations) { + addGlobalAnnotations(op, annotations.value()); + } +} + +void LoweringPreparePass::buildGlobalCtorDtorList() { + if (!globalCtorList.empty()) { + theModule->setAttr("cir.global_ctors", + mlir::ArrayAttr::get(&getContext(), globalCtorList)); + } + if (!globalDtorList.empty()) { + theModule->setAttr("cir.global_dtors", + mlir::ArrayAttr::get(&getContext(), globalDtorList)); + } +} + +void LoweringPreparePass::buildCXXGlobalInitFunc() { + if (dynamicInitializers.empty()) + return; + + for (auto &f : dynamicInitializers) { + // TODO: handle globals with a user-specified initialzation priority. + auto ctorAttr = mlir::cir::GlobalCtorAttr::get(&getContext(), f.getName()); + globalCtorList.push_back(ctorAttr); + } + + SmallString<256> fnName; + // Include the filename in the symbol name. Including "sub_" matches gcc + // and makes sure these symbols appear lexicographically behind the symbols + // with priority emitted above. Module implementation units behave the same + // way as a non-modular TU with imports. + // TODO: check CXX20ModuleInits + if (astCtx->getCurrentNamedModule() && + !astCtx->getCurrentNamedModule()->isModuleImplementation()) { + llvm::raw_svector_ostream Out(fnName); + std::unique_ptr MangleCtx( + astCtx->createMangleContext()); + cast(*MangleCtx) + .mangleModuleInitializer(astCtx->getCurrentNamedModule(), Out); + } else { + fnName += "_GLOBAL__sub_I_"; + fnName += getTransformedFileName(theModule); + } + + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointToEnd(&theModule.getBodyRegion().back()); + auto fnType = mlir::cir::FuncType::get( + {}, mlir::cir::VoidType::get(builder.getContext())); + FuncOp f = + buildRuntimeFunction(builder, fnName, theModule.getLoc(), fnType, + mlir::cir::GlobalLinkageKind::ExternalLinkage); + builder.setInsertionPointToStart(f.addEntryBlock()); + for (auto &f : dynamicInitializers) { + builder.createCallOp(f.getLoc(), f); + } + + builder.create(f.getLoc()); +} + +void LoweringPreparePass::lowerDynamicCastOp(DynamicCastOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op); + + assert(astCtx && "AST context is not available during lowering prepare"); + auto loweredValue = cxxABI->lowerDynamicCast(builder, *astCtx, op); + + op.replaceAllUsesWith(loweredValue); + op.erase(); +} + +static void lowerArrayDtorCtorIntoLoop(CIRBaseBuilderTy &builder, + mlir::Operation *op, mlir::Type eltTy, + mlir::Value arrayAddr, + uint64_t arrayLen) { + // Generate loop to call into ctor/dtor for every element. + auto loc = op->getLoc(); + + // TODO: instead of fixed integer size, create alias for PtrDiffTy and unify + // with CIRGen stuff. 
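+  // Roughly, the emitted CIR corresponds to (illustrative sketch only):
+  //
+  //   T *__array_idx = decay(arrayAddr);   // cir.cast array_to_ptrdecay
+  //   T *end = __array_idx + arrayLen;
+  //   do {
+  //     ctor_or_dtor(__array_idx);
+  //     ++__array_idx;
+  //   } while (__array_idx has not reached end);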
+ auto ptrDiffTy = + mlir::cir::IntType::get(builder.getContext(), 64, /*signed=*/false); + auto numArrayElementsConst = builder.create( + loc, ptrDiffTy, mlir::cir::IntAttr::get(ptrDiffTy, arrayLen)); + + auto begin = builder.create( + loc, eltTy, mlir::cir::CastKind::array_to_ptrdecay, arrayAddr); + mlir::Value end = builder.create( + loc, eltTy, begin, numArrayElementsConst); + + auto tmpAddr = builder.createAlloca( + loc, /*addr type*/ builder.getPointerTo(eltTy), + /*var type*/ eltTy, "__array_idx", clang::CharUnits::One()); + builder.createStore(loc, begin, tmpAddr); + + auto loop = builder.createDoWhile( + loc, + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto currentElement = b.create(loc, eltTy, tmpAddr); + mlir::Type boolTy = mlir::cir::BoolType::get(b.getContext()); + auto cmp = builder.create( + loc, boolTy, mlir::cir::CmpOpKind::eq, currentElement, end); + builder.createCondition(cmp); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + auto currentElement = b.create(loc, eltTy, tmpAddr); + + CallOp ctorCall; + op->walk([&](CallOp c) { ctorCall = c; }); + assert(ctorCall && "expected ctor call"); + + auto one = builder.create( + loc, ptrDiffTy, mlir::cir::IntAttr::get(ptrDiffTy, 1)); + + ctorCall->moveAfter(one); + ctorCall->setOperand(0, currentElement); + + // Advance pointer and store them to temporary variable + auto nextElement = builder.create( + loc, eltTy, currentElement, one); + builder.createStore(loc, nextElement, tmpAddr); + builder.createYield(loc); + }); + + op->replaceAllUsesWith(loop); + op->erase(); +} + +void LoweringPreparePass::lowerArrayDtor(ArrayDtor op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op.getOperation()); + + auto eltTy = op->getRegion(0).getArgument(0).getType(); + auto arrayLen = mlir::cast( + mlir::cast(op.getAddr().getType()) + .getPointee()) + .getSize(); + lowerArrayDtorCtorIntoLoop(builder, op, eltTy, op.getAddr(), arrayLen); +} + +void LoweringPreparePass::lowerArrayCtor(ArrayCtor op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op.getOperation()); + + auto eltTy = op->getRegion(0).getArgument(0).getType(); + auto arrayLen = mlir::cast( + mlir::cast(op.getAddr().getType()) + .getPointee()) + .getSize(); + lowerArrayDtorCtorIntoLoop(builder, op, eltTy, op.getAddr(), arrayLen); +} + +void LoweringPreparePass::lowerStdFindOp(StdFindOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op.getOperation()); + auto call = builder.createCallOp( + op.getLoc(), op.getOriginalFnAttr(), op.getResult().getType(), + mlir::ValueRange{op.getOperand(0), op.getOperand(1), op.getOperand(2)}); + + op.replaceAllUsesWith(call); + op.erase(); +} + +void LoweringPreparePass::lowerIterBeginOp(IterBeginOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op.getOperation()); + auto call = builder.createCallOp(op.getLoc(), op.getOriginalFnAttr(), + op.getResult().getType(), + mlir::ValueRange{op.getOperand()}); + + op.replaceAllUsesWith(call); + op.erase(); +} + +void LoweringPreparePass::lowerIterEndOp(IterEndOp op) { + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointAfter(op.getOperation()); + auto call = builder.createCallOp(op.getLoc(), op.getOriginalFnAttr(), + op.getResult().getType(), + mlir::ValueRange{op.getOperand()}); + + op.replaceAllUsesWith(call); + op.erase(); +} + +void LoweringPreparePass::addGlobalAnnotations(mlir::Operation *op, + mlir::ArrayAttr 
annotations) { + auto globalValue = cast(op); + mlir::StringAttr globalValueName = globalValue.getNameAttr(); + for (auto &annot : annotations) { + SmallVector entryArray = {globalValueName, annot}; + globalAnnotations.push_back( + mlir::ArrayAttr::get(theModule.getContext(), entryArray)); + } +} + +void LoweringPreparePass::buildGlobalAnnotationValues() { + if (globalAnnotations.empty()) + return; + mlir::ArrayAttr annotationValueArray = + mlir::ArrayAttr::get(theModule.getContext(), globalAnnotations); + theModule->setAttr("cir.global_annotations", + mlir::cir::GlobalAnnotationValuesAttr::get( + theModule.getContext(), annotationValueArray)); +} + +void LoweringPreparePass::runOnOp(Operation *op) { + if (auto unary = dyn_cast(op)) { + lowerUnaryOp(unary); + } else if (auto bin = dyn_cast(op)) { + lowerBinOp(bin); + } else if (auto cast = dyn_cast(op)) { + lowerCastOp(cast); + } else if (auto complexBin = dyn_cast(op)) { + lowerComplexBinOp(complexBin); + } else if (auto threeWayCmp = dyn_cast(op)) { + lowerThreeWayCmpOp(threeWayCmp); + } else if (auto vaArgOp = dyn_cast(op)) { + lowerVAArgOp(vaArgOp); + } else if (auto getGlobal = dyn_cast(op)) { + lowerGlobalOp(getGlobal); + } else if (auto dynamicCast = dyn_cast(op)) { + lowerDynamicCastOp(dynamicCast); + } else if (auto stdFind = dyn_cast(op)) { + lowerStdFindOp(stdFind); + } else if (auto iterBegin = dyn_cast(op)) { + lowerIterBeginOp(iterBegin); + } else if (auto iterEnd = dyn_cast(op)) { + lowerIterEndOp(iterEnd); + } else if (auto arrayCtor = dyn_cast(op)) { + lowerArrayCtor(arrayCtor); + } else if (auto arrayDtor = dyn_cast(op)) { + lowerArrayDtor(arrayDtor); + } else if (auto fnOp = dyn_cast(op)) { + if (auto globalCtor = fnOp.getGlobalCtorAttr()) { + globalCtorList.push_back(globalCtor); + } else if (auto globalDtor = fnOp.getGlobalDtorAttr()) { + globalDtorList.push_back(globalDtor); + } + if (std::optional annotations = fnOp.getAnnotations()) + addGlobalAnnotations(fnOp, annotations.value()); + } +} + +void LoweringPreparePass::runOnOperation() { + assert(astCtx && "Missing ASTContext, please construct with the right ctor"); + auto *op = getOperation(); + if (isa<::mlir::ModuleOp>(op)) { + theModule = cast<::mlir::ModuleOp>(op); + } + + SmallVector opsToTransform; + + op->walk([&](Operation *op) { + if (isa(op)) + opsToTransform.push_back(op); + }); + + for (auto *o : opsToTransform) + runOnOp(o); + + buildCXXGlobalInitFunc(); + buildGlobalCtorDtorList(); + buildGlobalAnnotationValues(); +} + +std::unique_ptr mlir::createLoweringPreparePass() { + return std::make_unique(); +} + +std::unique_ptr +mlir::createLoweringPreparePass(clang::ASTContext *astCtx) { + auto pass = std::make_unique(); + pass->setASTContext(astCtx); + return std::move(pass); +} diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h new file mode 100644 index 000000000000..42e8917b43b6 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h @@ -0,0 +1,44 @@ +//====- LoweringPrepareCXXABI.h -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the LoweringPrepareCXXABI class, which is the base class +// for ABI specific functionalities that are required during LLVM lowering +// prepare. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_LOWERING_PREPARE_CXX_ABI_H +#define LLVM_CLANG_LIB_CIR_LOWERING_PREPARE_CXX_ABI_H + +#include "mlir/IR/Value.h" +#include "clang/AST/ASTContext.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Target/AArch64.h" + +namespace cir { + +class LoweringPrepareCXXABI { +public: + static LoweringPrepareCXXABI *createItaniumABI(); + static LoweringPrepareCXXABI *createAArch64ABI(::cir::AArch64ABIKind k); + + virtual mlir::Value lowerVAArg(CIRBaseBuilderTy &builder, + mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) = 0; + virtual ~LoweringPrepareCXXABI() {} + + virtual mlir::Value lowerDynamicCast(CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + mlir::cir::DynamicCastOp op) = 0; +}; + +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_LOWERING_PREPARE_CXX_ABI_H diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareItaniumCXXABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareItaniumCXXABI.h new file mode 100644 index 000000000000..1dbef0d24ddd --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareItaniumCXXABI.h @@ -0,0 +1,24 @@ +//====- LoweringPrepareItaniumCXXABI.h - Itanium ABI specific code --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides Itanium C++ ABI specific code that is used during LLVMIR +// lowering prepare. +// +//===----------------------------------------------------------------------===// + +#include "LoweringPrepareCXXABI.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" + +class LoweringPrepareItaniumCXXABI : public cir::LoweringPrepareCXXABI { +public: + mlir::Value lowerDynamicCast(cir::CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + mlir::cir::DynamicCastOp op) override; + mlir::Value lowerVAArg(cir::CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) override; +}; diff --git a/clang/lib/CIR/Dialect/Transforms/PassDetail.h b/clang/lib/CIR/Dialect/Transforms/PassDetail.h new file mode 100644 index 000000000000..2fdcfbda61e5 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/PassDetail.h @@ -0,0 +1,29 @@ +//===- PassDetail.h - CIR Pass class details --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef DIALECT_CIR_TRANSFORMS_PASSDETAIL_H_ +#define DIALECT_CIR_TRANSFORMS_PASSDETAIL_H_ + +#include "mlir/IR/Dialect.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { +// Forward declaration from Dialect.h +template +void registerDialect(DialectRegistry ®istry); + +namespace cir { +class CIRDialect; +} // namespace cir + +#define GEN_PASS_CLASSES +#include "clang/CIR/Dialect/Passes.h.inc" + +} // namespace mlir + +#endif // DIALECT_CIR_TRANSFORMS_PASSDETAIL_H_ diff --git a/clang/lib/CIR/Dialect/Transforms/SCFPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/SCFPrepare.cpp new file mode 100644 index 000000000000..d1c8c2e0ef09 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/SCFPrepare.cpp @@ -0,0 +1,245 @@ +//===- SCFPrepare.cpp - pareparation work for SCF lowering ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" + +using namespace mlir; +using namespace cir; + +//===----------------------------------------------------------------------===// +// Rewrite patterns +//===----------------------------------------------------------------------===// + +namespace { + +static Value findIVAddr(Block *step) { + Value IVAddr = nullptr; + for (Operation &op : *step) { + if (auto loadOp = dyn_cast(op)) + IVAddr = loadOp.getAddr(); + else if (auto storeOp = dyn_cast(op)) + if (IVAddr != storeOp.getAddr()) + return nullptr; + } + return IVAddr; +} + +static CmpOp findLoopCmpAndIV(Block *cond, Value IVAddr, Value &IV) { + Operation *IVLoadOp = nullptr; + for (Operation &op : *cond) { + if (auto loadOp = dyn_cast(op)) + if (loadOp.getAddr() == IVAddr) { + IVLoadOp = &op; + break; + } + } + if (!IVLoadOp) + return nullptr; + if (!IVLoadOp->hasOneUse()) + return nullptr; + IV = IVLoadOp->getResult(0); + return dyn_cast(*IVLoadOp->user_begin()); +} + +// Canonicalize IV to LHS of loop comparison +// For example, transfer cir.cmp(gt, %bound, %IV) to cir.cmp(lt, %IV, %bound). +// So we could use RHS as boundary and use lt to determine it's an upper bound. +struct canonicalizeIVtoCmpLHS : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + CmpOpKind swapCmpKind(CmpOpKind kind) const { + switch (kind) { + case CmpOpKind::gt: + return CmpOpKind::lt; + case CmpOpKind::ge: + return CmpOpKind::le; + case CmpOpKind::lt: + return CmpOpKind::gt; + case CmpOpKind::le: + return CmpOpKind::ge; + default: + break; + } + return kind; + } + + void replaceWithNewCmpOp(CmpOp oldCmp, CmpOpKind newKind, Value lhs, + Value rhs, PatternRewriter &rewriter) const { + rewriter.setInsertionPointAfter(oldCmp.getOperation()); + auto newCmp = rewriter.create( + oldCmp.getLoc(), oldCmp.getType(), newKind, lhs, rhs); + oldCmp->replaceAllUsesWith(newCmp); + oldCmp->erase(); + } + + LogicalResult matchAndRewrite(ForOp op, + PatternRewriter &rewriter) const final { + auto *cond = &op.getCond().front(); + auto *step = (op.maybeGetStep() ? 
&op.maybeGetStep()->front() : nullptr); + if (!step) + return failure(); + Value IVAddr = findIVAddr(step); + if (!IVAddr) + return failure(); + Value IV = nullptr; + auto loopCmp = findLoopCmpAndIV(cond, IVAddr, IV); + if (!loopCmp || !IV) + return failure(); + + CmpOpKind cmpKind = loopCmp.getKind(); + Value cmpRhs = loopCmp.getRhs(); + // Canonicalize IV to LHS of loop Cmp. + if (loopCmp.getLhs() != IV) { + cmpKind = swapCmpKind(cmpKind); + cmpRhs = loopCmp.getLhs(); + replaceWithNewCmpOp(loopCmp, cmpKind, IV, cmpRhs, rewriter); + return success(); + } + + return failure(); + } +}; + +// Hoist loop invariant operations in condition block out of loop +// The condition block may be generated as following which contains the +// operations produced upper bound. +// SCF for loop required loop boundary as input operands. So we need to +// hoist the boundary operations out of loop. +// +// cir.for : cond { +// %4 = cir.load %2 : !cir.ptr, !s32i +// %5 = cir.const #cir.int<100> : !s32i <- upper bound +// %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i +// %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool +// cir.condition(%7 +// } body { +struct hoistLoopInvariantInCondBlock : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + bool isLoopInvariantLoad(Operation *op, ForOp forOp) const { + auto load = dyn_cast(op); + if (!load) + return false; + + auto loadAddr = load.getAddr(); + auto result = + forOp->walk([&](mlir::Operation *op) { + if (auto store = dyn_cast(op)) { + if (store.getAddr() == loadAddr) + return mlir::WalkResult::interrupt(); + } + return mlir::WalkResult::advance(); + }); + + if (result.wasInterrupted()) + return false; + + return true; + } + + // Return true for loop invariant operation and push it to initOps. + bool isLoopInvariantOp(Operation *op, ForOp forOp, + SmallVector &initOps) const { + if (!op) + return false; + if (isa(op) || isLoopInvariantLoad(op, forOp)) { + initOps.push_back(op); + return true; + } else if (isa(op) && + isLoopInvariantOp(op->getOperand(0).getDefiningOp(), forOp, + initOps) && + isLoopInvariantOp(op->getOperand(1).getDefiningOp(), forOp, + initOps)) { + initOps.push_back(op); + return true; + } else if (isa(op) && + isLoopInvariantOp(op->getOperand(0).getDefiningOp(), forOp, + initOps)) { + initOps.push_back(op); + return true; + } + + return false; + } + + LogicalResult matchAndRewrite(ForOp forOp, + PatternRewriter &rewriter) const final { + auto *cond = &forOp.getCond().front(); + auto *step = + (forOp.maybeGetStep() ? &forOp.maybeGetStep()->front() : nullptr); + if (!step) + return failure(); + Value IVAddr = findIVAddr(step); + if (!IVAddr) + return failure(); + Value IV = nullptr; + auto loopCmp = findLoopCmpAndIV(cond, IVAddr, IV); + if (!loopCmp || !IV) + return failure(); + + Value cmpRhs = loopCmp.getRhs(); + auto defOp = cmpRhs.getDefiningOp(); + SmallVector initOps; + // Collect loop invariant operations and move them before forOp. 
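+    // In the example above this moves `%5 = cir.const #cir.int<100>` (and any
+    // casts or binops feeding the bound) to just before the cir.for, so the
+    // SCF conversion can consume the upper bound as a loop operand.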
+ if (isLoopInvariantOp(defOp, forOp, initOps)) { + for (auto op : initOps) + op->moveBefore(forOp); + return success(); + } + + return failure(); + } +}; + +//===----------------------------------------------------------------------===// +// SCFPreparePass +//===----------------------------------------------------------------------===// + +struct SCFPreparePass : public SCFPrepareBase { + using SCFPrepareBase::SCFPrepareBase; + void runOnOperation() override; +}; + +void populateSCFPreparePatterns(RewritePatternSet &patterns) { + // clang-format off + patterns.add< + canonicalizeIVtoCmpLHS, + hoistLoopInvariantInCondBlock + >(patterns.getContext()); + // clang-format on +} + +void SCFPreparePass::runOnOperation() { + // Collect rewrite patterns. + RewritePatternSet patterns(&getContext()); + populateSCFPreparePatterns(patterns); + + // Collect operations to apply patterns. + SmallVector ops; + getOperation()->walk([&](Operation *op) { + // CastOp here is to perform a manual `fold` in + // applyOpPatternsAndFold + if (isa(op)) + ops.push_back(op); + }); + + // Apply patterns. + if (applyOpPatternsAndFold(ops, std::move(patterns)).failed()) + signalPassFailure(); +} + +} // namespace + +std::unique_ptr mlir::createSCFPreparePass() { + return std::make_unique(); +} diff --git a/clang/lib/CIR/Dialect/Transforms/StdHelpers.cpp b/clang/lib/CIR/Dialect/Transforms/StdHelpers.cpp new file mode 100644 index 000000000000..2fbccfc7946a --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/StdHelpers.cpp @@ -0,0 +1,32 @@ +//===- StdHelpers.cpp - Implementation standard related helpers--*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "StdHelpers.h" + +namespace mlir { +namespace cir { + +bool isStdArrayType(mlir::Type t) { + auto sTy = dyn_cast(t); + if (!sTy) + return false; + auto recordDecl = sTy.getAst(); + if (!recordDecl.isInStdNamespace()) + return false; + + // TODO: only std::array supported for now, generalize and + // use tablegen. CallDescription.cpp in the static analyzer + // could be a good inspiration source too. + if (recordDecl.getName().compare("array") != 0) + return false; + + return true; +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/StdHelpers.h b/clang/lib/CIR/Dialect/Transforms/StdHelpers.h new file mode 100644 index 000000000000..302272feb6bb --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/StdHelpers.h @@ -0,0 +1,36 @@ +//===- StdHelpers.h - Helpers for standard types/functions ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Region.h" +#include "clang/AST/ASTContext.h" +#include "clang/Basic/Module.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" + +#ifndef DIALECT_CIR_TRANSFORMS_STDHELPERS_H_ +#define DIALECT_CIR_TRANSFORMS_STDHELPERS_H_ + +namespace mlir { +namespace cir { + +bool isStdArrayType(mlir::Type t); + +} // namespace cir +} // namespace mlir + +#endif diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfo.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfo.cpp new file mode 100644 index 000000000000..4e2a81de9fc1 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfo.cpp @@ -0,0 +1,46 @@ +//===- ABIInfo.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/ABIInfo.cpp. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "ABIInfo.h" +#include "CIRCXXABI.h" +#include "CIRLowerContext.h" +#include "LowerTypes.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" + +namespace mlir { +namespace cir { + +// Pin the vtable to this file. +ABIInfo::~ABIInfo() = default; + +CIRCXXABI &ABIInfo::getCXXABI() const { return LT.getCXXABI(); } + +CIRLowerContext &ABIInfo::getContext() const { return LT.getContext(); } + +const clang::TargetInfo &ABIInfo::getTarget() const { return LT.getTarget(); } + +const ::cir::CIRDataLayout &ABIInfo::getDataLayout() const { + return LT.getDataLayout(); +} + +bool ABIInfo::isPromotableIntegerTypeForABI(Type Ty) const { + if (getContext().isPromotableIntegerType(Ty)) + return true; + + assert(!::cir::MissingFeatures::fixedWidthIntegers()); + + return false; +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfo.h new file mode 100644 index 000000000000..bbcd906e849a --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfo.h @@ -0,0 +1,58 @@ +//===----- ABIInfo.h - CIR's ABI information --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics the CodeGen/ABIInfo.h class. The main difference +// is that this is adapted to operate on the CIR dialect. 
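+// Target-specific subclasses provide computeInfo(), which fills a
+// LowerFunctionInfo with the ABI classification of a function's arguments
+// and return value.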
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_ABIINFO_H
+#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_ABIINFO_H
+
+#include "CIRCXXABI.h"
+#include "CIRLowerContext.h"
+#include "LowerFunctionInfo.h"
+#include "clang/CIR/Dialect/IR/CIRDataLayout.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace mlir {
+namespace cir {
+
+// Forward declarations.
+class LowerTypes;
+
+/// Target specific hooks for defining how a type should be passed or returned
+/// from functions.
+class ABIInfo {
+protected:
+  LowerTypes &LT;
+  llvm::CallingConv::ID RuntimeCC;
+
+public:
+  ABIInfo(LowerTypes &LT) : LT(LT), RuntimeCC(llvm::CallingConv::C) {}
+  virtual ~ABIInfo();
+
+  CIRCXXABI &getCXXABI() const;
+
+  CIRLowerContext &getContext() const;
+
+  const clang::TargetInfo &getTarget() const;
+
+  const ::cir::CIRDataLayout &getDataLayout() const;
+
+  virtual void computeInfo(LowerFunctionInfo &FI) const = 0;
+
+  // Implement the Type::IsPromotableIntegerType for ABI specific needs. The
+  // only difference is that this considers bit-precise integer types as well.
+  bool isPromotableIntegerTypeForABI(Type Ty) const;
+};
+
+} // namespace cir
+} // namespace mlir
+
+#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_ABIINFO_H
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
new file mode 100644
index 000000000000..041c801dbe2e
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
@@ -0,0 +1,57 @@
+//===--- ABIInfoImpl.cpp - Encapsulate calling convention details ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file partially mimics clang/lib/CodeGen/ABIInfoImpl.cpp. The queries are
+// adapted to operate on the CIR dialect, however.
+// +//===----------------------------------------------------------------------===// + +#include "ABIInfo.h" +#include "CIRCXXABI.h" +#include "LowerFunction.h" +#include "LowerFunctionInfo.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" + +namespace mlir { +namespace cir { + +bool classifyReturnType(const CIRCXXABI &CXXABI, LowerFunctionInfo &FI, + const ABIInfo &Info) { + Type Ty = FI.getReturnType(); + + if (const auto RT = dyn_cast(Ty)) { + assert(!::cir::MissingFeatures::isCXXRecordDecl()); + } + + return CXXABI.classifyReturnType(FI); +} + +bool isAggregateTypeForABI(Type T) { + assert(!::cir::MissingFeatures::functionMemberPointerType()); + return !LowerFunction::hasScalarEvaluationKind(T); +} + +Type useFirstFieldIfTransparentUnion(Type Ty) { + if (auto RT = dyn_cast(Ty)) { + if (RT.isUnion()) + llvm_unreachable("NYI"); + } + return Ty; +} + +CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, + CIRCXXABI &CXXABI) { + if (::cir::MissingFeatures::typeIsCXXRecordDecl()) { + llvm_unreachable("NYI"); + } + return CXXABI.getRecordArgABI(RT); +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h new file mode 100644 index 000000000000..9e45bc4e0ecc --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h @@ -0,0 +1,38 @@ +//===- ABIInfoImpl.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/ABIInfoImpl.h. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_ABIINFOIMPL_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_ABIINFOIMPL_H + +#include "ABIInfo.h" +#include "CIRCXXABI.h" +#include "LowerFunctionInfo.h" + +namespace mlir { +namespace cir { + +bool classifyReturnType(const CIRCXXABI &CXXABI, LowerFunctionInfo &FI, + const ABIInfo &Info); + +bool isAggregateTypeForABI(Type T); + +/// Pass transparent unions as if they were the type of the first element. Sema +/// should ensure that all elements of the union have the same "machine type". +Type useFirstFieldIfTransparentUnion(Type Ty); + +CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT, CIRCXXABI &CXXABI); + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_ABIINFOIMPL_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.cpp new file mode 100644 index 000000000000..8c483469f1ce --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.cpp @@ -0,0 +1,22 @@ +//===- CIRCXXABI.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CGCXXABI.cpp. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "CIRCXXABI.h" + +namespace mlir { +namespace cir { + +CIRCXXABI::~CIRCXXABI() {} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h new file mode 100644 index 000000000000..42e666999005 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h @@ -0,0 +1,91 @@ +//===----- CIRCXXABI.h - Interface to C++ ABIs for CIR Dialect --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics the CodeGen/CGCXXABI.h class. The main difference +// is that this is adapted to operate on the CIR dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRCXXABI_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRCXXABI_H + +#include "LowerFunctionInfo.h" +#include "mlir/IR/Value.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Target/AArch64.h" + +namespace mlir { +namespace cir { + +// Forward declarations. +class LowerModule; + +class CIRCXXABI { + friend class LowerModule; + +protected: + LowerModule &LM; + + CIRCXXABI(LowerModule &LM) : LM(LM) {} + +public: + virtual ~CIRCXXABI(); + + /// If the C++ ABI requires the given type be returned in a particular way, + /// this method sets RetAI and returns true. + virtual bool classifyReturnType(LowerFunctionInfo &FI) const = 0; + + /// Specify how one should pass an argument of a record type. + enum RecordArgABI { + /// Pass it using the normal C aggregate rules for the ABI, potentially + /// introducing extra copies and passing some or all of it in registers. + RAA_Default = 0, + + /// Pass it on the stack using its defined layout. The argument must be + /// evaluated directly into the correct stack position in the arguments + /// area, + /// and the call machinery must not move it or introduce extra copies. + RAA_DirectInMemory, + + /// Pass it as a pointer to temporary memory. + RAA_Indirect + }; + + /// Returns how an argument of the given record type should be passed. + /// FIXME(cir): This expects a CXXRecordDecl! Not any record type. + virtual RecordArgABI getRecordArgABI(const StructType RD) const = 0; +}; + +/// Creates an Itanium-family ABI. +CIRCXXABI *CreateItaniumCXXABI(LowerModule &CGM); + +} // namespace cir +} // namespace mlir + +// FIXME(cir): Merge this into the CIRCXXABI class above. To do so, this code +// should be updated to follow some level of codegen parity. 
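+
+// For illustration only (hypothetical call site): passes that need
+// ABI-specific lowering of va_arg or dynamic_cast are expected to instantiate
+// this helper and dispatch through it, roughly:
+//
+//   std::unique_ptr<cir::LoweringPrepareCXXABI> abi{
+//       cir::LoweringPrepareCXXABI::createItaniumABI()};
+//   mlir::Value loweredVaArg = abi->lowerVAArg(builder, vaArgOp, dataLayout);
+//
+// where `builder`, `vaArgOp` and `dataLayout` stand for the caller's
+// CIRBaseBuilderTy, mlir::cir::VAArgOp and cir::CIRDataLayout.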
+namespace cir { + +class LoweringPrepareCXXABI { +public: + static LoweringPrepareCXXABI *createItaniumABI(); + static LoweringPrepareCXXABI *createAArch64ABI(::cir::AArch64ABIKind k); + + virtual mlir::Value lowerVAArg(CIRBaseBuilderTy &builder, + mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) = 0; + virtual ~LoweringPrepareCXXABI() {} + + virtual mlir::Value lowerDynamicCast(CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + mlir::cir::DynamicCastOp op) = 0; +}; +} // namespace cir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRCXXABI_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp new file mode 100644 index 000000000000..42aae0a80d04 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp @@ -0,0 +1,205 @@ +//===- CIRLowerContext.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/AST/ASTContext.cpp. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "CIRLowerContext.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "clang/AST/ASTContext.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" +#include + +namespace mlir { +namespace cir { + +CIRLowerContext::CIRLowerContext(ModuleOp module, clang::LangOptions LOpts) + : MLIRCtx(module.getContext()), LangOpts(LOpts) {} + +CIRLowerContext::~CIRLowerContext() {} + +clang::TypeInfo CIRLowerContext::getTypeInfo(Type T) const { + // TODO(cir): Memoize type info. + + clang::TypeInfo TI = getTypeInfoImpl(T); + return TI; +} + +/// getTypeInfoImpl - Return the size of the specified type, in bits. This +/// method does not work on incomplete types. +/// +/// FIXME: Pointers into different addr spaces could have different sizes and +/// alignment requirements: getPointerInfo should take an AddrSpace, this +/// should take a QualType, &c. +clang::TypeInfo CIRLowerContext::getTypeInfoImpl(const Type T) const { + uint64_t Width = 0; + unsigned Align = 8; + clang::AlignRequirementKind AlignRequirement = + clang::AlignRequirementKind::None; + + // TODO(cir): We should implement a better way to identify type kinds and use + // builting data layout interface for this. + auto typeKind = clang::Type::Builtin; + if (isa(T)) { + typeKind = clang::Type::Builtin; + } else if (isa(T)) { + typeKind = clang::Type::Record; + } else { + llvm_unreachable("Unhandled type class"); + } + + // FIXME(cir): Here we fetch the width and alignment of a type considering the + // current target. We can likely improve this using MLIR's data layout, or + // some other interface, to abstract this away (e.g. type.getWidth() & + // type.getAlign()). Verify if data layout suffices because this would involve + // some other types such as vectors and complex numbers. 
+  // FIXME(cir): In the original codegen, this receives an AST type, meaning it
+  // differentiates chars from integers, something that is not possible with
+  // the current level of CIR.
+  switch (typeKind) {
+  case clang::Type::Builtin: {
+    if (auto intTy = dyn_cast<IntType>(T)) {
+      // NOTE(cir): This assumes int types are already ABI-specific.
+      // FIXME(cir): Use data layout interface here instead.
+      Width = intTy.getWidth();
+      // FIXME(cir): Use the proper getABIAlignment method here.
+      Align = std::ceil((float)Width / 8) * 8;
+      break;
+    }
+    if (auto boolTy = dyn_cast<BoolType>(T)) {
+      Width = Target->getBoolWidth();
+      Align = Target->getBoolAlign();
+      break;
+    }
+    if (auto floatTy = dyn_cast<SingleType>(T)) {
+      Width = Target->getFloatWidth();
+      Align = Target->getFloatAlign();
+      break;
+    }
+    if (auto doubleTy = dyn_cast<DoubleType>(T)) {
+      Width = Target->getDoubleWidth();
+      Align = Target->getDoubleAlign();
+      break;
+    }
+    llvm_unreachable("Unknown builtin type!");
+    break;
+  }
+  case clang::Type::Record: {
+    const auto RT = dyn_cast<StructType>(T);
+    assert(!::cir::MissingFeatures::tagTypeClassAbstraction());
+
+    // Only handle TagTypes (named types) for now.
+    assert(RT.getName() && "Anonymous record is NYI");
+
+    // NOTE(cir): Clang does some handling of invalid tagged declarations here.
+    // Not sure if this is necessary in CIR.
+
+    if (::cir::MissingFeatures::typeGetAsEnumType()) {
+      llvm_unreachable("NYI");
+    }
+
+    const CIRRecordLayout &Layout = getCIRRecordLayout(RT);
+    Width = toBits(Layout.getSize());
+    Align = toBits(Layout.getAlignment());
+    assert(!::cir::MissingFeatures::recordDeclHasAlignmentAttr());
+    break;
+  }
+  default:
+    llvm_unreachable("Unhandled type class");
+  }
+
+  assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
+  return clang::TypeInfo(Width, Align, AlignRequirement);
+}
+
+Type CIRLowerContext::initBuiltinType(clang::BuiltinType::Kind K) {
+  Type Ty;
+
+  // NOTE(cir): Clang does more stuff here. Not sure if we need to do the same.
+  assert(!::cir::MissingFeatures::qualifiedTypes());
+  switch (K) {
+  case clang::BuiltinType::Char_S:
+    Ty = IntType::get(getMLIRContext(), 8, true);
+    break;
+  default:
+    llvm_unreachable("NYI");
+  }
+
+  Types.push_back(Ty);
+  return Ty;
+}
+
+void CIRLowerContext::initBuiltinTypes(const clang::TargetInfo &Target,
+                                       const clang::TargetInfo *AuxTarget) {
+  assert((!this->Target || this->Target == &Target) &&
+         "Incorrect target reinitialization");
+  this->Target = &Target;
+  this->AuxTarget = AuxTarget;
+
+  // C99 6.2.5p3.
+  if (LangOpts.CharIsSigned)
+    CharTy = initBuiltinType(clang::BuiltinType::Char_S);
+  else
+    llvm_unreachable("NYI");
+}
+
+/// Convert a size in bits to a size in characters.
+clang::CharUnits CIRLowerContext::toCharUnitsFromBits(int64_t BitSize) const {
+  return clang::CharUnits::fromQuantity(BitSize / getCharWidth());
+}
+
+/// Convert a size in characters to a size in bits.
+int64_t CIRLowerContext::toBits(clang::CharUnits CharSize) const {
+  return CharSize.getQuantity() * getCharWidth();
+}
+
+clang::TypeInfoChars CIRLowerContext::getTypeInfoInChars(Type T) const {
+  if (auto arrTy = dyn_cast<ArrayType>(T))
+    llvm_unreachable("NYI");
+  clang::TypeInfo Info = getTypeInfo(T);
+  return clang::TypeInfoChars(toCharUnitsFromBits(Info.Width),
+                              toCharUnitsFromBits(Info.Align),
+                              Info.AlignRequirement);
+}
+
+bool CIRLowerContext::isPromotableIntegerType(Type T) const {
+  // HLSL doesn't promote all small integer types to int, it
+  // just uses the rank-based promotion rules for all types.
+ if (::cir::MissingFeatures::langOpts()) + llvm_unreachable("NYI"); + + // FIXME(cir): CIR does not distinguish between char, short, etc. So we just + // assume it is promotable if smaller than 32 bits. This is wrong since, for + // example, Char32 is promotable. Improve CIR or add an AST query here. + if (auto intTy = dyn_cast(T)) { + return cast(T).getWidth() < 32; + } + + // Bool are also handled here for codegen parity. + if (auto boolTy = dyn_cast(T)) { + return true; + } + + // Enumerated types are promotable to their compatible integer types + // (C99 6.3.1.1) a.k.a. its underlying type (C++ [conv.prom]p2). + // TODO(cir): CIR doesn't know if a integer originated from an enum. Improve + // CIR or add an AST query here. + if (::cir::MissingFeatures::typeGetAsEnumType()) { + llvm_unreachable("NYI"); + } + + return false; +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.h new file mode 100644 index 000000000000..5a87f71c2bdc --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.h @@ -0,0 +1,123 @@ +//===- CIRLowerContext.h - Context to lower CIR -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Partially mimics AST/ASTContext.h. The main difference is that this is +// adapted to operate on the CIR dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRLowerContext_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRLowerContext_H + +#include "CIRRecordLayout.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Types.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Type.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" + +namespace mlir { +namespace cir { + +// FIXME(cir): Most of this is type-related information that should already be +// embedded into CIR. Maybe we can move this to an MLIR interface. +class CIRLowerContext : public llvm::RefCountedBase { + +private: + mutable SmallVector Types; + + clang::TypeInfo getTypeInfoImpl(const Type T) const; + + const clang::TargetInfo *Target = nullptr; + const clang::TargetInfo *AuxTarget = nullptr; + + /// MLIR context to be used when creating types. + MLIRContext *MLIRCtx; + + /// The language options used to create the AST associated with + /// this ASTContext object. + clang::LangOptions LangOpts; + + //===--------------------------------------------------------------------===// + // Built-in Types + //===--------------------------------------------------------------------===// + + Type CharTy; + +public: + CIRLowerContext(ModuleOp module, clang::LangOptions LOpts); + CIRLowerContext(const CIRLowerContext &) = delete; + CIRLowerContext &operator=(const CIRLowerContext &) = delete; + ~CIRLowerContext(); + + /// Initialize built-in types. + /// + /// This routine may only be invoked once for a given ASTContext object. + /// It is normally invoked after ASTContext construction. 
+ /// + /// \param Target The target + void initBuiltinTypes(const clang::TargetInfo &Target, + const clang::TargetInfo *AuxTarget = nullptr); + +private: + Type initBuiltinType(clang::BuiltinType::Kind K); + +public: + const clang::TargetInfo &getTargetInfo() const { return *Target; } + + const clang::LangOptions &getLangOpts() const { return LangOpts; } + + MLIRContext *getMLIRContext() const { return MLIRCtx; } + + //===--------------------------------------------------------------------===// + // Type Sizing and Analysis + //===--------------------------------------------------------------------===// + + /// Get the size and alignment of the specified complete type in bits. + clang::TypeInfo getTypeInfo(Type T) const; + + /// Return the size of the specified (complete) type \p T, in bits. + uint64_t getTypeSize(Type T) const { return getTypeInfo(T).Width; } + + /// Return the size of the character type, in bits. + // FIXME(cir): Refactor types and properly implement DataLayout interface in + // CIR so that this can be queried from the module. + uint64_t getCharWidth() const { return 8; } + + /// Convert a size in bits to a size in characters. + clang::CharUnits toCharUnitsFromBits(int64_t BitSize) const; + + /// Convert a size in characters to a size in bits. + int64_t toBits(clang::CharUnits CharSize) const; + + clang::CharUnits getTypeSizeInChars(Type T) const { + // FIXME(cir): We should query MLIR's Datalayout here instead. + return getTypeInfoInChars(T).Width; + } + + /// Return the ABI-specified alignment of a (complete) type \p T, in + /// bits. + unsigned getTypeAlign(Type T) const { return getTypeInfo(T).Align; } + + clang::TypeInfoChars getTypeInfoInChars(Type T) const; + + /// More type predicates useful for type checking/promotion + bool isPromotableIntegerType(Type T) const; // C99 6.3.1.1p2 + + /// Get or compute information about the layout of the specified + /// record (struct/union/class) \p D, which indicates its size and field + /// position information. + const CIRRecordLayout &getCIRRecordLayout(const Type D) const; +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRLowerContext_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRRecordLayout.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRRecordLayout.cpp new file mode 100644 index 000000000000..2744f67d19de --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRRecordLayout.cpp @@ -0,0 +1,61 @@ +//===- CIRRecordLayout.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/AST/RecordLayout.cpp. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "CIRRecordLayout.h" +#include "clang/CIR/MissingFeatures.h" + +namespace mlir { +namespace cir { + +// Constructor for C++ records. 
+CIRRecordLayout::CIRRecordLayout( + const CIRLowerContext &Ctx, clang::CharUnits size, + clang::CharUnits alignment, clang::CharUnits preferredAlignment, + clang::CharUnits unadjustedAlignment, clang::CharUnits requiredAlignment, + bool hasOwnVFPtr, bool hasExtendableVFPtr, clang::CharUnits vbptroffset, + clang::CharUnits datasize, ArrayRef fieldoffsets, + clang::CharUnits nonvirtualsize, clang::CharUnits nonvirtualalignment, + clang::CharUnits preferrednvalignment, + clang::CharUnits SizeOfLargestEmptySubobject, const Type PrimaryBase, + bool IsPrimaryBaseVirtual, const Type BaseSharingVBPtr, + bool EndsWithZeroSizedObject, bool LeadsWithZeroSizedBase) + : Size(size), DataSize(datasize), Alignment(alignment), + PreferredAlignment(preferredAlignment), + UnadjustedAlignment(unadjustedAlignment), + RequiredAlignment(requiredAlignment), CXXInfo(new CXXRecordLayoutInfo) { + // NOTE(cir): Clang does a far more elaborate append here by leveraging the + // custom ASTVector class. For now, we'll do a simple append. + FieldOffsets.insert(FieldOffsets.end(), fieldoffsets.begin(), + fieldoffsets.end()); + + assert(!PrimaryBase && "Layout for class with inheritance is NYI"); + // CXXInfo->PrimaryBase.setPointer(PrimaryBase); + assert(!IsPrimaryBaseVirtual && "Layout for virtual base class is NYI"); + // CXXInfo->PrimaryBase.setInt(IsPrimaryBaseVirtual); + CXXInfo->NonVirtualSize = nonvirtualsize; + CXXInfo->NonVirtualAlignment = nonvirtualalignment; + CXXInfo->PreferredNVAlignment = preferrednvalignment; + CXXInfo->SizeOfLargestEmptySubobject = SizeOfLargestEmptySubobject; + // FIXME(cir): Initialize base classes offsets. + assert(!::cir::MissingFeatures::getCXXRecordBases()); + CXXInfo->HasOwnVFPtr = hasOwnVFPtr; + CXXInfo->VBPtrOffset = vbptroffset; + CXXInfo->HasExtendableVFPtr = hasExtendableVFPtr; + // FIXME(cir): Probably not necessary for now. + // CXXInfo->BaseSharingVBPtr = BaseSharingVBPtr; + CXXInfo->EndsWithZeroSizedObject = EndsWithZeroSizedObject; + CXXInfo->LeadsWithZeroSizedBase = LeadsWithZeroSizedBase; +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRRecordLayout.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRRecordLayout.h new file mode 100644 index 000000000000..b282f32f8a9d --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRRecordLayout.h @@ -0,0 +1,138 @@ +//===--- CGRecordLayout.h - LLVM Record Layout Information ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CGRecordLayout.h. The queries +// are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRRECORDLAYOUT_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRRECORDLAYOUT_H + +#include "mlir/IR/Types.h" +#include "mlir/Support/LLVM.h" +#include "clang/AST/CharUnits.h" +#include +#include + +namespace mlir { +namespace cir { + +class CIRLowerContext; + +// FIXME(cir): Perhaps this logic can be moved to the CIR dialect, specifically +// the data layout abstractions. 
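+
+// For illustration only (hypothetical names): clients are expected to obtain
+// layouts through CIRLowerContext rather than construct this class directly,
+// along the lines of:
+//
+//   const CIRRecordLayout &layout = lowerCtx.getCIRRecordLayout(recordTy);
+//   clang::CharUnits size = layout.getSize();       // includes tail padding
+//   clang::CharUnits align = layout.getAlignment(); // ABI alignment
+//   uint64_t firstFieldBits = layout.getFieldOffset(0);
+//
+// where `lowerCtx` is a CIRLowerContext and `recordTy` a CIR record type.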
+ +/// This class contains layout information for one RecordDecl, which is a +/// struct/union/class. The decl represented must be a definition, not a +/// forward declaration. This class is also used to contain layout information +/// for one ObjCInterfaceDecl. +class CIRRecordLayout { + +private: + friend class CIRLowerContext; + + /// Size of record in characters. + clang::CharUnits Size; + + /// Size of record in characters without tail padding. + clang::CharUnits DataSize; + + // Alignment of record in characters. + clang::CharUnits Alignment; + + // Preferred alignment of record in characters. This can be different than + // Alignment in cases where it is beneficial for performance or backwards + // compatibility preserving (e.g. AIX-ABI). + clang::CharUnits PreferredAlignment; + + // Maximum of the alignments of the record members in characters. + clang::CharUnits UnadjustedAlignment; + + /// The required alignment of the object. In the MS-ABI the + /// __declspec(align()) trumps #pramga pack and must always be obeyed. + clang::CharUnits RequiredAlignment; + + /// Array of field offsets in bits. + /// FIXME(cir): Create a custom CIRVector instead? + std::vector FieldOffsets; + + struct CXXRecordLayoutInfo { + /// The non-virtual size (in chars) of an object, which is the size of the + /// object without virtual bases. + clang::CharUnits NonVirtualSize; + + /// The non-virtual alignment (in chars) of an object, which is the + /// alignment of the object without virtual bases. + clang::CharUnits NonVirtualAlignment; + + /// The preferred non-virtual alignment (in chars) of an object, which is + /// the preferred alignment of the object without virtual bases. + clang::CharUnits PreferredNVAlignment; + + /// The size of the largest empty subobject (either a base or a member). + /// Will be zero if the class doesn't contain any empty subobjects. + clang::CharUnits SizeOfLargestEmptySubobject; + + /// Virtual base table offset (Microsoft-only). + clang::CharUnits VBPtrOffset; + + /// Does this class provide a virtual function table (vtable in Itanium, + /// vftbl in Microsoft) that is independent from its base classes? + bool HasOwnVFPtr : 1; + + /// Does this class have a vftable that could be extended by a derived + /// class. The class may have inherited this pointer from a primary base + /// class. + bool HasExtendableVFPtr : 1; + + /// True if this class contains a zero sized member or base or a base with a + /// zero sized member or base. Only used for MS-ABI. + bool EndsWithZeroSizedObject : 1; + + /// True if this class is zero sized or first base is zero sized or has this + /// property. Only used for MS-ABI. + bool LeadsWithZeroSizedBase : 1; + }; + + /// CXXInfo - If the record layout is for a C++ record, this will have + /// C++ specific information about the record. + CXXRecordLayoutInfo *CXXInfo = nullptr; + + // Constructor for C++ records. 
+ CIRRecordLayout( + const CIRLowerContext &Ctx, clang::CharUnits size, + clang::CharUnits alignment, clang::CharUnits preferredAlignment, + clang::CharUnits unadjustedAlignment, clang::CharUnits requiredAlignment, + bool hasOwnVFPtr, bool hasExtendableVFPtr, clang::CharUnits vbptroffset, + clang::CharUnits datasize, ArrayRef fieldoffsets, + clang::CharUnits nonvirtualsize, clang::CharUnits nonvirtualalignment, + clang::CharUnits preferrednvalignment, + clang::CharUnits SizeOfLargestEmptySubobject, const Type PrimaryBase, + bool IsPrimaryBaseVirtual, const Type BaseSharingVBPtr, + bool EndsWithZeroSizedObject, bool LeadsWithZeroSizedBase); + + ~CIRRecordLayout() = default; + +public: + /// Get the record alignment in characters. + clang::CharUnits getAlignment() const { return Alignment; } + + /// Get the record size in characters. + clang::CharUnits getSize() const { return Size; } + + /// Get the offset of the given field index, in bits. + uint64_t getFieldOffset(unsigned FieldNo) const { + return FieldOffsets[FieldNo]; + } +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRRECORDLAYOUT_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRToCIRArgMapping.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRToCIRArgMapping.h new file mode 100644 index 000000000000..dd09122b94d9 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRToCIRArgMapping.h @@ -0,0 +1,138 @@ +//===--- CIRToCIRArgMapping.cpp - Maps to ABI-specific arguments ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics the ClangToLLVMArgMapping class in +// clang/lib/CodeGen/CGCall.cpp. The queries are adapted to operate on the CIR +// dialect, however. This class was extracted into a separate file to resolve +// build issues. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRTOCIRARGMAPPING_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRTOCIRARGMAPPING_H + +#include "CIRLowerContext.h" +#include "LowerFunctionInfo.h" +#include "clang/CIR/ABIArgInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" + +namespace mlir { +namespace cir { + +/// Encapsulates information about the way function arguments from +/// LoweringFunctionInfo should be passed to actual CIR function. +class CIRToCIRArgMapping { + static const unsigned InvalidIndex = ~0U; + unsigned TotalIRArgs; + + /// Arguments of CIR function corresponding to single CIR argument. + /// NOTE(cir): We add an MLIR block argument here indicating the actual + /// argument in the IR. + struct IRArgs { + unsigned PaddingArgIndex; + // Argument is expanded to IR arguments at positions + // [FirstArgIndex, FirstArgIndex + NumberOfArgs). + unsigned FirstArgIndex; + unsigned NumberOfArgs; + + IRArgs() + : PaddingArgIndex(InvalidIndex), FirstArgIndex(InvalidIndex), + NumberOfArgs(0) {} + }; + + llvm::SmallVector ArgInfo; + +public: + CIRToCIRArgMapping(const CIRLowerContext &context, + const LowerFunctionInfo &FI, bool onlyRequiredArgs = false) + : ArgInfo(onlyRequiredArgs ? 
FI.getNumRequiredArgs() : FI.arg_size()) { + construct(context, FI, onlyRequiredArgs); + }; + + unsigned totalIRArgs() const { return TotalIRArgs; } + + bool hasPaddingArg(unsigned ArgNo) const { + assert(ArgNo < ArgInfo.size()); + return ArgInfo[ArgNo].PaddingArgIndex != InvalidIndex; + } + + void construct(const CIRLowerContext &context, const LowerFunctionInfo &FI, + bool onlyRequiredArgs = false) { + unsigned IRArgNo = 0; + bool SwapThisWithSRet = false; + const ::cir::ABIArgInfo &RetAI = FI.getReturnInfo(); + + if (RetAI.getKind() == ::cir::ABIArgInfo::Indirect) { + llvm_unreachable("NYI"); + } + + unsigned ArgNo = 0; + unsigned NumArgs = + onlyRequiredArgs ? FI.getNumRequiredArgs() : FI.arg_size(); + for (LowerFunctionInfo::const_arg_iterator I = FI.arg_begin(); + ArgNo < NumArgs; ++I, ++ArgNo) { + assert(I != FI.arg_end()); + // Type ArgType = I->type; + const ::cir::ABIArgInfo &AI = I->info; + // Collect data about IR arguments corresponding to Clang argument ArgNo. + auto &IRArgs = ArgInfo[ArgNo]; + + if (::cir::MissingFeatures::argumentPadding()) { + llvm_unreachable("NYI"); + } + + switch (AI.getKind()) { + case ::cir::ABIArgInfo::Extend: + case ::cir::ABIArgInfo::Direct: { + // FIXME(cir): handle sseregparm someday... + assert(AI.getCoerceToType() && "Missing coerced type!!"); + StructType STy = dyn_cast(AI.getCoerceToType()); + if (AI.isDirect() && AI.getCanBeFlattened() && STy) { + llvm_unreachable("NYI"); + } else { + IRArgs.NumberOfArgs = 1; + } + break; + } + default: + llvm_unreachable("Missing ABIArgInfo::Kind"); + } + + if (IRArgs.NumberOfArgs > 0) { + IRArgs.FirstArgIndex = IRArgNo; + IRArgNo += IRArgs.NumberOfArgs; + } + + // Skip over the sret parameter when it comes second. We already handled + // it above. + if (IRArgNo == 1 && SwapThisWithSRet) + IRArgNo++; + } + assert(ArgNo == ArgInfo.size()); + + if (::cir::MissingFeatures::inallocaArgs()) { + llvm_unreachable("NYI"); + } + + TotalIRArgs = IRArgNo; + } + + /// Returns index of first IR argument corresponding to ArgNo, and their + /// quantity. 
+ std::pair getIRArgs(unsigned ArgNo) const { + assert(ArgNo < ArgInfo.size()); + return std::make_pair(ArgInfo[ArgNo].FirstArgIndex, + ArgInfo[ArgNo].NumberOfArgs); + } +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRTOCIRARGMAPPING_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt new file mode 100644 index 000000000000..218656c3b144 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt @@ -0,0 +1,33 @@ +add_clang_library(TargetLowering + ABIInfo.cpp + ABIInfoImpl.cpp + CIRCXXABI.cpp + CIRLowerContext.cpp + CIRRecordLayout.cpp + ItaniumCXXABI.cpp + LowerCall.cpp + LowerFunction.cpp + LowerModule.cpp + LowerTypes.cpp + RecordLayoutBuilder.cpp + TargetInfo.cpp + TargetLoweringInfo.cpp + Targets/AArch64.cpp + Targets/SPIR.cpp + Targets/X86.cpp + Targets/LoweringPrepareAArch64CXXABI.cpp + Targets/LoweringPrepareItaniumCXXABI.cpp + + DEPENDS + clangBasic + + LINK_LIBS PUBLIC + + clangBasic + LLVMTargetParser + MLIRIR + MLIRPass + MLIRDLTIDialect + MLIRCIR + MLIRCIRInterfaces +) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp new file mode 100644 index 000000000000..9daba7d1a10c --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp @@ -0,0 +1,109 @@ +//===------- ItaniumCXXABI.cpp - Emit CIR code Itanium-specific code -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides CIR lowering logic targeting the Itanium C++ ABI. The class in +// this file generates structures that follow the Itanium C++ ABI, which is +// documented at: +// https://itanium-cxx-abi.github.io/cxx-abi/abi.html +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html +// +// It also supports the closely-related ARM ABI, documented at: +// https://developer.arm.com/documentation/ihi0041/g/ +// +// This file partially mimics clang/lib/CodeGen/ItaniumCXXABI.cpp. The queries +// are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "CIRCXXABI.h" +#include "LowerModule.h" +#include "llvm/Support/ErrorHandling.h" + +namespace mlir { +namespace cir { + +namespace { + +class ItaniumCXXABI : public CIRCXXABI { + +protected: + bool UseARMMethodPtrABI; + bool UseARMGuardVarABI; + bool Use32BitVTableOffsetABI; + +public: + ItaniumCXXABI(LowerModule &LM, bool UseARMMethodPtrABI = false, + bool UseARMGuardVarABI = false) + : CIRCXXABI(LM), UseARMMethodPtrABI(UseARMMethodPtrABI), + UseARMGuardVarABI(UseARMGuardVarABI), Use32BitVTableOffsetABI(false) {} + + bool classifyReturnType(LowerFunctionInfo &FI) const override; + + // FIXME(cir): This expects a CXXRecordDecl! Not any record type. + RecordArgABI getRecordArgABI(const StructType RD) const override { + assert(!::cir::MissingFeatures::recordDeclIsCXXDecl()); + // If C++ prohibits us from making a copy, pass by address. 
+    assert(!::cir::MissingFeatures::recordDeclCanPassInRegisters());
+    return RAA_Default;
+  }
+};
+
+} // namespace
+
+bool ItaniumCXXABI::classifyReturnType(LowerFunctionInfo &FI) const {
+  const StructType RD = dyn_cast<StructType>(FI.getReturnType());
+  if (!RD)
+    return false;
+
+  // If C++ prohibits us from making a copy, return by address.
+  if (::cir::MissingFeatures::recordDeclCanPassInRegisters())
+    llvm_unreachable("NYI");
+
+  return false;
+}
+
+CIRCXXABI *CreateItaniumCXXABI(LowerModule &LM) {
+  switch (LM.getCXXABIKind()) {
+  // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't
+  // include the other 32-bit ARM oddities: constructor/destructor return
+  // values and array cookies.
+  case clang::TargetCXXABI::GenericAArch64:
+  case clang::TargetCXXABI::AppleARM64:
+    // TODO: this isn't quite right, clang uses AppleARM64CXXABI which inherits
+    // from ARMCXXABI. We'll have to follow suit.
+    assert(!::cir::MissingFeatures::appleArm64CXXABI());
+    return new ItaniumCXXABI(LM, /*UseARMMethodPtrABI=*/true,
+                             /*UseARMGuardVarABI=*/true);
+
+  case clang::TargetCXXABI::GenericItanium:
+    if (LM.getTargetInfo().getTriple().getArch() == llvm::Triple::le32) {
+      llvm_unreachable("NYI");
+    }
+    return new ItaniumCXXABI(LM);
+
+  case clang::TargetCXXABI::Microsoft:
+    llvm_unreachable("Microsoft ABI is not Itanium-based");
+  default:
+    llvm_unreachable("NYI");
+  }
+
+  llvm_unreachable("bad ABI kind");
+}
+
+} // namespace cir
+} // namespace mlir
+
+// FIXME(cir): Merge this into the CIRCXXABI class above.
+class LoweringPrepareItaniumCXXABI : public cir::LoweringPrepareCXXABI {
+public:
+  mlir::Value lowerDynamicCast(cir::CIRBaseBuilderTy &builder,
+                               clang::ASTContext &astCtx,
+                               mlir::cir::DynamicCastOp op) override;
+  mlir::Value lowerVAArg(cir::CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op,
+                         const cir::CIRDataLayout &datalayout) override;
+};
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerCall.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerCall.cpp
new file mode 100644
index 000000000000..42de07ec6965
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerCall.cpp
@@ -0,0 +1,339 @@
+#include "LowerCall.h"
+#include "CIRToCIRArgMapping.h"
+#include "LowerFunctionInfo.h"
+#include "LowerModule.h"
+#include "LowerTypes.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/FnInfoOpts.h"
+#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace mlir;
+using namespace mlir::cir;
+
+using ABIArgInfo = ::cir::ABIArgInfo;
+using FnInfoOpts = ::cir::FnInfoOpts;
+using MissingFeatures = ::cir::MissingFeatures;
+
+namespace {
+
+/// Arrange a call as unto a free function, except possibly with an
+/// additional number of formal parameters considered required.
+const LowerFunctionInfo &
+arrangeFreeFunctionLikeCall(LowerTypes &LT, LowerModule &LM,
+                            const OperandRange &args, const FuncType fnType,
+                            unsigned numExtraRequiredArgs, bool chainCall) {
+  assert(args.size() >= numExtraRequiredArgs);
+
+  assert(!::cir::MissingFeatures::extParamInfo());
+
+  // In most cases, there are no optional arguments.
+  RequiredArgs required = RequiredArgs::All;
+
+  // If we have a variadic prototype, the required arguments are the
+  // extra prefix plus the arguments in the prototype.
+  // FIXME(cir): Properly check if function is no-proto.
+ if (/*IsPrototypedFunction=*/true) { + if (fnType.isVarArg()) + llvm_unreachable("NYI"); + + if (::cir::MissingFeatures::extParamInfo()) + llvm_unreachable("NYI"); + } + + // TODO(cir): There's some CC stuff related to no-proto functions here, but + // its skipped here since it requires CodeGen info. Maybe this information + // could be embbed in the FuncOp during CIRGen. + + assert(!::cir::MissingFeatures::chainCall() && !chainCall && "NYI"); + FnInfoOpts opts = chainCall ? FnInfoOpts::IsChainCall : FnInfoOpts::None; + return LT.arrangeLLVMFunctionInfo(fnType.getReturnType(), opts, + fnType.getInputs(), required); +} + +/// Adds the formal parameters in FPT to the given prefix. If any parameter in +/// FPT has pass_object_size attrs, then we'll add parameters for those, too. +static void appendParameterTypes(SmallVectorImpl &prefix, FuncType fnTy) { + // Fast path: don't touch param info if we don't need to. + if (/*!fnTy->hasExtParameterInfos()=*/true) { + prefix.append(fnTy.getInputs().begin(), fnTy.getInputs().end()); + return; + } + + assert(MissingFeatures::extParamInfo()); + llvm_unreachable("NYI"); +} + +/// Arrange the LLVM function layout for a value of the given function +/// type, on top of any implicit parameters already stored. +/// +/// \param CGT - Abstraction for lowering CIR types. +/// \param instanceMethod - Whether the function is an instance method. +/// \param prefix - List of implicit parameters to be prepended (e.g. 'this'). +/// \param FTP - ABI-agnostic function type. +static const LowerFunctionInfo & +arrangeCIRFunctionInfo(LowerTypes &CGT, bool instanceMethod, + SmallVectorImpl &prefix, FuncType fnTy) { + assert(!MissingFeatures::extParamInfo()); + RequiredArgs Required = RequiredArgs::forPrototypePlus(fnTy, prefix.size()); + // FIXME: Kill copy. + appendParameterTypes(prefix, fnTy); + assert(!MissingFeatures::qualifiedTypes()); + Type resultType = fnTy.getReturnType(); + + FnInfoOpts opts = + instanceMethod ? FnInfoOpts::IsInstanceMethod : FnInfoOpts::None; + return CGT.arrangeLLVMFunctionInfo(resultType, opts, prefix, Required); +} + +} // namespace + +/// Update function with ABI-specific attributes. +/// +/// NOTE(cir): Partially copies CodeGenModule::ConstructAttributeList, but +/// focuses on ABI/Target-related attributes. +void LowerModule::constructAttributeList(StringRef Name, + const LowerFunctionInfo &FI, + FuncOp CalleeInfo, FuncOp newFn, + unsigned &CallingConv, + bool AttrOnCallSite, bool IsThunk) { + // Collect function IR attributes from the CC lowering. + // We'll collect the paramete and result attributes later. + // FIXME(cir): Codegen differentiates between CallConv and EffectiveCallConv, + // but I don't think we need to do this here. + CallingConv = FI.getCallingConvention(); + // FIXME(cir): No-return should probably be set in CIRGen (ABI-agnostic). + if (MissingFeatures::noReturn()) + llvm_unreachable("NYI"); + if (MissingFeatures::csmeCall()) + llvm_unreachable("NYI"); + + // TODO(cir): Implement AddAttributesFromFunctionProtoType here. + // TODO(cir): Implement AddAttributesFromOMPAssumes here. + assert(!MissingFeatures::openMP()); + + // TODO(cir): Skipping a bunch of AST queries here. We will need to partially + // implement some of them as this section sets target-specific attributes + // too. + // if (TargetDecl) { + // [...] + // } + + // NOTE(cir): The original code adds default and no-builtin attributes here as + // well. These are ABI/Target-agnostic, so it would be better handled in + // CIRGen. 
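+
+  // For illustration only (exact printed syntax may differ): a promotable
+  // signed 16-bit parameter classified as ABIArgInfo::Extend ends up carrying
+  // the `cir.signext` unit attribute emitted below, conceptually:
+  //
+  //   cir.func @f(%arg0: !s16i {cir.signext}) -> !s32i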
+ + // Override some default IR attributes based on declaration-specific + // information. + // NOTE(cir): Skipping another set of AST queries here. + + // Collect attributes from arguments and return values. + CIRToCIRArgMapping IRFunctionArgs(getContext(), FI); + + const ABIArgInfo &RetAI = FI.getReturnInfo(); + + // TODO(cir): No-undef attribute for return values partially depends on + // ABI-specific information. Maybe we should include it here. + + switch (RetAI.getKind()) { + case ABIArgInfo::Extend: + if (RetAI.isSignExt()) + newFn.setResultAttr(0, CIRDialect::getSExtAttrName(), + rewriter.getUnitAttr()); + else + // FIXME(cir): Add a proper abstraction to create attributes. + newFn.setResultAttr(0, CIRDialect::getZExtAttrName(), + rewriter.getUnitAttr()); + [[fallthrough]]; + case ABIArgInfo::Direct: + if (RetAI.getInReg()) + llvm_unreachable("InReg attribute is NYI"); + assert(!::cir::MissingFeatures::noFPClass()); + break; + case ABIArgInfo::Ignore: + break; + default: + llvm_unreachable("Missing ABIArgInfo::Kind"); + } + + if (!IsThunk) { + if (MissingFeatures::qualTypeIsReferenceType()) { + llvm_unreachable("NYI"); + } + } + + // Attach attributes to sret. + if (MissingFeatures::sretArgs()) { + llvm_unreachable("sret is NYI"); + } + + // Attach attributes to inalloca arguments. + if (MissingFeatures::inallocaArgs()) { + llvm_unreachable("inalloca is NYI"); + } + + // Apply `nonnull`, `dereferencable(N)` and `align N` to the `this` argument, + // unless this is a thunk function. + // FIXME: fix this properly, https://reviews.llvm.org/D100388 + if (MissingFeatures::funcDeclIsCXXMethodDecl() || + MissingFeatures::inallocaArgs()) { + llvm_unreachable("`this` argument attributes are NYI"); + } + + unsigned ArgNo = 0; + for (LowerFunctionInfo::const_arg_iterator I = FI.arg_begin(), + E = FI.arg_end(); + I != E; ++I, ++ArgNo) { + // Type ParamType = I->type; + const ABIArgInfo &AI = I->info; + SmallVector Attrs; + + // Add attribute for padding argument, if necessary. + if (IRFunctionArgs.hasPaddingArg(ArgNo)) { + llvm_unreachable("Padding argument is NYI"); + } + + // TODO(cir): Mark noundef arguments and return values. Although this + // attribute is not a part of the call conve, it uses it to determine if a + // value is noundef (e.g. if an argument is passed direct, indirectly, etc). + + // 'restrict' -> 'noalias' is done in EmitFunctionProlog when we + // have the corresponding parameter variable. It doesn't make + // sense to do it here because parameters are so messed up. + switch (AI.getKind()) { + case ABIArgInfo::Extend: + if (AI.isSignExt()) + Attrs.push_back( + rewriter.getNamedAttr("cir.signext", rewriter.getUnitAttr())); + else + // FIXME(cir): Add a proper abstraction to create attributes. + Attrs.push_back( + rewriter.getNamedAttr("cir.zeroext", rewriter.getUnitAttr())); + [[fallthrough]]; + case ABIArgInfo::Direct: + if (ArgNo == 0 && ::cir::MissingFeatures::chainCall()) + llvm_unreachable("ChainCall is NYI"); + else if (AI.getInReg()) + llvm_unreachable("InReg attribute is NYI"); + // Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign())); + assert(!::cir::MissingFeatures::noFPClass()); + break; + default: + llvm_unreachable("Missing ABIArgInfo::Kind"); + } + + if (::cir::MissingFeatures::qualTypeIsReferenceType()) { + llvm_unreachable("Reference handling is NYI"); + } + + // TODO(cir): Missing some swift and nocapture stuff here. 
+ assert(!::cir::MissingFeatures::extParamInfo()); + + if (!Attrs.empty()) { + unsigned FirstIRArg, NumIRArgs; + std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + for (unsigned i = 0; i < NumIRArgs; i++) + newFn.setArgAttrs(FirstIRArg + i, Attrs); + } + } + assert(ArgNo == FI.arg_size()); +} + +/// Arrange the argument and result information for the declaration or +/// definition of the given function. +const LowerFunctionInfo &LowerTypes::arrangeFunctionDeclaration(FuncOp fnOp) { + if (MissingFeatures::funcDeclIsCXXMethodDecl()) + llvm_unreachable("NYI"); + + assert(!MissingFeatures::qualifiedTypes()); + FuncType FTy = fnOp.getFunctionType(); + + assert(!MissingFeatures::CUDA()); + + // When declaring a function without a prototype, always use a + // non-variadic type. + if (fnOp.getNoProto()) { + llvm_unreachable("NYI"); + } + + return arrangeFreeFunctionType(FTy); +} + +/// Figure out the rules for calling a function with the given formal +/// type using the given arguments. The arguments are necessary +/// because the function might be unprototyped, in which case it's +/// target-dependent in crazy ways. +const LowerFunctionInfo & +LowerTypes::arrangeFreeFunctionCall(const OperandRange args, + const FuncType fnType, bool chainCall) { + return arrangeFreeFunctionLikeCall(*this, LM, args, fnType, chainCall ? 1 : 0, + chainCall); +} + +/// Arrange the argument and result information for the declaration or +/// definition of the given function. +const LowerFunctionInfo &LowerTypes::arrangeFreeFunctionType(FuncType FTy) { + SmallVector argTypes; + return ::arrangeCIRFunctionInfo(*this, /*instanceMethod=*/false, argTypes, + FTy); +} + +/// Arrange the argument and result information for the declaration or +/// definition of the given function. +const LowerFunctionInfo &LowerTypes::arrangeGlobalDeclaration(FuncOp fnOp) { + if (MissingFeatures::funcDeclIsCXXConstructorDecl() || + MissingFeatures::funcDeclIsCXXDestructorDecl()) + llvm_unreachable("NYI"); + + return arrangeFunctionDeclaration(fnOp); +} + +/// Arrange the argument and result information for an abstract value +/// of a given function type. This is the method which all of the +/// above functions ultimately defer to. +/// +/// \param resultType - ABI-agnostic CIR result type. +/// \param opts - Options to control the arrangement. +/// \param argTypes - ABI-agnostic CIR argument types. +/// \param required - Information about required/optional arguments. +const LowerFunctionInfo & +LowerTypes::arrangeLLVMFunctionInfo(Type resultType, FnInfoOpts opts, + ArrayRef argTypes, + RequiredArgs required) { + assert(!::cir::MissingFeatures::qualifiedTypes()); + + LowerFunctionInfo *FI = nullptr; + + // FIXME(cir): Allow user-defined CCs (e.g. __attribute__((vectorcall))). + assert(!::cir::MissingFeatures::extParamInfo()); + unsigned CC = clangCallConvToLLVMCallConv(clang::CallingConv::CC_C); + + // Construct the function info. We co-allocate the ArgInfos. + // NOTE(cir): This initial function info might hold incorrect data. + FI = LowerFunctionInfo::create( + CC, /*isInstanceMethod=*/false, /*isChainCall=*/false, + /*isDelegateCall=*/false, resultType, argTypes, required); + + // Compute ABI information. + if (CC == llvm::CallingConv::SPIR_KERNEL) { + llvm_unreachable("NYI"); + } else if (::cir::MissingFeatures::extParamInfo()) { + llvm_unreachable("NYI"); + } else { + // NOTE(cir): This corects the initial function info data. + getABIInfo().computeInfo(*FI); // FIXME(cir): Args should be set to null. 
+ } + + // Loop over all of the computed argument and return value info. If any of + // them are direct or extend without a specified coerce type, specify the + // default now. + ::cir::ABIArgInfo &retInfo = FI->getReturnInfo(); + if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr) + retInfo.setCoerceToType(convertType(FI->getReturnType())); + + for (auto &I : FI->arguments()) + if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr) + I.info.setCoerceToType(convertType(I.type)); + + return *FI; +} diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerCall.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerCall.h new file mode 100644 index 000000000000..b579f96fb436 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerCall.h @@ -0,0 +1,52 @@ +//===----- LowerCall.h - Encapsulate calling convention details -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CGCall.h. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERCALL_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERCALL_H + +#include "mlir/IR/Value.h" + +namespace mlir { +namespace cir { + +/// Contains the address where the return value of a function can be stored, and +/// whether the address is volatile or not. +class ReturnValueSlot { + // FIXME(cir): We should be able to query this directly from CIR at some + // point. This class can then be removed. + Value Addr = {}; + + // Return value slot flags + unsigned IsVolatile : 1; + unsigned IsUnused : 1; + unsigned IsExternallyDestructed : 1; + +public: + ReturnValueSlot() + : IsVolatile(false), IsUnused(false), IsExternallyDestructed(false) {} + ReturnValueSlot(Value Addr, bool IsVolatile, bool IsUnused = false, + bool IsExternallyDestructed = false) + : Addr(Addr), IsVolatile(IsVolatile), IsUnused(IsUnused), + IsExternallyDestructed(IsExternallyDestructed) {} + + bool isNull() const { return !Addr; } + bool isVolatile() const { return IsVolatile; } + Value getValue() const { return Addr; } + bool isUnused() const { return IsUnused; } + bool isExternallyDestructed() const { return IsExternallyDestructed; } +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERCALL_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp new file mode 100644 index 000000000000..9e90c44a7d76 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp @@ -0,0 +1,906 @@ +//===--- LowerFunction.cpp - Lower CIR Function Code ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CodeGenFunction.cpp. 
The queries +// are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "LowerFunction.h" +#include "CIRToCIRArgMapping.h" +#include "LowerCall.h" +#include "LowerFunctionInfo.h" +#include "LowerModule.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "clang/CIR/ABIArgInfo.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "clang/CIR/TypeEvaluationKind.h" +#include "llvm/Support/ErrorHandling.h" + +using ABIArgInfo = ::cir::ABIArgInfo; + +namespace mlir { +namespace cir { + +namespace { + +Value buildAddressAtOffset(LowerFunction &LF, Value addr, + const ABIArgInfo &info) { + if (unsigned offset = info.getDirectOffset()) { + llvm_unreachable("NYI"); + } + return addr; +} + +/// Given a struct pointer that we are accessing some number of bytes out of it, +/// try to gep into the struct to get at its inner goodness. Dive as deep as +/// possible without entering an element with an in-memory size smaller than +/// DstSize. +Value enterStructPointerForCoercedAccess(Value SrcPtr, StructType SrcSTy, + uint64_t DstSize, LowerFunction &CGF) { + // We can't dive into a zero-element struct. + if (SrcSTy.getNumElements() == 0) + llvm_unreachable("NYI"); + + Type FirstElt = SrcSTy.getMembers()[0]; + + // If the first elt is at least as large as what we're looking for, or if the + // first element is the same size as the whole struct, we can enter it. The + // comparison must be made on the store size and not the alloca size. Using + // the alloca size may overstate the size of the load. + uint64_t FirstEltSize = CGF.LM.getDataLayout().getTypeStoreSize(FirstElt); + if (FirstEltSize < DstSize && + FirstEltSize < CGF.LM.getDataLayout().getTypeStoreSize(SrcSTy)) + return SrcPtr; + + llvm_unreachable("NYI"); +} + +/// Create a store to \param Dst from \param Src where the source and +/// destination may have different types. +/// +/// This safely handles the case when the src type is larger than the +/// destination type; the upper bits of the src will be lost. +void createCoercedStore(Value Src, Value Dst, bool DstIsVolatile, + LowerFunction &CGF) { + Type SrcTy = Src.getType(); + Type DstTy = Dst.getType(); + if (SrcTy == DstTy) { + llvm_unreachable("NYI"); + } + + // FIXME(cir): We need a better way to handle datalayout queries. + assert(isa(SrcTy)); + llvm::TypeSize SrcSize = CGF.LM.getDataLayout().getTypeAllocSize(SrcTy); + + if (StructType DstSTy = dyn_cast(DstTy)) { + Dst = enterStructPointerForCoercedAccess(Dst, DstSTy, + SrcSize.getFixedValue(), CGF); + assert(isa(Dst.getType())); + DstTy = cast(Dst.getType()).getPointee(); + } + + PointerType SrcPtrTy = dyn_cast(SrcTy); + PointerType DstPtrTy = dyn_cast(DstTy); + // TODO(cir): Implement address space. + if (SrcPtrTy && DstPtrTy && !::cir::MissingFeatures::addressSpace()) { + llvm_unreachable("NYI"); + } + + // If the source and destination are integer or pointer types, just do an + // extension or truncation to the desired type. + if ((isa(SrcTy) || isa(SrcTy)) && + (isa(DstTy) || isa(DstTy))) { + llvm_unreachable("NYI"); + } + + llvm::TypeSize DstSize = CGF.LM.getDataLayout().getTypeAllocSize(DstTy); + + // If store is legal, just bitcast the src pointer. 
+ assert(!::cir::MissingFeatures::vectorType()); + if (SrcSize.getFixedValue() <= DstSize.getFixedValue()) { + // Dst = Dst.withElementType(SrcTy); + CGF.buildAggregateStore(Src, Dst, DstIsVolatile); + } else { + llvm_unreachable("NYI"); + } +} + +// FIXME(cir): Create a custom rewriter class to abstract this away. +Value createBitcast(Value Src, Type Ty, LowerFunction &LF) { + return LF.getRewriter().create(Src.getLoc(), Ty, CastKind::bitcast, + Src); +} + +/// Coerces a \param Src value to a value of type \param Ty. +/// +/// This safely handles the case when the src type is smaller than the +/// destination type; in this situation the values of bits which not present in +/// the src are undefined. +/// +/// NOTE(cir): This method has partial parity with CGCall's CreateCoercedLoad. +/// Unlike the original codegen, this function does not emit a coerced load +/// since CIR's type checker wouldn't allow it. Instead, it casts the existing +/// ABI-agnostic value to it's ABI-aware counterpart. Nevertheless, we should +/// try to follow the same logic as the original codegen for correctness. +Value createCoercedValue(Value Src, Type Ty, LowerFunction &CGF) { + Type SrcTy = Src.getType(); + + // If SrcTy and Ty are the same, just reuse the exising load. + if (SrcTy == Ty) + return Src; + + // If it is the special boolean case, simply bitcast it. + if ((isa(SrcTy) && isa(Ty)) || + (isa(SrcTy) && isa(Ty))) + return createBitcast(Src, Ty, CGF); + + llvm::TypeSize DstSize = CGF.LM.getDataLayout().getTypeAllocSize(Ty); + + if (auto SrcSTy = dyn_cast(SrcTy)) { + Src = enterStructPointerForCoercedAccess(Src, SrcSTy, + DstSize.getFixedValue(), CGF); + SrcTy = Src.getType(); + } + + llvm::TypeSize SrcSize = CGF.LM.getDataLayout().getTypeAllocSize(SrcTy); + + // If the source and destination are integer or pointer types, just do an + // extension or truncation to the desired type. + if ((isa(Ty) || isa(Ty)) && + (isa(SrcTy) || isa(SrcTy))) { + llvm_unreachable("NYI"); + } + + // If load is legal, just bitcast the src pointer. + if (!SrcSize.isScalable() && !DstSize.isScalable() && + SrcSize.getFixedValue() >= DstSize.getFixedValue()) { + // Generally SrcSize is never greater than DstSize, since this means we are + // losing bits. However, this can happen in cases where the structure has + // additional padding, for example due to a user specified alignment. + // + // FIXME: Assert that we aren't truncating non-padding bits when have access + // to that information. + // Src = Src.withElementType(); + return CGF.buildAggregateBitcast(Src, Ty); + } + + llvm_unreachable("NYI"); +} + +Value emitAddressAtOffset(LowerFunction &LF, Value addr, + const ABIArgInfo &info) { + if (unsigned offset = info.getDirectOffset()) { + llvm_unreachable("NYI"); + } + return addr; +} + +/// After the calling convention is lowered, an ABI-agnostic type might have to +/// be loaded back to its ABI-aware couterpart so it may be returned. If they +/// differ, we have to do a coerced load. A coerced load, which means to load a +/// type to another despite that they represent the same value. The simplest +/// cases can be solved with a mere bitcast. +/// +/// This partially replaces CreateCoercedLoad from the original codegen. +/// However, instead of emitting the load, it emits a cast. +/// +/// FIXME(cir): Improve parity with the original codegen. +Value castReturnValue(Value Src, Type Ty, LowerFunction &LF) { + Type SrcTy = Src.getType(); + + // If SrcTy and Ty are the same, nothing to do. 
+ if (SrcTy == Ty) + return Src; + + // If is the special boolean case, simply bitcast it. + if (isa(SrcTy) && isa(Ty)) + return createBitcast(Src, Ty, LF); + + llvm::TypeSize DstSize = LF.LM.getDataLayout().getTypeAllocSize(Ty); + + // FIXME(cir): Do we need the EnterStructPointerForCoercedAccess routine here? + + llvm::TypeSize SrcSize = LF.LM.getDataLayout().getTypeAllocSize(SrcTy); + + if ((isa(Ty) || isa(Ty)) && + (isa(SrcTy) || isa(SrcTy))) { + llvm_unreachable("NYI"); + } + + // If load is legal, just bitcast the src pointer. + if (!SrcSize.isScalable() && !DstSize.isScalable() && + SrcSize.getFixedValue() >= DstSize.getFixedValue()) { + // Generally SrcSize is never greater than DstSize, since this means we are + // losing bits. However, this can happen in cases where the structure has + // additional padding, for example due to a user specified alignment. + // + // FIXME: Assert that we aren't truncating non-padding bits when have access + // to that information. + return LF.getRewriter().create(Src.getLoc(), Ty, CastKind::bitcast, + Src); + } + + llvm_unreachable("NYI"); +} + +} // namespace + +// FIXME(cir): Pass SrcFn and NewFn around instead of having then as attributes. +LowerFunction::LowerFunction(LowerModule &LM, PatternRewriter &rewriter, + FuncOp srcFn, FuncOp newFn) + : Target(LM.getTarget()), rewriter(rewriter), SrcFn(srcFn), NewFn(newFn), + LM(LM) {} + +LowerFunction::LowerFunction(LowerModule &LM, PatternRewriter &rewriter, + FuncOp srcFn, CallOp callOp) + : Target(LM.getTarget()), rewriter(rewriter), SrcFn(srcFn), callOp(callOp), + LM(LM) {} + +/// This method has partial parity with CodeGenFunction::EmitFunctionProlog from +/// the original codegen. However, it focuses on the ABI-specific details. On +/// top of that, it is also responsible for rewriting the original function. +LogicalResult +LowerFunction::buildFunctionProlog(const LowerFunctionInfo &FI, FuncOp Fn, + MutableArrayRef Args) { + // NOTE(cir): Skipping naked and implicit-return-zero functions here. These + // are dealt with in CIRGen. + + CIRToCIRArgMapping IRFunctionArgs(LM.getContext(), FI); + assert(Fn.getNumArguments() == IRFunctionArgs.totalIRArgs()); + + // If we're using inalloca, all the memory arguments are GEPs off of the last + // parameter, which is a pointer to the complete memory area. + assert(!::cir::MissingFeatures::inallocaArgs()); + + // Name the struct return parameter. + assert(!::cir::MissingFeatures::sretArgs()); + + // Track if we received the parameter as a pointer (indirect, byval, or + // inalloca). If already have a pointer, EmitParmDecl doesn't need to copy it + // into a local alloca for us. + SmallVector ArgVals; + ArgVals.reserve(Args.size()); + + // Create a pointer value for every parameter declaration. This usually + // entails copying one or more LLVM IR arguments into an alloca. Don't push + // any cleanups or do anything that might unwind. We do that separately, so + // we can push the cleanups in the correct order for the ABI. + assert(FI.arg_size() == Args.size()); + unsigned ArgNo = 0; + LowerFunctionInfo::const_arg_iterator info_it = FI.arg_begin(); + for (MutableArrayRef::const_iterator i = Args.begin(), + e = Args.end(); + i != e; ++i, ++info_it, ++ArgNo) { + const Value Arg = *i; + const ABIArgInfo &ArgI = info_it->info; + + bool isPromoted = ::cir::MissingFeatures::varDeclIsKNRPromoted(); + // We are converting from ABIArgInfo type to VarDecl type directly, unless + // the parameter is promoted. 
In this case we convert to + // CGFunctionInfo::ArgInfo type with subsequent argument demotion. + Type Ty = {}; + if (isPromoted) + llvm_unreachable("NYI"); + else + Ty = Arg.getType(); + assert(!::cir::MissingFeatures::evaluationKind()); + + unsigned FirstIRArg, NumIRArgs; + std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + + switch (ArgI.getKind()) { + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: { + auto AI = Fn.getArgument(FirstIRArg); + Type LTy = Arg.getType(); + + // Prepare parameter attributes. So far, only attributes for pointer + // parameters are prepared. See + // http://llvm.org/docs/LangRef.html#paramattrs. + if (ArgI.getDirectOffset() == 0 && isa(LTy) && + isa(ArgI.getCoerceToType())) { + llvm_unreachable("NYI"); + } + + // Prepare the argument value. If we have the trivial case, handle it + // with no muss and fuss. + if (!isa(ArgI.getCoerceToType()) && + ArgI.getCoerceToType() == Ty && ArgI.getDirectOffset() == 0) { + assert(NumIRArgs == 1); + + // LLVM expects swifterror parameters to be used in very restricted + // ways. Copy the value into a less-restricted temporary. + Value V = AI; + if (::cir::MissingFeatures::extParamInfo()) { + llvm_unreachable("NYI"); + } + + // Ensure the argument is the correct type. + if (V.getType() != ArgI.getCoerceToType()) + llvm_unreachable("NYI"); + + if (isPromoted) + llvm_unreachable("NYI"); + + ArgVals.push_back(V); + + // NOTE(cir): Here we have a trivial case, which means we can just + // replace all uses of the original argument with the new one. + Value oldArg = SrcFn.getArgument(ArgNo); + Value newArg = Fn.getArgument(FirstIRArg); + rewriter.replaceAllUsesWith(oldArg, newArg); + + break; + } + + assert(!::cir::MissingFeatures::vectorType()); + + // Allocate original argument to be "uncoerced". + // FIXME(cir): We should have a alloca op builder that does not required + // the pointer type to be explicitly passed. + // FIXME(cir): Get the original name of the argument, as well as the + // proper alignment for the given type being allocated. + auto Alloca = rewriter.create( + Fn.getLoc(), rewriter.getType(Ty), Ty, + /*name=*/StringRef(""), + /*alignment=*/rewriter.getI64IntegerAttr(4)); + + Value Ptr = buildAddressAtOffset(*this, Alloca.getResult(), ArgI); + + // Fast-isel and the optimizer generally like scalar values better than + // FCAs, so we flatten them if this is safe to do for this argument. + StructType STy = dyn_cast(ArgI.getCoerceToType()); + if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && + STy.getNumElements() > 1) { + llvm_unreachable("NYI"); + } else { + // Simple case, just do a coerced store of the argument into the alloca. + assert(NumIRArgs == 1); + Value AI = Fn.getArgument(FirstIRArg); + // TODO(cir): Set argument name in the new function. + createCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); + } + + // Match to what EmitParamDecl is expecting for this type. + if (::cir::MissingFeatures::evaluationKind()) { + llvm_unreachable("NYI"); + } else { + // FIXME(cir): Should we have an ParamValue abstraction like in the + // original codegen? + ArgVals.push_back(Alloca); + } + + // NOTE(cir): Once we have uncoerced the argument, we should be able to + // RAUW the original argument alloca with the new one. This assumes that + // the argument is used only to be stored in a alloca. 
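+      // In other words, the lone user of the old block argument is expected
+      // to be the cir.store that spills it into its original alloca; that
+      // store and the now-dead alloca are erased below, and all uses of the
+      // old alloca are redirected to the freshly created, ABI-aware one.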
+ Value arg = SrcFn.getArgument(ArgNo); + assert(arg.hasOneUse()); + auto *firstStore = *arg.user_begin(); + auto argAlloca = cast(firstStore).getAddr(); + rewriter.replaceAllUsesWith(argAlloca, Alloca); + rewriter.eraseOp(firstStore); + rewriter.eraseOp(argAlloca.getDefiningOp()); + break; + } + default: + llvm_unreachable("Unhandled ABIArgInfo::Kind"); + } + } + + if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + llvm_unreachable("NYI"); + } else { + // FIXME(cir): In the original codegen, EmitParamDecl is called here. It + // is likely that said function considers ABI details during emission, so + // we migth have to add a counter part here. Currently, it is not needed. + } + + return success(); +} + +LogicalResult LowerFunction::buildFunctionEpilog(const LowerFunctionInfo &FI) { + // NOTE(cir): no-return, naked, and no result functions should be handled in + // CIRGen. + + Value RV = {}; + Type RetTy = FI.getReturnType(); + const ABIArgInfo &RetAI = FI.getReturnInfo(); + + switch (RetAI.getKind()) { + + case ABIArgInfo::Ignore: + break; + + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: + // FIXME(cir): Should we call ConvertType(RetTy) here? + if (RetAI.getCoerceToType() == RetTy && RetAI.getDirectOffset() == 0) { + // The internal return value temp always will have pointer-to-return-type + // type, just do a load. + + // If there is a dominating store to ReturnValue, we can elide + // the load, zap the store, and usually zap the alloca. + // NOTE(cir): This seems like a premature optimization case. Skipping it. + if (::cir::MissingFeatures::returnValueDominatingStoreOptmiization()) { + llvm_unreachable("NYI"); + } + // Otherwise, we have to do a simple load. + else { + // NOTE(cir): Nothing to do here. The codegen already emitted this load + // for us and there is no casting necessary to conform to the ABI. The + // zero-extension is enforced by the return value's attribute. Just + // early exit. + return success(); + } + } else { + // NOTE(cir): Unlike the original codegen, CIR may have multiple return + // statements in the function body. We have to handle this here. + mlir::PatternRewriter::InsertionGuard guard(rewriter); + NewFn->walk([&](ReturnOp returnOp) { + rewriter.setInsertionPoint(returnOp); + RV = castReturnValue(returnOp->getOperand(0), RetAI.getCoerceToType(), + *this); + rewriter.replaceOpWithNewOp(returnOp, RV); + }); + } + + // TODO(cir): Should AutoreleaseResult be handled here? + break; + + default: + llvm_unreachable("Unhandled ABIArgInfo::Kind"); + } + + return success(); +} + +/// Generate code for a function based on the ABI-specific information. +/// +/// This method has partial parity with CodeGenFunction::GenerateCode, but it +/// focuses on the ABI-specific details. So a lot of codegen stuff is removed. +LogicalResult LowerFunction::generateCode(FuncOp oldFn, FuncOp newFn, + const LowerFunctionInfo &FnInfo) { + assert(newFn && "generating code for null Function"); + auto Args = oldFn.getArguments(); + + // Emit the ABI-specific function prologue. + assert(newFn.empty() && "Function already has a body"); + rewriter.setInsertionPointToEnd(newFn.addEntryBlock()); + if (buildFunctionProlog(FnInfo, newFn, oldFn.getArguments()).failed()) + return failure(); + + // Ensure that old ABI-agnostic arguments uses were replaced. + const auto hasNoUses = [](Value val) { return val.getUses().empty(); }; + assert(std::all_of(Args.begin(), Args.end(), hasNoUses) && "Missing RAUW?"); + + // Migrate function body to new ABI-aware function. 
+ assert(oldFn.getBody().hasOneBlock() && + "Multiple blocks in original function not supported"); + + // Move old function body to new function. + // FIXME(cir): The merge below is not very good: will not work if SrcFn has + // multiple blocks and it mixes the new and old prologues. + rewriter.mergeBlocks(&oldFn.getBody().front(), &newFn.getBody().front(), + newFn.getArguments()); + + // FIXME(cir): What about saving parameters for corotines? Should we do + // something about it in this pass? If the change with the calling + // convention, we might have to handle this here. + + // Emit the standard function epilogue. + if (buildFunctionEpilog(FnInfo).failed()) + return failure(); + + return success(); +} + +void LowerFunction::buildAggregateStore(Value Val, Value Dest, + bool DestIsVolatile) { + // In LLVM codegen: + // Function to store a first-class aggregate into memory. We prefer to + // store the elements rather than the aggregate to be more friendly to + // fast-isel. + assert(mlir::isa(Dest.getType()) && "Storing in a non-pointer!"); + (void)DestIsVolatile; + + // Circumvent CIR's type checking. + Type pointeeTy = mlir::cast(Dest.getType()).getPointee(); + if (Val.getType() != pointeeTy) { + // NOTE(cir): We only bitcast and store if the types have the same size. + assert((LM.getDataLayout().getTypeSizeInBits(Val.getType()) == + LM.getDataLayout().getTypeSizeInBits(pointeeTy)) && + "Incompatible types"); + auto loc = Val.getLoc(); + Val = rewriter.create(loc, pointeeTy, CastKind::bitcast, Val); + } + + rewriter.create(Val.getLoc(), Val, Dest); +} + +Value LowerFunction::buildAggregateBitcast(Value Val, Type DestTy) { + return rewriter.create(Val.getLoc(), DestTy, CastKind::bitcast, Val); +} + +/// Rewrite a call operation to abide to the ABI calling convention. +/// +/// FIXME(cir): This method has partial parity to CodeGenFunction's +/// EmitCallEpxr method defined in CGExpr.cpp. This could likely be +/// removed in favor of a more direct approach. +LogicalResult LowerFunction::rewriteCallOp(CallOp op, + ReturnValueSlot retValSlot) { + + // TODO(cir): Check if BlockCall, CXXMemberCall, CUDAKernelCall, or + // CXXOperatorMember require special handling here. These should be handled + // in CIRGen, unless there is call conv or ABI-specific stuff to be handled, + // them we should do it here. + + // TODO(cir): Also check if Builtin and CXXPeseudoDtor need special handling + // here. These should be handled in CIRGen, unless there is call conv or + // ABI-specific stuff to be handled, them we should do it here. + + // NOTE(cir): There is no direct way to fetch the function type from the + // CallOp, so we fetch it from the source function. This assumes the + // function definition has not yet been lowered. + assert(SrcFn && "No source function"); + auto fnType = SrcFn.getFunctionType(); + + // Rewrite the call operation to abide to the ABI calling convention. + auto Ret = rewriteCallOp(fnType, SrcFn, op, retValSlot); + + // Replace the original call result with the new one. + if (Ret) + rewriter.replaceAllUsesWith(op.getResult(), Ret); + + // Erase original ABI-agnostic call. + rewriter.eraseOp(op); + return success(); +} + +/// Rewrite a call operation to abide to the ABI calling convention. +/// +/// FIXME(cir): This method has partial parity to CodeGenFunction's EmitCall +/// method defined in CGExpr.cpp. This could likely be removed in favor of a +/// more direct approach since most of the code here is exclusively CodeGen. 
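+///
+/// Conceptually, the ABI-agnostic cir.call is replaced by a call whose
+/// operands and result use the coerced ABI-aware types, with bitcasts (built
+/// via createCoercedValue) inserted to connect the old and new values. For
+/// instance, a by-value two-int struct argument would typically be bitcast to
+/// a 64-bit integer on x86-64 before being passed (illustrative only, not an
+/// exact description of every target).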
+Value LowerFunction::rewriteCallOp(FuncType calleeTy, FuncOp origCallee, + CallOp callOp, ReturnValueSlot retValSlot, + Value Chain) { + // NOTE(cir): Skip a bunch of function pointer stuff and AST declaration + // asserts. Also skip sanitizers, as these should likely be handled at + // CIRGen. + CallArgList Args; + if (Chain) + llvm_unreachable("NYI"); + + // NOTE(cir): Call args were already emitted in CIRGen. Skip the evaluation + // order done in CIRGen and just fetch the exiting arguments here. + Args = callOp.getArgOperands(); + + const LowerFunctionInfo &FnInfo = LM.getTypes().arrangeFreeFunctionCall( + callOp.getArgOperands(), calleeTy, /*chainCall=*/false); + + // C99 6.5.2.2p6: + // If the expression that denotes the called function has a type + // that does not include a prototype, [the default argument + // promotions are performed]. If the number of arguments does not + // equal the number of parameters, the behavior is undefined. If + // the function is defined with a type that includes a prototype, + // and either the prototype ends with an ellipsis (, ...) or the + // types of the arguments after promotion are not compatible with + // the types of the parameters, the behavior is undefined. If the + // function is defined with a type that does not include a + // prototype, and the types of the arguments after promotion are + // not compatible with those of the parameters after promotion, + // the behavior is undefined [except in some trivial cases]. + // That is, in the general case, we should assume that a call + // through an unprototyped function type works like a *non-variadic* + // call. The way we make this work is to cast to the exact type + // of the promoted arguments. + // + // Chain calls use this same code path to add the invisible chain parameter + // to the function type. + if (origCallee.getNoProto() || Chain) { + llvm_unreachable("NYI"); + } + + assert(!::cir::MissingFeatures::CUDA()); + + // TODO(cir): LLVM IR has the concept of "CallBase", which is a base class + // for all types of calls. Perhaps we should have a CIR interface to mimic + // this class. + CallOp CallOrInvoke = {}; + Value CallResult = + rewriteCallOp(FnInfo, origCallee, callOp, retValSlot, Args, CallOrInvoke, + /*isMustTail=*/false, callOp.getLoc()); + + // NOTE(cir): Skipping debug stuff here. + + return CallResult; +} + +// NOTE(cir): This method has partial parity to CodeGenFunction's EmitCall +// method in CGCall.cpp. When incrementing it, use the original codegen as a +// reference: add ABI-specific stuff and skip codegen stuff. +Value LowerFunction::rewriteCallOp(const LowerFunctionInfo &CallInfo, + FuncOp Callee, CallOp Caller, + ReturnValueSlot ReturnValue, + CallArgList &CallArgs, CallOp CallOrInvoke, + bool isMustTail, Location loc) { + // FIXME: We no longer need the types from CallArgs; lift up and simplify. + + // Handle struct-return functions by passing a pointer to the + // location that we would like to return into. + Type RetTy = CallInfo.getReturnType(); // ABI-agnostic type. + const ::cir::ABIArgInfo &RetAI = CallInfo.getReturnInfo(); + + FuncType IRFuncTy = LM.getTypes().getFunctionType(CallInfo); + + // NOTE(cir): Some target/ABI related checks happen here. They are skipped + // under the assumption that they are handled in CIRGen. + + // 1. Set up the arguments. + + // If we're using inalloca, insert the allocation after the stack save. + // FIXME: Do this earlier rather than hacking it in here! 
+ if (StructType ArgStruct = CallInfo.getArgStruct()) { + llvm_unreachable("NYI"); + } + + CIRToCIRArgMapping IRFunctionArgs(LM.getContext(), CallInfo); + SmallVector IRCallArgs(IRFunctionArgs.totalIRArgs()); + + // If the call returns a temporary with struct return, create a temporary + // alloca to hold the result, unless one is given to us. + if (RetAI.isIndirect() || RetAI.isCoerceAndExpand() || RetAI.isInAlloca()) { + llvm_unreachable("NYI"); + } + + assert(!::cir::MissingFeatures::swift()); + + // NOTE(cir): Skipping lifetime markers here. + + // Translate all of the arguments as necessary to match the IR lowering. + assert(CallInfo.arg_size() == CallArgs.size() && + "Mismatch between function signature & arguments."); + unsigned ArgNo = 0; + LowerFunctionInfo::const_arg_iterator info_it = CallInfo.arg_begin(); + for (auto I = CallArgs.begin(), E = CallArgs.end(); I != E; + ++I, ++info_it, ++ArgNo) { + const ABIArgInfo &ArgInfo = info_it->info; + + if (IRFunctionArgs.hasPaddingArg(ArgNo)) + llvm_unreachable("NYI"); + + unsigned FirstIRArg, NumIRArgs; + std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + + switch (ArgInfo.getKind()) { + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: { + + if (isa(info_it->type)) { + IRCallArgs[FirstIRArg] = *I; + break; + } + + if (!isa(ArgInfo.getCoerceToType()) && + ArgInfo.getCoerceToType() == info_it->type && + ArgInfo.getDirectOffset() == 0) { + assert(NumIRArgs == 1); + Value V; + if (!isa(I->getType())) { + V = *I; + } else { + llvm_unreachable("NYI"); + } + + if (::cir::MissingFeatures::extParamInfo()) { + llvm_unreachable("NYI"); + } + + if (ArgInfo.getCoerceToType() != V.getType() && + isa(V.getType())) + llvm_unreachable("NYI"); + + if (FirstIRArg < IRFuncTy.getNumInputs() && + V.getType() != IRFuncTy.getInput(FirstIRArg)) + llvm_unreachable("NYI"); + + if (::cir::MissingFeatures::undef()) + llvm_unreachable("NYI"); + IRCallArgs[FirstIRArg] = V; + break; + } + + // FIXME: Avoid the conversion through memory if possible. + Value Src = {}; + if (!isa(I->getType())) { + llvm_unreachable("NYI"); + } else { + // NOTE(cir): L/RValue stuff are left for CIRGen to handle. + Src = *I; + } + + // If the value is offst in memory, apply the offset now. + // FIXME(cir): Is this offset already handled in CIRGen? + Src = emitAddressAtOffset(*this, Src, ArgInfo); + + // Fast-isel and the optimizer generally like scalar values better than + // FCAs, so we flatten them if this is safe to do for this argument. + StructType STy = dyn_cast(ArgInfo.getCoerceToType()); + if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { + llvm_unreachable("NYI"); + } else { + // In the simple case, just pass the coerced loaded value. + assert(NumIRArgs == 1); + Value Load = createCoercedValue(Src, ArgInfo.getCoerceToType(), *this); + + // FIXME(cir): We should probably handle CMSE non-secure calls here + assert(!::cir::MissingFeatures::cmseNonSecureCallAttr()); + + // since they are a ARM-specific feature. + if (::cir::MissingFeatures::undef()) + llvm_unreachable("NYI"); + IRCallArgs[FirstIRArg] = Load; + } + + break; + } + default: + llvm::outs() << "Missing ABIArgInfo::Kind: " << ArgInfo.getKind() << "\n"; + llvm_unreachable("NYI"); + } + } + + // 2. Prepare the function pointer. + // NOTE(cir): This is not needed for CIR. + + // 3. Perform the actual call. + + // NOTE(cir): CIRGen handle when to "deactive" cleanups. We also skip some + // debugging stuff here. + + // Update the largest vector width if any arguments have vector types. 
+ assert(!::cir::MissingFeatures::vectorType()); + + // Compute the calling convention and attributes. + + // FIXME(cir): Skipping call attributes for now. Not sure if we have to do + // this at all since we already do it for the function definition. + + // FIXME(cir): Implement the required procedures for strictfp function and + // fast-math. + + // FIXME(cir): Add missing call-site attributes here if they are + // ABI/target-specific, otherwise, do it in CIRGen. + + // NOTE(cir): Deciding whether to use Call or Invoke is done in CIRGen. + + // Rewrite the actual call operation. + // TODO(cir): Handle other types of CIR calls (e.g. cir.try_call). + // NOTE(cir): We don't know if the callee was already lowered, so we only + // fetch the name from the callee, while the return type is fetch from the + // lowering types manager. + CallOp newCallOp = rewriter.create( + loc, Caller.getCalleeAttr(), IRFuncTy.getReturnType(), IRCallArgs); + auto extraAttrs = + rewriter.getAttr(rewriter.getDictionaryAttr({})); + newCallOp->setAttr("extra_attrs", extraAttrs); + + assert(!::cir::MissingFeatures::vectorType()); + + // NOTE(cir): Skipping some ObjC, tail-call, debug, and attribute stuff + // here. + + // 4. Finish the call. + + // NOTE(cir): Skipping no-return, isMustTail, swift error handling, and + // writebacks here. These should be handled in CIRGen, I think. + + // Convert return value from ABI-agnostic to ABI-aware. + Value Ret = [&] { + // NOTE(cir): CIRGen already handled the emission of the return value. We + // need only to handle the ABI-specific to ABI-agnostic cast here. + switch (RetAI.getKind()) { + + case ::cir::ABIArgInfo::Ignore: + // If we are ignoring an argument that had a result, make sure to + // construct the appropriate return value for our caller. + return getUndefRValue(RetTy); + + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: { + Type RetIRTy = RetTy; + if (RetAI.getCoerceToType() == RetIRTy && RetAI.getDirectOffset() == 0) { + switch (getEvaluationKind(RetTy)) { + case ::cir::TypeEvaluationKind::TEK_Scalar: { + // If the argument doesn't match, perform a bitcast to coerce it. + // This can happen due to trivial type mismatches. NOTE(cir): + // Perhaps this section should handle CIR's boolean case. + Value V = newCallOp.getResult(); + if (V.getType() != RetIRTy) + llvm_unreachable("NYI"); + return V; + } + default: + llvm_unreachable("NYI"); + } + } + + // If coercing a fixed vector from a scalable vector for ABI + // compatibility, and the types match, use the llvm.vector.extract + // intrinsic to perform the conversion. + if (::cir::MissingFeatures::vectorType()) { + llvm_unreachable("NYI"); + } + + // FIXME(cir): Use return value slot here. + Value RetVal = callOp.getResult(); + // TODO(cir): Check for volatile return values. + assert(!::cir::MissingFeatures::volatileTypes()); + + // NOTE(cir): If the function returns, there should always be a valid + // return value present. Instead of setting the return value here, we + // should have the ReturnValueSlot object set it beforehand. + if (!RetVal) { + RetVal = callOp.getResult(); + // TODO(cir): Check for volatile return values. + assert(::cir::MissingFeatures::volatileTypes()); + } + + // An empty record can overlap other data (if declared with + // no_unique_address); omit the store for such types - as there is no + // actual data to store. 
+ if (dyn_cast(RetTy) && + cast(RetTy).getNumElements() != 0) { + RetVal = + createCoercedValue(newCallOp.getResult(), RetVal.getType(), *this); + } + + // NOTE(cir): No need to convert from a temp to an RValue. This is + // done in CIRGen + return RetVal; + } + default: + llvm::errs() << "Unhandled ABIArgInfo kind: " << RetAI.getKind() << "\n"; + llvm_unreachable("NYI"); + } + }(); + + // NOTE(cir): Skipping Emissions, lifetime markers, and dtors here that + // should be handled in CIRGen. + + return Ret; +} + +// NOTE(cir): This method has partial parity to CodeGenFunction's +// GetUndefRValue defined in CGExpr.cpp. +Value LowerFunction::getUndefRValue(Type Ty) { + if (isa(Ty)) + return nullptr; + + llvm::outs() << "Missing undef handler for value type: " << Ty << "\n"; + llvm_unreachable("NYI"); +} + +::cir::TypeEvaluationKind LowerFunction::getEvaluationKind(Type type) { + // FIXME(cir): Implement type classes for CIR types. + if (isa(type)) + return ::cir::TypeEvaluationKind::TEK_Aggregate; + if (isa(type)) + return ::cir::TypeEvaluationKind::TEK_Scalar; + llvm_unreachable("NYI"); +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.h new file mode 100644 index 000000000000..bd46bcdd1d8b --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.h @@ -0,0 +1,107 @@ +//===-- LowerFunction.h - Per-Function state for CIR lowering ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class partially mimics clang/lib/CodeGen/CGFunctionInfo.h. The queries +// are adapted to operate on the CIR dialect, however. And we only copy code +// related to ABI-specific codegen. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERFUNCTION_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERFUNCTION_H + +#include "CIRCXXABI.h" +#include "LowerCall.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/TypeEvaluationKind.h" + +namespace mlir { +namespace cir { + +using CallArgList = SmallVector; + +class LowerFunction { + LowerFunction(const LowerFunction &) = delete; + void operator=(const LowerFunction &) = delete; + + friend class CIRCXXABI; + + const clang::TargetInfo &Target; + + PatternRewriter &rewriter; + FuncOp SrcFn; // Original ABI-agnostic function. + FuncOp NewFn; // New ABI-aware function. + CallOp callOp; // Call operation to be lowered. + +public: + /// Builder for lowering calling convention of a function definition. + LowerFunction(LowerModule &LM, PatternRewriter &rewriter, FuncOp srcFn, + FuncOp newFn); + + /// Builder for lowering calling convention of a call operation. + LowerFunction(LowerModule &LM, PatternRewriter &rewriter, FuncOp srcFn, + CallOp callOp); + + ~LowerFunction() = default; + + LowerModule &LM; // Per-module state. + + PatternRewriter &getRewriter() const { return rewriter; } + + const clang::TargetInfo &getTarget() const { return Target; } + + // Build ABI/Target-specific function prologue. 
+ LogicalResult buildFunctionProlog(const LowerFunctionInfo &FI, FuncOp Fn, + MutableArrayRef Args); + + // Build ABI/Target-specific function epilogue. + LogicalResult buildFunctionEpilog(const LowerFunctionInfo &FI); + + // Parity with CodeGenFunction::GenerateCode. Keep in mind that several + // sections in the original function are focused on codegen unrelated to the + // ABI. Such sections are handled in CIR's codegen, not here. + LogicalResult generateCode(FuncOp oldFn, FuncOp newFn, + const LowerFunctionInfo &FnInfo); + + // Emit the most simple cir.store possible (e.g. a store for a whole + // struct), which can later be broken down in other CIR levels (or prior + // to dialect codegen). + void buildAggregateStore(Value Val, Value Dest, bool DestIsVolatile); + + // Emit a simple bitcast for a coerced aggregate type to convert it from an + // ABI-agnostic to an ABI-aware type. + Value buildAggregateBitcast(Value Val, Type DestTy); + + /// Rewrite a call operation to abide to the ABI calling convention. + LogicalResult rewriteCallOp(CallOp op, + ReturnValueSlot retValSlot = ReturnValueSlot()); + Value rewriteCallOp(FuncType calleeTy, FuncOp origCallee, CallOp callOp, + ReturnValueSlot retValSlot, Value Chain = nullptr); + Value rewriteCallOp(const LowerFunctionInfo &CallInfo, FuncOp Callee, + CallOp Caller, ReturnValueSlot ReturnValue, + CallArgList &CallArgs, CallOp CallOrInvoke, + bool isMustTail, Location loc); + + /// Get an appropriate 'undef' value for the given type. + Value getUndefRValue(Type Ty); + + /// Return the TypeEvaluationKind of Type \c T. + static ::cir::TypeEvaluationKind getEvaluationKind(Type T); + + static bool hasScalarEvaluationKind(Type T) { + return getEvaluationKind(T) == ::cir::TypeEvaluationKind::TEK_Scalar; + } +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERFUNCTION_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunctionInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunctionInfo.h new file mode 100644 index 000000000000..c81335c9985a --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunctionInfo.h @@ -0,0 +1,176 @@ +//==-- LowerFunctionInfo.h - Represents of function argument/return types --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/inlcude/CodeGen/LowerFunctionInfo.h. The +// queries are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERFUNCTIONINFO_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERFUNCTIONINFO_H + +#include "mlir/IR/Types.h" +#include "clang/CIR/ABIArgInfo.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/TrailingObjects.h" + +namespace mlir { +namespace cir { + +/// A class for recording the number of arguments that a function +/// signature requires. +class RequiredArgs { + /// The number of required arguments, or ~0 if the signature does + /// not permit optional arguments. 
+ unsigned NumRequired; + +public: + enum All_t { All }; + + RequiredArgs(All_t _) : NumRequired(~0U) {} + explicit RequiredArgs(unsigned n) : NumRequired(n) { assert(n != ~0U); } + + /// Compute the arguments required by the given formal prototype, + /// given that there may be some additional, non-formal arguments + /// in play. + /// + /// If FD is not null, this will consider pass_object_size params in FD. + static RequiredArgs forPrototypePlus(const FuncType prototype, + unsigned additional) { + if (!prototype.isVarArg()) + return All; + + llvm_unreachable("Variadic function is NYI"); + } + + bool allowsOptionalArgs() const { return NumRequired != ~0U; } +}; + +// Implementation detail of LowerFunctionInfo, factored out so it can be +// named in the TrailingObjects base class of CGFunctionInfo. +struct LowerFunctionInfoArgInfo { + mlir::Type type; // Original ABI-agnostic type. + ::cir::ABIArgInfo info; // ABI-specific information. +}; + +// FIXME(cir): We could likely encode this information within CIR/MLIR, allowing +// us to eliminate this class. +class LowerFunctionInfo final + : private llvm::TrailingObjects { + typedef LowerFunctionInfoArgInfo ArgInfo; + + /// The LLVM::CallingConv to use for this function (as specified by the + /// user). + unsigned CallingConvention : 8; + + /// The LLVM::CallingConv to actually use for this function, which may + /// depend on the ABI. + unsigned EffectiveCallingConvention : 8; + + /// Whether this is an instance method. + unsigned InstanceMethod : 1; + + /// Whether this is a chain call. + unsigned ChainCall : 1; + + /// Whether this function is called by forwarding arguments. + /// This doesn't support inalloca or varargs. + unsigned DelegateCall : 1; + + RequiredArgs Required; + + /// The struct representing all arguments passed in memory. Only used when + /// passing non-trivial types with inalloca. Not part of the profile. + StructType ArgStruct; + + unsigned NumArgs; + + const ArgInfo *getArgsBuffer() const { return getTrailingObjects(); } + ArgInfo *getArgsBuffer() { return getTrailingObjects(); } + + LowerFunctionInfo() : Required(RequiredArgs::All) {} + +public: + static LowerFunctionInfo *create(unsigned llvmCC, bool instanceMethod, + bool chainCall, bool delegateCall, + Type resultType, + ArrayRef argTypes, + RequiredArgs required) { + // TODO(cir): Add assertions? + assert(!::cir::MissingFeatures::extParamInfo()); + void *buffer = operator new(totalSizeToAlloc(argTypes.size() + 1)); + + LowerFunctionInfo *FI = new (buffer) LowerFunctionInfo(); + FI->CallingConvention = llvmCC; + FI->EffectiveCallingConvention = llvmCC; + FI->InstanceMethod = instanceMethod; + FI->ChainCall = chainCall; + FI->DelegateCall = delegateCall; + FI->Required = required; + FI->ArgStruct = nullptr; + FI->NumArgs = argTypes.size(); + FI->getArgsBuffer()[0].type = resultType; + for (unsigned i = 0, e = argTypes.size(); i != e; ++i) + FI->getArgsBuffer()[i + 1].type = argTypes[i]; + + return FI; + }; + + // Friending class TrailingObjects is apparently not good enough for MSVC, + // so these have to be public. 
+ friend class TrailingObjects; + size_t numTrailingObjects(OverloadToken) const { + return NumArgs + 1; + } + + typedef const ArgInfo *const_arg_iterator; + typedef ArgInfo *arg_iterator; + + MutableArrayRef arguments() { + return MutableArrayRef(arg_begin(), NumArgs); + } + + const_arg_iterator arg_begin() const { return getArgsBuffer() + 1; } + const_arg_iterator arg_end() const { return getArgsBuffer() + 1 + NumArgs; } + arg_iterator arg_begin() { return getArgsBuffer() + 1; } + arg_iterator arg_end() { return getArgsBuffer() + 1 + NumArgs; } + + unsigned arg_size() const { return NumArgs; } + + bool isVariadic() const { + assert(!::cir::MissingFeatures::variadicFunctions()); + return false; + } + unsigned getNumRequiredArgs() const { + if (isVariadic()) + llvm_unreachable("NYI"); + return arg_size(); + } + + Type getReturnType() const { return getArgsBuffer()[0].type; } + + ::cir::ABIArgInfo &getReturnInfo() { return getArgsBuffer()[0].info; } + const ::cir::ABIArgInfo &getReturnInfo() const { + return getArgsBuffer()[0].info; + } + + /// Return the user specified callingconvention, which has been translated + /// into an LLVM CC. + unsigned getCallingConvention() const { return CallingConvention; } + + /// Get the struct type used to represent all the arguments in memory. + StructType getArgStruct() const { return ArgStruct; } +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERFUNCTIONINFO_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp new file mode 100644 index 000000000000..715a5f2470d7 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp @@ -0,0 +1,250 @@ +//===--- LowerModule.cpp - Lower CIR Module to a Target -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CodeGenModule.cpp. The queries +// are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +// FIXME(cir): This header file is not exposed to the public API, but can be +// reused by CIR ABI lowering since it holds target-specific information. 
+#include "../../../../Basic/Targets.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetOptions.h" + +#include "CIRLowerContext.h" +#include "LowerFunction.h" +#include "LowerModule.h" +#include "TargetInfo.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "clang/CIR/Target/AArch64.h" +#include "llvm/Support/ErrorHandling.h" + +using MissingFeatures = ::cir::MissingFeatures; +using AArch64ABIKind = ::cir::AArch64ABIKind; +using X86AVXABILevel = ::cir::X86AVXABILevel; + +namespace mlir { +namespace cir { + +static CIRCXXABI *createCXXABI(LowerModule &CGM) { + switch (CGM.getCXXABIKind()) { + case clang::TargetCXXABI::AppleARM64: + case clang::TargetCXXABI::Fuchsia: + case clang::TargetCXXABI::GenericAArch64: + case clang::TargetCXXABI::GenericARM: + case clang::TargetCXXABI::iOS: + case clang::TargetCXXABI::WatchOS: + case clang::TargetCXXABI::GenericMIPS: + case clang::TargetCXXABI::GenericItanium: + case clang::TargetCXXABI::WebAssembly: + case clang::TargetCXXABI::XL: + return CreateItaniumCXXABI(CGM); + case clang::TargetCXXABI::Microsoft: + llvm_unreachable("Windows ABI NYI"); + } + + llvm_unreachable("invalid C++ ABI kind"); +} + +static std::unique_ptr +createTargetLoweringInfo(LowerModule &LM) { + const clang::TargetInfo &Target = LM.getTarget(); + const llvm::Triple &Triple = Target.getTriple(); + + switch (Triple.getArch()) { + case llvm::Triple::aarch64_be: + case llvm::Triple::aarch64: { + AArch64ABIKind Kind = AArch64ABIKind::AAPCS; + if (Target.getABI() == "darwinpcs") + llvm_unreachable("DarwinPCS ABI NYI"); + else if (Triple.isOSWindows()) + llvm_unreachable("Windows ABI NYI"); + else if (Target.getABI() == "aapcs-soft") + llvm_unreachable("AAPCS-soft ABI NYI"); + + return createAArch64TargetLoweringInfo(LM, Kind); + } + case llvm::Triple::x86_64: { + switch (Triple.getOS()) { + case llvm::Triple::Win32: + llvm_unreachable("Windows ABI NYI"); + default: + return createX86_64TargetLoweringInfo(LM, X86AVXABILevel::None); + } + } + case llvm::Triple::spirv64: + return createSPIRVTargetLoweringInfo(LM); + default: + llvm_unreachable("ABI NYI"); + } +} + +LowerModule::LowerModule(clang::LangOptions opts, ModuleOp &module, + StringAttr DL, + std::unique_ptr target, + PatternRewriter &rewriter) + : context(module, opts), module(module), Target(std::move(target)), + ABI(createCXXABI(*this)), types(*this, DL.getValue()), + rewriter(rewriter) { + context.initBuiltinTypes(*Target); +} + +const TargetLoweringInfo &LowerModule::getTargetLoweringInfo() { + if (!TheTargetCodeGenInfo) + TheTargetCodeGenInfo = createTargetLoweringInfo(*this); + return *TheTargetCodeGenInfo; +} + +void LowerModule::setCIRFunctionAttributes(FuncOp GD, + const LowerFunctionInfo &Info, + FuncOp F, bool IsThunk) { + unsigned CallingConv; + // NOTE(cir): The method below will update the F function in-place with the + // proper attributes. + constructAttributeList(GD.getName(), Info, GD, F, CallingConv, + /*AttrOnCallSite=*/false, IsThunk); + // TODO(cir): Set Function's calling convention. +} + +/// Set function attributes for a function declaration. +/// +/// This method is based on CodeGenModule::SetFunctionAttributes but it +/// altered to consider only the ABI/Target-related bits. 
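+/// For complete function definitions this currently boils down to a call to
+/// setCIRFunctionAttributes (which in turn calls constructAttributeList);
+/// most of the remaining cases are still NYI stubs.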
+void LowerModule::setFunctionAttributes(FuncOp oldFn, FuncOp newFn, + bool IsIncompleteFunction, + bool IsThunk) { + + // TODO(cir): There's some special handling from attributes related to LLVM + // intrinsics. Should we do that here as well? + + // Setup target-specific attributes. + if (!IsIncompleteFunction) + setCIRFunctionAttributes(oldFn, getTypes().arrangeGlobalDeclaration(oldFn), + newFn, IsThunk); + + // TODO(cir): Handle attributes for returned "this" objects. + + // NOTE(cir): Skipping some linkage and other global value attributes here as + // it might be better for CIRGen to handle them. + + // TODO(cir): Skipping section attributes here. + + // TODO(cir): Skipping error attributes here. + + // If we plan on emitting this inline builtin, we can't treat it as a builtin. + if (MissingFeatures::funcDeclIsInlineBuiltinDeclaration()) { + llvm_unreachable("NYI"); + } + + if (MissingFeatures::funcDeclIsReplaceableGlobalAllocationFunction()) { + llvm_unreachable("NYI"); + } + + if (MissingFeatures::funcDeclIsCXXConstructorDecl() || + MissingFeatures::funcDeclIsCXXDestructorDecl()) + llvm_unreachable("NYI"); + else if (MissingFeatures::funcDeclIsCXXMethodDecl()) + llvm_unreachable("NYI"); + + // NOTE(cir) Skipping emissions that depend on codegen options, as well as + // sanitizers handling here. Do this in CIRGen. + + if (MissingFeatures::langOpts() && MissingFeatures::openMP()) + llvm_unreachable("NYI"); + + // NOTE(cir): Skipping more things here that depend on codegen options. + + if (MissingFeatures::extParamInfo()) { + llvm_unreachable("NYI"); + } +} + +/// Rewrites an existing function to conform to the ABI. +/// +/// This method is based on CodeGenModule::EmitGlobalFunctionDefinition but it +/// considerably simplified as it tries to remove any CodeGen related code. +LogicalResult LowerModule::rewriteFunctionDefinition(FuncOp op) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(op); + + // Get ABI/target-specific function information. + const LowerFunctionInfo &FI = this->getTypes().arrangeGlobalDeclaration(op); + + // Get ABI/target-specific function type. + FuncType Ty = this->getTypes().getFunctionType(FI); + + // NOTE(cir): Skipping getAddrOfFunction and getOrCreateCIRFunction methods + // here, as they are mostly codegen logic. + + // Create a new function with the ABI-specific types. + FuncOp newFn = cast(rewriter.cloneWithoutRegions(op)); + newFn.setType(Ty); + + // NOTE(cir): The clone above will preserve any existing attributes. If there + // are high-level attributes that ought to be dropped, do it here. + + // Set up ABI-specific function attributes. + setFunctionAttributes(op, newFn, false, /*IsThunk=*/false); + if (MissingFeatures::extParamInfo()) { + llvm_unreachable("ExtraAttrs are NYI"); + } + + if (LowerFunction(*this, rewriter, op, newFn) + .generateCode(op, newFn, FI) + .failed()) + return failure(); + + // Erase original ABI-agnostic function. + rewriter.eraseOp(op); + return success(); +} + +LogicalResult LowerModule::rewriteFunctionCall(CallOp callOp, FuncOp funcOp) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(callOp); + + // Create a new function with the ABI-specific calling convention. + if (LowerFunction(*this, rewriter, funcOp, callOp) + .rewriteCallOp(callOp) + .failed()) + return failure(); + + return success(); +} + +// TODO: not to create it every time +std::unique_ptr createLowerModule(ModuleOp module, + PatternRewriter &rewriter) { + // Fetch the LLVM data layout string. 
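+  // For x86_64-unknown-linux-gnu this is typically something like
+  // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  // (shown for illustration only; the actual string comes from the module).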
+ auto dataLayoutStr = cast( + module->getAttr(LLVM::LLVMDialect::getDataLayoutAttrName())); + + // Fetch target information. + llvm::Triple triple( + cast(module->getAttr("cir.triple")).getValue()); + clang::TargetOptions targetOptions; + targetOptions.Triple = triple.str(); + auto targetInfo = clang::targets::AllocateTarget(triple, targetOptions); + + // FIXME(cir): This just uses the default language options. We need to account + // for custom options. + // Create context. + assert(!::cir::MissingFeatures::langOpts()); + clang::LangOptions langOpts; + + return std::make_unique(langOpts, module, dataLayoutStr, + std::move(targetInfo), rewriter); +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.h new file mode 100644 index 000000000000..44cd5a0ae1cb --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.h @@ -0,0 +1,108 @@ +//===--- LowerModule.h - Abstracts CIR's module lowering --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CodeGenModule.h. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERMODULE_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERMODULE_H + +#include "CIRLowerContext.h" +#include "LowerTypes.h" +#include "TargetLoweringInfo.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" +#include + +namespace mlir { +namespace cir { + +class LowerModule { + CIRLowerContext context; + ModuleOp module; + const std::unique_ptr Target; + mutable std::unique_ptr TheTargetCodeGenInfo; + std::unique_ptr ABI; + + LowerTypes types; + + PatternRewriter &rewriter; + +public: + LowerModule(clang::LangOptions opts, ModuleOp &module, StringAttr DL, + std::unique_ptr target, + PatternRewriter &rewriter); + ~LowerModule() = default; + + // Trivial getters. + LowerTypes &getTypes() { return types; } + CIRLowerContext &getContext() { return context; } + CIRCXXABI &getCXXABI() const { return *ABI; } + const clang::TargetInfo &getTarget() const { return *Target; } + MLIRContext *getMLIRContext() { return module.getContext(); } + ModuleOp &getModule() { return module; } + + const ::cir::CIRDataLayout &getDataLayout() const { + return types.getDataLayout(); + } + + const TargetLoweringInfo &getTargetLoweringInfo(); + + // FIXME(cir): This would be in ASTContext, not CodeGenModule. + const clang::TargetInfo &getTargetInfo() const { return *Target; } + + // FIXME(cir): This would be in ASTContext, not CodeGenModule. 
+ clang::TargetCXXABI::Kind getCXXABIKind() const { + auto kind = getTarget().getCXXABI().getKind(); + assert(!::cir::MissingFeatures::langOpts()); + return kind; + } + + void + constructAttributeList(StringRef Name, const LowerFunctionInfo &FI, + FuncOp CalleeInfo, // TODO(cir): Implement CalleeInfo? + FuncOp newFn, unsigned &CallingConv, + bool AttrOnCallSite, bool IsThunk); + + void setCIRFunctionAttributes(FuncOp GD, const LowerFunctionInfo &Info, + FuncOp F, bool IsThunk); + + /// Set function attributes for a function declaration. + void setFunctionAttributes(FuncOp oldFn, FuncOp newFn, + bool IsIncompleteFunction, bool IsThunk); + + // Create a CIR FuncOp with with the given signature. + FuncOp createCIRFunction( + StringRef MangledName, FuncType Ty, FuncOp D, bool ForVTable, + bool DontDefer = false, bool IsThunk = false, + ArrayRef = {}, // TODO(cir): __attribute__(()) stuff. + bool IsForDefinition = false); + + // Rewrite CIR FuncOp to match the target ABI. + LogicalResult rewriteFunctionDefinition(FuncOp op); + + // Rewrite CIR CallOp to match the target ABI. + LogicalResult rewriteFunctionCall(CallOp callOp, FuncOp funcOp); +}; + +std::unique_ptr createLowerModule(ModuleOp module, + PatternRewriter &rewriter); + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERMODULE_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerTypes.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerTypes.cpp new file mode 100644 index 000000000000..bdec98a64f43 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerTypes.cpp @@ -0,0 +1,121 @@ +//===--- LowerTypes.cpp - Type translation to target-specific types -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CodeGenTypes.cpp. The queries +// are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "LowerTypes.h" +#include "CIRToCIRArgMapping.h" +#include "LowerModule.h" +#include "mlir/IR/Types.h" +#include "mlir/Support/LLVM.h" +#include "clang/CIR/ABIArgInfo.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace ::mlir::cir; + +using ABIArgInfo = ::cir::ABIArgInfo; + +unsigned LowerTypes::clangCallConvToLLVMCallConv(clang::CallingConv CC) { + switch (CC) { + case clang::CC_C: + return llvm::CallingConv::C; + default: + llvm_unreachable("calling convention NYI"); + } +} + +LowerTypes::LowerTypes(LowerModule &LM, StringRef DLString) + : LM(LM), context(LM.getContext()), Target(LM.getTarget()), + CXXABI(LM.getCXXABI()), + TheABIInfo(LM.getTargetLoweringInfo().getABIInfo()), + mlirContext(LM.getMLIRContext()), DL(LM.getModule()) {} + +/// Return the ABI-specific function type for a CIR function type. 
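+/// The result type is taken from the return value's ABIArgInfo (or void when
+/// the return is ignored), and direct struct arguments that can be flattened
+/// are expanded into one IR argument per member.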
+FuncType LowerTypes::getFunctionType(const LowerFunctionInfo &FI) { + + mlir::Type resultType = {}; + const ::cir::ABIArgInfo &retAI = FI.getReturnInfo(); + switch (retAI.getKind()) { + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: + resultType = retAI.getCoerceToType(); + break; + case ::cir::ABIArgInfo::Ignore: + resultType = VoidType::get(getMLIRContext()); + break; + default: + llvm_unreachable("Missing ABIArgInfo::Kind"); + } + + CIRToCIRArgMapping IRFunctionArgs(getContext(), FI, true); + SmallVector ArgTypes(IRFunctionArgs.totalIRArgs()); + + // Add type for sret argument. + assert(!::cir::MissingFeatures::sretArgs()); + + // Add type for inalloca argument. + assert(!::cir::MissingFeatures::inallocaArgs()); + + // Add in all of the required arguments. + unsigned ArgNo = 0; + LowerFunctionInfo::const_arg_iterator it = FI.arg_begin(), + ie = it + FI.getNumRequiredArgs(); + for (; it != ie; ++it, ++ArgNo) { + const ABIArgInfo &ArgInfo = it->info; + + assert(!::cir::MissingFeatures::argumentPadding()); + + unsigned FirstIRArg, NumIRArgs; + std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); + + switch (ArgInfo.getKind()) { + case ABIArgInfo::Extend: + case ABIArgInfo::Direct: { + // Fast-isel and the optimizer generally like scalar values better than + // FCAs, so we flatten them if this is safe to do for this argument. + Type argType = ArgInfo.getCoerceToType(); + StructType st = dyn_cast(argType); + if (st && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { + assert(NumIRArgs == st.getNumElements()); + for (unsigned i = 0, e = st.getNumElements(); i != e; ++i) + ArgTypes[FirstIRArg + i] = st.getMembers()[i]; + } else { + assert(NumIRArgs == 1); + ArgTypes[FirstIRArg] = argType; + } + break; + } + default: + llvm_unreachable("Missing ABIArgInfo::Kind"); + } + } + + return FuncType::get(getMLIRContext(), ArgTypes, resultType, FI.isVariadic()); +} + +/// Convert a CIR type to its ABI-specific default form. +mlir::Type LowerTypes::convertType(Type T) { + /// NOTE(cir): It the original codegen this method is used to get the default + /// LLVM IR representation for a given AST type. When a the ABI-specific + /// function info sets a nullptr for a return or argument type, the default + /// type given by this method is used. In CIR's case, its types are already + /// supposed to be ABI-specific, so this method is not really useful here. + /// It's kept here for codegen parity's sake. + + // Certain CIR types are already ABI-specific, so we just return them. + if (isa(T)) { + return T; + } + + llvm::outs() << "Missing default ABI-specific type for " << T << "\n"; + llvm_unreachable("NYI"); +} diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerTypes.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerTypes.h new file mode 100644 index 000000000000..d6f20941544f --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerTypes.h @@ -0,0 +1,103 @@ +//===--- LowerTypes.cpp - Type lowering for CIR dialect -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/CodeGenTypes.cpp. The queries +// are adapted to operate on the CIR dialect, however. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERTYPES_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERTYPES_H + +#include "ABIInfo.h" +#include "CIRCXXABI.h" +#include "CIRLowerContext.h" +#include "LowerCall.h" +#include "mlir/IR/MLIRContext.h" +#include "clang/Basic/Specifiers.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/FnInfoOpts.h" + +namespace mlir { +namespace cir { + +// Forward declarations. +class LowerModule; + +/// This class organizes lowering to ABI-specific types in CIR. +class LowerTypes { + // FIXME(cir): This abstraction could likely be replaced by a MLIR interface + // or direct queries to CIR types. It here mostly for code parity. + +private: + LowerModule &LM; + CIRLowerContext &context; + const clang::TargetInfo &Target; + CIRCXXABI &CXXABI; + + // This should not be moved earlier, since its initialization depends on some + // of the previous reference members being already initialized + const ABIInfo &TheABIInfo; + + // Used to build types and other MLIR operations. + MLIRContext *mlirContext; + + ::cir::CIRDataLayout DL; + + const ABIInfo &getABIInfo() const { return TheABIInfo; } + +public: + LowerTypes(LowerModule &LM, StringRef DLString); + ~LowerTypes() = default; + + const ::cir::CIRDataLayout &getDataLayout() const { return DL; } + LowerModule &getLM() const { return LM; } + CIRCXXABI &getCXXABI() const { return CXXABI; } + CIRLowerContext &getContext() { return context; } + const clang::TargetInfo &getTarget() const { return Target; } + MLIRContext *getMLIRContext() { return mlirContext; } + + /// Convert clang calling convention to LLVM callilng convention. + unsigned clangCallConvToLLVMCallConv(clang::CallingConv CC); + + /// Free functions are functions that are compatible with an ordinary + /// C function pointer type. + /// FIXME(cir): Does the "free function" concept makes sense here? + const LowerFunctionInfo &arrangeFunctionDeclaration(FuncOp fnOp); + const LowerFunctionInfo &arrangeFreeFunctionCall(const OperandRange args, + const FuncType fnType, + bool chainCall); + const LowerFunctionInfo &arrangeFreeFunctionType(FuncType FTy); + + const LowerFunctionInfo &arrangeGlobalDeclaration(FuncOp fnOp); + + /// Arrange the argument and result information for an abstract value + /// of a given function type. This is the method which all of the + /// above functions ultimately defer to. + /// + /// \param resultType - ABI-agnostic CIR result type. + /// \param opts - Options to control the arrangement. + /// \param argTypes - ABI-agnostic CIR argument types. + /// \param required - Information about required/optional arguments. + const LowerFunctionInfo &arrangeLLVMFunctionInfo(Type resultType, + ::cir::FnInfoOpts opts, + ArrayRef argTypes, + RequiredArgs required); + + /// Return the ABI-specific function type for a CIR function type. + FuncType getFunctionType(const LowerFunctionInfo &FI); + + /// Convert a CIR type to its ABI-specific default form. 
+ Type convertType(Type T); +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_LOWERTYPES_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp new file mode 100644 index 000000000000..ea8ef6f28144 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/RecordLayoutBuilder.cpp @@ -0,0 +1,638 @@ +//=== RecordLayoutBuilder.cpp - Helper class for building record layouts ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/AST/CGRecordLayoutBuilder.cpp. The +// queries are adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#include "CIRLowerContext.h" +#include "CIRRecordLayout.h" +#include "mlir/IR/Types.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace mlir; +using namespace mlir::cir; + +namespace { + +//===-----------------------------------------------------------------------==// +// EmptySubobjectMap Implementation +//===----------------------------------------------------------------------===// + +/// Keeps track of which empty subobjects exist at different offsets while +/// laying out a C++ class. +class EmptySubobjectMap { + [[maybe_unused]] const CIRLowerContext &Context; + uint64_t CharWidth; + + /// The class whose empty entries we're keeping track of. + const StructType Class; + + /// The highest offset known to contain an empty base subobject. + clang::CharUnits MaxEmptyClassOffset; + + /// Compute the size of the largest base or member subobject that is empty. + void ComputeEmptySubobjectSizes(); + +public: + /// This holds the size of the largest empty subobject (either a base + /// or a member). Will be zero if the record being built doesn't contain + /// any empty classes. + clang::CharUnits SizeOfLargestEmptySubobject; + + EmptySubobjectMap(const CIRLowerContext &Context, const StructType Class) + : Context(Context), CharWidth(Context.getCharWidth()), Class(Class) { + ComputeEmptySubobjectSizes(); + } + + /// Return whether a field can be placed at the given offset. + bool canPlaceFieldAtOffset(const Type Ty, clang::CharUnits Offset); +}; + +void EmptySubobjectMap::ComputeEmptySubobjectSizes() { + // Check the bases. + assert(!::cir::MissingFeatures::getCXXRecordBases()); + + // Check the fields. + for (const auto FT : Class.getMembers()) { + assert(!::cir::MissingFeatures::qualifiedTypes()); + const auto RT = dyn_cast(FT); + + // We only care about record types. + if (!RT) + continue; + + // TODO(cir): Handle nested record types. + llvm_unreachable("NYI"); + } +} + +bool EmptySubobjectMap::canPlaceFieldAtOffset(const Type Ty, + clang::CharUnits Offset) { + llvm_unreachable("NYI"); +} + +//===-----------------------------------------------------------------------==// +// ItaniumRecordLayoutBuilder Implementation +//===----------------------------------------------------------------------===// + +class ItaniumRecordLayoutBuilder { +protected: + // FIXME(cir): Remove this and make the appropriate fields public. 
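One detail worth spelling out from the LowerTypes class above: the comment about TheABIInfo not being moved earlier relies on the C++ rule that non-static data members are initialized in declaration order, not in the order they appear in the constructor's initializer list. A minimal standalone illustration of that language rule (Demo is a made-up name, unrelated to the patch):

#include <cassert>

struct Demo {
  int base;    // declared first, therefore initialized first
  int derived; // may safely depend on 'base'
  // The textual order of the initializer list is irrelevant; declaration
  // order is what the language uses (compilers emit -Wreorder here).
  Demo() : derived(base + 1), base(41) {}
};

int main() {
  Demo d;
  assert(d.derived == 42); // base(41) ran first despite being listed second
  return 0;
}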
+ friend class mlir::cir::CIRLowerContext; + + const CIRLowerContext &Context; + + EmptySubobjectMap *EmptySubobjects; + + /// Size - The current size of the record layout. + uint64_t Size; + + /// Alignment - The current alignment of the record layout. + clang::CharUnits Alignment; + + /// PreferredAlignment - The preferred alignment of the record layout. + clang::CharUnits PreferredAlignment; + + /// The alignment if attribute packed is not used. + clang::CharUnits UnpackedAlignment; + + /// \brief The maximum of the alignments of top-level members. + clang::CharUnits UnadjustedAlignment; + + SmallVector FieldOffsets; + + /// Whether the external AST source has provided a layout for this + /// record. + unsigned UseExternalLayout : 1; + + /// Whether we need to infer alignment, even when we have an + /// externally-provided layout. + unsigned InferAlignment : 1; + + /// Packed - Whether the record is packed or not. + unsigned Packed : 1; + + unsigned IsUnion : 1; + + unsigned IsMac68kAlign : 1; + + unsigned IsNaturalAlign : 1; + + unsigned IsMsStruct : 1; + + /// UnfilledBitsInLastUnit - If the last field laid out was a bitfield, + /// this contains the number of bits in the last unit that can be used for + /// an adjacent bitfield if necessary. The unit in question is usually + /// a byte, but larger units are used if IsMsStruct. + unsigned char UnfilledBitsInLastUnit; + + /// LastBitfieldStorageUnitSize - If IsMsStruct, represents the size of the + /// storage unit of the previous field if it was a bitfield. + unsigned char LastBitfieldStorageUnitSize; + + /// MaxFieldAlignment - The maximum allowed field alignment. This is set by + /// #pragma pack. + clang::CharUnits MaxFieldAlignment; + + /// DataSize - The data size of the record being laid out. + uint64_t DataSize; + + clang::CharUnits NonVirtualSize; + clang::CharUnits NonVirtualAlignment; + clang::CharUnits PreferredNVAlignment; + + /// If we've laid out a field but not included its tail padding in Size yet, + /// this is the size up to the end of that field. + clang::CharUnits PaddedFieldSize; + + /// The primary base class (if one exists) of the class we're laying out. + const StructType PrimaryBase; + + /// Whether the primary base of the class we're laying out is virtual. + bool PrimaryBaseIsVirtual; + + /// Whether the class provides its own vtable/vftbl pointer, as opposed to + /// inheriting one from a primary base class. + bool HasOwnVFPtr; + + /// the flag of field offset changing due to packed attribute. + bool HasPackedField; + + /// An auxiliary field used for AIX. When there are OverlappingEmptyFields + /// existing in the aggregate, the flag shows if the following first non-empty + /// or empty-but-non-overlapping field has been handled, if any. 
+ bool HandledFirstNonOverlappingEmptyField; + +public: + ItaniumRecordLayoutBuilder(const CIRLowerContext &Context, + EmptySubobjectMap *EmptySubobjects) + : Context(Context), EmptySubobjects(EmptySubobjects), Size(0), + Alignment(clang::CharUnits::One()), + PreferredAlignment(clang::CharUnits::One()), + UnpackedAlignment(clang::CharUnits::One()), + UnadjustedAlignment(clang::CharUnits::One()), UseExternalLayout(false), + InferAlignment(false), Packed(false), IsUnion(false), + IsMac68kAlign(false), + IsNaturalAlign(!Context.getTargetInfo().getTriple().isOSAIX()), + IsMsStruct(false), UnfilledBitsInLastUnit(0), + LastBitfieldStorageUnitSize(0), + MaxFieldAlignment(clang::CharUnits::Zero()), DataSize(0), + NonVirtualSize(clang::CharUnits::Zero()), + NonVirtualAlignment(clang::CharUnits::One()), + PreferredNVAlignment(clang::CharUnits::One()), + PaddedFieldSize(clang::CharUnits::Zero()), PrimaryBaseIsVirtual(false), + HasOwnVFPtr(false), HasPackedField(false), + HandledFirstNonOverlappingEmptyField(false) {} + + void layout(const StructType D); + + void layoutFields(const StructType D); + void layoutField(const Type Ty, bool InsertExtraPadding); + + void UpdateAlignment(clang::CharUnits NewAlignment, + clang::CharUnits UnpackedNewAlignment, + clang::CharUnits PreferredAlignment); + + void checkFieldPadding(uint64_t Offset, uint64_t UnpaddedOffset, + uint64_t UnpackedOffset, unsigned UnpackedAlign, + bool isPacked, const Type Ty); + + clang::CharUnits getSize() const { + assert(Size % Context.getCharWidth() == 0); + return Context.toCharUnitsFromBits(Size); + } + uint64_t getSizeInBits() const { return Size; } + + void setSize(clang::CharUnits NewSize) { Size = Context.toBits(NewSize); } + void setSize(uint64_t NewSize) { Size = NewSize; } + + clang::CharUnits getDataSize() const { + assert(DataSize % Context.getCharWidth() == 0); + return Context.toCharUnitsFromBits(DataSize); + } + + /// Initialize record layout for the given record decl. + void initializeLayout(const Type Ty); + + uint64_t getDataSizeInBits() const { return DataSize; } + + void setDataSize(clang::CharUnits NewSize) { + DataSize = Context.toBits(NewSize); + } + void setDataSize(uint64_t NewSize) { DataSize = NewSize; } +}; + +void ItaniumRecordLayoutBuilder::layout(const StructType RT) { + initializeLayout(RT); + + // Lay out the vtable and the non-virtual bases. + assert(!::cir::MissingFeatures::isCXXRecordDecl() && + !::cir::MissingFeatures::CXXRecordIsDynamicClass()); + + layoutFields(RT); + + // FIXME(cir): Handle virtual-related layouts. + assert(!::cir::MissingFeatures::getCXXRecordBases()); + + assert(!::cir::MissingFeatures::itaniumRecordLayoutBuilderFinishLayout()); +} + +void ItaniumRecordLayoutBuilder::initializeLayout(const mlir::Type Ty) { + if (const auto RT = dyn_cast(Ty)) { + IsUnion = RT.isUnion(); + assert(!::cir::MissingFeatures::recordDeclIsMSStruct()); + } + + assert(!::cir::MissingFeatures::recordDeclIsPacked()); + + // Honor the default struct packing maximum alignment flag. + if (unsigned DefaultMaxFieldAlignment = Context.getLangOpts().PackStruct) { + llvm_unreachable("NYI"); + } + + // mac68k alignment supersedes maximum field alignment and attribute aligned, + // and forces all structures to have 2-byte alignment. The IBM docs on it + // allude to additional (more complicated) semantics, especially with regard + // to bit-fields, but gcc appears not to follow that. 
+ if (::cir::MissingFeatures::declHasAlignMac68kAttr()) { + llvm_unreachable("NYI"); + } else { + if (::cir::MissingFeatures::declHasAlignNaturalAttr()) + llvm_unreachable("NYI"); + + if (::cir::MissingFeatures::declHasMaxFieldAlignmentAttr()) + llvm_unreachable("NYI"); + + if (::cir::MissingFeatures::declGetMaxAlignment()) + llvm_unreachable("NYI"); + } + + HandledFirstNonOverlappingEmptyField = + !Context.getTargetInfo().defaultsToAIXPowerAlignment() || IsNaturalAlign; + + // If there is an external AST source, ask it for the various offsets. + if (const auto RT = dyn_cast(Ty)) { + if (::cir::MissingFeatures::astContextGetExternalSource()) { + llvm_unreachable("NYI"); + } + } +} + +void ItaniumRecordLayoutBuilder::layoutField(const Type D, + bool InsertExtraPadding) { + // auto FieldClass = D.dyn_cast(); + assert(!::cir::MissingFeatures::fieldDeclIsPotentiallyOverlapping() && + !::cir::MissingFeatures::CXXRecordDeclIsEmptyCXX11()); + bool IsOverlappingEmptyField = false; // FIXME(cir): Needs more features. + + clang::CharUnits FieldOffset = (IsUnion || IsOverlappingEmptyField) + ? clang::CharUnits::Zero() + : getDataSize(); + + const bool DefaultsToAIXPowerAlignment = + Context.getTargetInfo().defaultsToAIXPowerAlignment(); + bool FoundFirstNonOverlappingEmptyFieldForAIX = false; + if (DefaultsToAIXPowerAlignment && !HandledFirstNonOverlappingEmptyField) { + llvm_unreachable("NYI"); + } + + assert(!::cir::MissingFeatures::fieldDeclIsBitfield()); + + uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit; + // Reset the unfilled bits. + UnfilledBitsInLastUnit = 0; + LastBitfieldStorageUnitSize = 0; + + llvm::Triple Target = Context.getTargetInfo().getTriple(); + + clang::AlignRequirementKind AlignRequirement = + clang::AlignRequirementKind::None; + clang::CharUnits FieldSize; + clang::CharUnits FieldAlign; + // The amount of this class's dsize occupied by the field. + // This is equal to FieldSize unless we're permitted to pack + // into the field's tail padding. + clang::CharUnits EffectiveFieldSize; + + auto setDeclInfo = [&](bool IsIncompleteArrayType) { + auto TI = Context.getTypeInfoInChars(D); + FieldAlign = TI.Align; + // Flexible array members don't have any size, but they have to be + // aligned appropriately for their element type. + EffectiveFieldSize = FieldSize = + IsIncompleteArrayType ? clang::CharUnits::Zero() : TI.Width; + AlignRequirement = TI.AlignRequirement; + }; + + if (isa(D) && cast(D).getSize() == 0) { + llvm_unreachable("NYI"); + } else { + setDeclInfo(false /* IsIncompleteArrayType */); + + if (::cir::MissingFeatures::fieldDeclIsPotentiallyOverlapping()) + llvm_unreachable("NYI"); + + if (IsMsStruct) + llvm_unreachable("NYI"); + } + + assert(!::cir::MissingFeatures::recordDeclIsPacked() && + !::cir::MissingFeatures::CXXRecordDeclIsPOD()); + bool FieldPacked = false; // FIXME(cir): Needs more features. + + // When used as part of a typedef, or together with a 'packed' attribute, the + // 'aligned' attribute can be used to decrease alignment. In that case, it + // overrides any computed alignment we have, and there is no need to upgrade + // the alignment. + auto alignedAttrCanDecreaseAIXAlignment = [AlignRequirement, FieldPacked] { + // Enum alignment sources can be safely ignored here, because this only + // helps decide whether we need the AIX alignment upgrade, which only + // applies to floating-point types. 
+ return AlignRequirement == clang::AlignRequirementKind::RequiredByTypedef || + (AlignRequirement == clang::AlignRequirementKind::RequiredByRecord && + FieldPacked); + }; + + // The AIX `power` alignment rules apply the natural alignment of the + // "first member" if it is of a floating-point data type (or is an aggregate + // whose recursively "first" member or element is such a type). The alignment + // associated with these types for subsequent members use an alignment value + // where the floating-point data type is considered to have 4-byte alignment. + // + // For the purposes of the foregoing: vtable pointers, non-empty base classes, + // and zero-width bit-fields count as prior members; members of empty class + // types marked `no_unique_address` are not considered to be prior members. + clang::CharUnits PreferredAlign = FieldAlign; + if (DefaultsToAIXPowerAlignment && !alignedAttrCanDecreaseAIXAlignment() && + (FoundFirstNonOverlappingEmptyFieldForAIX || IsNaturalAlign)) { + llvm_unreachable("NYI"); + } + + // The align if the field is not packed. This is to check if the attribute + // was unnecessary (-Wpacked). + clang::CharUnits UnpackedFieldAlign = FieldAlign; + clang::CharUnits PackedFieldAlign = clang::CharUnits::One(); + clang::CharUnits UnpackedFieldOffset = FieldOffset; + // clang::CharUnits OriginalFieldAlign = UnpackedFieldAlign; + + assert(!::cir::MissingFeatures::fieldDeclGetMaxFieldAlignment()); + clang::CharUnits MaxAlignmentInChars = clang::CharUnits::Zero(); + PackedFieldAlign = std::max(PackedFieldAlign, MaxAlignmentInChars); + PreferredAlign = std::max(PreferredAlign, MaxAlignmentInChars); + UnpackedFieldAlign = std::max(UnpackedFieldAlign, MaxAlignmentInChars); + + // The maximum field alignment overrides the aligned attribute. + if (!MaxFieldAlignment.isZero()) { + llvm_unreachable("NYI"); + } + + if (!FieldPacked) + FieldAlign = UnpackedFieldAlign; + if (DefaultsToAIXPowerAlignment) + llvm_unreachable("NYI"); + if (FieldPacked) { + llvm_unreachable("NYI"); + } + + clang::CharUnits AlignTo = + !DefaultsToAIXPowerAlignment ? FieldAlign : PreferredAlign; + // Round up the current record size to the field's alignment boundary. + FieldOffset = FieldOffset.alignTo(AlignTo); + UnpackedFieldOffset = UnpackedFieldOffset.alignTo(UnpackedFieldAlign); + + if (UseExternalLayout) { + llvm_unreachable("NYI"); + } else { + if (!IsUnion && EmptySubobjects) { + // Check if we can place the field at this offset. + while (/*!EmptySubobjects->CanPlaceFieldAtOffset(D, FieldOffset)*/ + false) { + llvm_unreachable("NYI"); + } + } + } + + // Place this field at the current location. + FieldOffsets.push_back(Context.toBits(FieldOffset)); + + if (!UseExternalLayout) + checkFieldPadding(Context.toBits(FieldOffset), UnpaddedFieldOffset, + Context.toBits(UnpackedFieldOffset), + Context.toBits(UnpackedFieldAlign), FieldPacked, D); + + if (InsertExtraPadding) { + llvm_unreachable("NYI"); + } + + // Reserve space for this field. + if (!IsOverlappingEmptyField) { + // uint64_t EffectiveFieldSizeInBits = Context.toBits(EffectiveFieldSize); + if (IsUnion) + llvm_unreachable("NYI"); + else + setDataSize(FieldOffset + EffectiveFieldSize); + + PaddedFieldSize = std::max(PaddedFieldSize, FieldOffset + FieldSize); + setSize(std::max(getSizeInBits(), getDataSizeInBits())); + } else { + llvm_unreachable("NYI"); + } + + // Remember max struct/class ABI-specified alignment. 
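Stripped of bit-fields, packing, AIX rules and the NYI paths, the arithmetic that layoutField and UpdateAlignment perform in this builder is: round the current data size up to the field's alignment, place the field there, keep the running maximum alignment, and round the final size up to that alignment. A minimal standalone model with made-up Field/Layout names, not the builder's real interface:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Field { uint64_t size; uint64_t align; };                    // bytes
struct Layout { std::vector<uint64_t> offsets; uint64_t size; uint64_t align; };

static uint64_t alignTo(uint64_t value, uint64_t align) {
  return (value + align - 1) / align * align;
}

// Itanium-style sequential layout: offset = alignTo(size, fieldAlign),
// size = offset + fieldSize, recordAlign = max(recordAlign, fieldAlign),
// and the final size is rounded up to the record alignment (tail padding).
static Layout layoutRecord(const std::vector<Field> &fields) {
  Layout l{{}, 0, 1};
  for (const Field &f : fields) {
    uint64_t offset = alignTo(l.size, f.align);
    l.offsets.push_back(offset);
    l.size = offset + f.size;
    l.align = std::max(l.align, f.align);
  }
  l.size = alignTo(l.size, l.align);
  return l;
}

int main() {
  // struct { char c; int i; char d; } on a typical 64-bit target.
  Layout l = layoutRecord({{1, 1}, {4, 4}, {1, 1}});
  assert(l.offsets == (std::vector<uint64_t>{0, 4, 8}));
  assert(l.size == 12 && l.align == 4);
  return 0;
}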
+ UnadjustedAlignment = std::max(UnadjustedAlignment, FieldAlign); + UpdateAlignment(FieldAlign, UnpackedFieldAlign, PreferredAlign); + + // For checking the alignment of inner fields against + // the alignment of its parent record. + // FIXME(cir): We need to track the parent record of the current type being + // laid out. A regular mlir::Type has not way of doing this. In fact, we will + // likely need an external abstraction, as I don't think this is possible with + // just the field type. + assert(!::cir::MissingFeatures::fieldDeclAbstraction()); + + if (Packed && !FieldPacked && PackedFieldAlign < FieldAlign) + llvm_unreachable("NYI"); +} + +void ItaniumRecordLayoutBuilder::layoutFields(const StructType D) { + // Layout each field, for now, just sequentially, respecting alignment. In + // the future, this will need to be tweakable by targets. + assert(!::cir::MissingFeatures::recordDeclMayInsertExtraPadding() && + !Context.getLangOpts().SanitizeAddressFieldPadding); + bool InsertExtraPadding = false; + assert(!::cir::MissingFeatures::recordDeclHasFlexibleArrayMember()); + bool HasFlexibleArrayMember = false; + for (const auto FT : D.getMembers()) { + layoutField(FT, InsertExtraPadding && (FT != D.getMembers().back() || + !HasFlexibleArrayMember)); + } +} + +void ItaniumRecordLayoutBuilder::UpdateAlignment( + clang::CharUnits NewAlignment, clang::CharUnits UnpackedNewAlignment, + clang::CharUnits PreferredNewAlignment) { + // The alignment is not modified when using 'mac68k' alignment or when + // we have an externally-supplied layout that also provides overall alignment. + if (IsMac68kAlign || (UseExternalLayout && !InferAlignment)) + return; + + if (NewAlignment > Alignment) { + assert(llvm::isPowerOf2_64(NewAlignment.getQuantity()) && + "Alignment not a power of 2"); + Alignment = NewAlignment; + } + + if (UnpackedNewAlignment > UnpackedAlignment) { + assert(llvm::isPowerOf2_64(UnpackedNewAlignment.getQuantity()) && + "Alignment not a power of 2"); + UnpackedAlignment = UnpackedNewAlignment; + } + + if (PreferredNewAlignment > PreferredAlignment) { + assert(llvm::isPowerOf2_64(PreferredNewAlignment.getQuantity()) && + "Alignment not a power of 2"); + PreferredAlignment = PreferredNewAlignment; + } +} + +void ItaniumRecordLayoutBuilder::checkFieldPadding( + uint64_t Offset, uint64_t UnpaddedOffset, uint64_t UnpackedOffset, + unsigned UnpackedAlign, bool isPacked, const Type Ty) { + // We let objc ivars without warning, objc interfaces generally are not used + // for padding tricks. + if (::cir::MissingFeatures::objCIvarDecls()) + llvm_unreachable("NYI"); + + // FIXME(cir): Should the following be skiped in CIR? + // Don't warn about structs created without a SourceLocation. This can + // be done by clients of the AST, such as codegen. + + unsigned CharBitNum = Context.getTargetInfo().getCharWidth(); + + // Warn if padding was introduced to the struct/class. + if (!IsUnion && Offset > UnpaddedOffset) { + unsigned PadSize = Offset - UnpaddedOffset; + // bool InBits = true; + if (PadSize % CharBitNum == 0) { + PadSize = PadSize / CharBitNum; + // InBits = false; + } + assert(::cir::MissingFeatures::bitFieldPaddingDiagnostics()); + } + if (isPacked && Offset != UnpackedOffset) { + HasPackedField = true; + } +} + +//===-----------------------------------------------------------------------==// +// Misc. 
Helper Functions +//===----------------------------------------------------------------------===// + +bool isMsLayout(const CIRLowerContext &Context) { + return Context.getTargetInfo().getCXXABI().isMicrosoft(); +} + +/// Does the target C++ ABI require us to skip over the tail-padding +/// of the given class (considering it as a base class) when allocating +/// objects? +static bool mustSkipTailPadding(clang::TargetCXXABI ABI, const StructType RD) { + assert(!::cir::MissingFeatures::recordDeclIsCXXDecl()); + switch (ABI.getTailPaddingUseRules()) { + case clang::TargetCXXABI::AlwaysUseTailPadding: + return false; + + case clang::TargetCXXABI::UseTailPaddingUnlessPOD03: + // http://itanium-cxx-abi.github.io/cxx-abi/abi.html#POD : + // In general, a type is considered a POD for the purposes of + // layout if it is a POD type (in the sense of ISO C++ + // [basic.types]). However, a POD-struct or POD-union (in the + // sense of ISO C++ [class]) with a bitfield member whose + // declared width is wider than the declared type of the + // bitfield is not a POD for the purpose of layout. Similarly, + // an array type is not a POD for the purpose of layout if the + // element type of the array is not a POD for the purpose of + // layout. + // + // Where references to the ISO C++ are made in this paragraph, + // the Technical Corrigendum 1 version of the standard is + // intended. + // FIXME(cir): This always returns true since we can't check if a CIR record + // is a POD type. + assert(!::cir::MissingFeatures::CXXRecordDeclIsPOD()); + return true; + + case clang::TargetCXXABI::UseTailPaddingUnlessPOD11: + // This is equivalent to RD->getTypeForDecl().isCXX11PODType(), + // but with a lot of abstraction penalty stripped off. This does + // assume that these properties are set correctly even in C++98 + // mode; fortunately, that is true because we want to assign + // consistently semantics to the type-traits intrinsics (or at + // least as many of them as possible). + llvm_unreachable("NYI"); + } + + llvm_unreachable("bad tail-padding use kind"); +} + +} // namespace + +/// Get or compute information about the layout of the specified record +/// (struct/union/class), which indicates its size and field position +/// information. +const CIRRecordLayout &CIRLowerContext::getCIRRecordLayout(const Type D) const { + assert(isa(D) && "Not a record type"); + auto RT = dyn_cast(D); + + assert(RT.isComplete() && "Cannot get layout of forward declarations!"); + + // FIXME(cir): Use a more MLIR-based approach by using it's buitin data layout + // features, such as interfaces, cacheing, and the DLTI dialect. + + const CIRRecordLayout *NewEntry = nullptr; + + if (isMsLayout(*this)) { + llvm_unreachable("NYI"); + } else { + // FIXME(cir): Add if-else separating C and C++ records. + assert(!::cir::MissingFeatures::isCXXRecordDecl()); + EmptySubobjectMap EmptySubobjects(*this, RT); + ItaniumRecordLayoutBuilder Builder(*this, &EmptySubobjects); + Builder.layout(RT); + + // In certain situations, we are allowed to lay out objects in the + // tail-padding of base classes. This is ABI-dependent. + // FIXME: this should be stored in the record layout. + bool skipTailPadding = mustSkipTailPadding(getTargetInfo().getCXXABI(), RT); + + // FIXME: This should be done in FinalizeLayout. + clang::CharUnits DataSize = + skipTailPadding ? Builder.getSize() : Builder.getDataSize(); + clang::CharUnits NonVirtualSize = + skipTailPadding ? 
DataSize : Builder.NonVirtualSize; + assert(!::cir::MissingFeatures::CXXRecordIsDynamicClass()); + // FIXME(cir): Whose responsible for freeing the allocation below? + NewEntry = new CIRRecordLayout( + *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, + Builder.UnadjustedAlignment, + /*RequiredAlignment : used by MS-ABI)*/ + Builder.Alignment, Builder.HasOwnVFPtr, /*RD->isDynamicClass()=*/false, + clang::CharUnits::fromQuantity(-1), DataSize, Builder.FieldOffsets, + NonVirtualSize, Builder.NonVirtualAlignment, + Builder.PreferredNVAlignment, + EmptySubobjects.SizeOfLargestEmptySubobject, Builder.PrimaryBase, + Builder.PrimaryBaseIsVirtual, nullptr, false, false); + } + + // TODO(cir): Add option to dump the layouts. + assert(!::cir::MissingFeatures::cacheRecordLayouts()); + + return *NewEntry; +} diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.cpp new file mode 100644 index 000000000000..2502f8f0dfcb --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.cpp @@ -0,0 +1,12 @@ +#include "TargetLoweringInfo.h" + +namespace mlir { +namespace cir { + +TargetLoweringInfo::TargetLoweringInfo(std::unique_ptr Info) + : Info(std::move(Info)) {} + +TargetLoweringInfo::~TargetLoweringInfo() = default; + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h new file mode 100644 index 000000000000..4350458eeed2 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h @@ -0,0 +1,39 @@ +//===---- TargetInfo.h - Encapsulate target details -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics clang/lib/CodeGen/TargetInfo.h. The queries are +// adapted to operate on the CIR dialect, however. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H + +#include "LowerModule.h" +#include "TargetLoweringInfo.h" +#include "clang/CIR/Target/AArch64.h" +#include "clang/CIR/Target/x86.h" + +namespace mlir { +namespace cir { + +std::unique_ptr +createX86_64TargetLoweringInfo(LowerModule &CGM, + ::cir::X86AVXABILevel AVXLevel); + +std::unique_ptr +createAArch64TargetLoweringInfo(LowerModule &CGM, + ::cir::AArch64ABIKind AVXLevel); + +std::unique_ptr +createSPIRVTargetLoweringInfo(LowerModule &CGM); + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.cpp new file mode 100644 index 000000000000..ee670e1add4f --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.cpp @@ -0,0 +1,12 @@ +//===---- TargetLoweringInfo.cpp - Encapsulate target details ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
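The observable effect of the UseTailPaddingUnlessPOD03 rule handled by mustSkipTailPadding above is easy to reproduce: under the Itanium ABI a non-POD base's tail padding may be reused for derived-class members, while a POD base's may not. A small standalone check; the printed sizes assume a typical Itanium-ABI 64-bit target.

#include <iostream>

struct PodBase { int i; char c; };                    // POD: tail padding kept
struct PodDerived : PodBase { char d; };              // d placed after the base

struct NonPodBase { int i; char c; NonPodBase() {} }; // non-POD (user ctor)
struct NonPodDerived : NonPodBase { char d; };        // d may reuse tail padding

int main() {
  // Typical Itanium-ABI results: 8 and 12 for the POD pair, 8 and 8 for the
  // non-POD pair, because only non-PODs allow tail-padding reuse.
  std::cout << sizeof(PodBase) << ' ' << sizeof(PodDerived) << '\n';
  std::cout << sizeof(NonPodBase) << ' ' << sizeof(NonPodDerived) << '\n';
  return 0;
}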
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics the TargetCodeGenInfo class from the file +// clang/lib/CodeGen/TargetInfo.cpp. +// +//===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h new file mode 100644 index 000000000000..4be2db10c1dd --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetLoweringInfo.h @@ -0,0 +1,42 @@ +//===---- TargetLoweringInfo.h - Encapsulate target details -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file partially mimics the TargetCodeGenInfo class from the file +// clang/lib/CodeGen/TargetInfo.h. This particular class was isolated in this +// file due to build errors when trying to include the entire TargetInfo.h file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETLOWERINGINFO_H +#define LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETLOWERINGINFO_H + +#include "ABIInfo.h" +#include + +#include "clang/CIR/Dialect/IR/CIRAttrs.h" + +namespace mlir { +namespace cir { + +class TargetLoweringInfo { +private: + std::unique_ptr Info; + +public: + TargetLoweringInfo(std::unique_ptr Info); + virtual ~TargetLoweringInfo(); + + const ABIInfo &getABIInfo() const { return *Info; } + virtual unsigned getTargetAddrSpaceFromCIRAddrSpace( + mlir::cir::AddressSpaceAttr addressSpaceAttr) const = 0; +}; + +} // namespace cir +} // namespace mlir + +#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETLOWERINGINFO_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AArch64.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AArch64.cpp new file mode 100644 index 000000000000..a3406b722c41 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AArch64.cpp @@ -0,0 +1,140 @@ +//===- AArch64.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Target/AArch64.h" +#include "ABIInfoImpl.h" +#include "LowerFunctionInfo.h" +#include "LowerTypes.h" +#include "TargetInfo.h" +#include "TargetLoweringInfo.h" +#include "clang/CIR/ABIArgInfo.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" + +using AArch64ABIKind = ::cir::AArch64ABIKind; +using ABIArgInfo = ::cir::ABIArgInfo; +using MissingFeature = ::cir::MissingFeatures; + +namespace mlir { +namespace cir { + +//===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { + AArch64ABIKind Kind; + +public: + AArch64ABIInfo(LowerTypes &CGT, AArch64ABIKind Kind) + : ABIInfo(CGT), Kind(Kind) {} + +private: + AArch64ABIKind getABIKind() const { return Kind; } + bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; } + + ABIArgInfo classifyReturnType(Type RetTy, bool IsVariadic) const; + ABIArgInfo classifyArgumentType(Type RetTy, bool IsVariadic, + unsigned CallingConvention) const; + + void computeInfo(LowerFunctionInfo &FI) const override { + if (!::mlir::cir::classifyReturnType(getCXXABI(), FI, *this)) + FI.getReturnInfo() = + classifyReturnType(FI.getReturnType(), FI.isVariadic()); + + for (auto &it : FI.arguments()) + it.info = classifyArgumentType(it.type, FI.isVariadic(), + FI.getCallingConvention()); + } +}; + +class AArch64TargetLoweringInfo : public TargetLoweringInfo { +public: + AArch64TargetLoweringInfo(LowerTypes <, AArch64ABIKind Kind) + : TargetLoweringInfo(std::make_unique(LT, Kind)) { + assert(!MissingFeature::swift()); + } + + unsigned getTargetAddrSpaceFromCIRAddrSpace( + mlir::cir::AddressSpaceAttr addressSpaceAttr) const override { + using Kind = mlir::cir::AddressSpaceAttr::Kind; + switch (addressSpaceAttr.getValue()) { + case Kind::offload_private: + case Kind::offload_local: + case Kind::offload_global: + case Kind::offload_constant: + case Kind::offload_generic: + return 0; + default: + llvm_unreachable("Unknown CIR address space for this target"); + } + } +}; + +} // namespace + +ABIArgInfo AArch64ABIInfo::classifyReturnType(Type RetTy, + bool IsVariadic) const { + if (isa(RetTy)) + return ABIArgInfo::getIgnore(); + + if (const auto _ = dyn_cast(RetTy)) { + llvm_unreachable("NYI"); + } + + // Large vector types should be returned via memory. + if (isa(RetTy) && getContext().getTypeSize(RetTy) > 128) + llvm_unreachable("NYI"); + + if (!isAggregateTypeForABI(RetTy)) { + // NOTE(cir): Skip enum handling. + + if (MissingFeature::fixedSizeIntType()) + llvm_unreachable("NYI"); + + return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS() + ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); + } + + llvm_unreachable("NYI"); +} + +ABIArgInfo +AArch64ABIInfo::classifyArgumentType(Type Ty, bool IsVariadic, + unsigned CallingConvention) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO(cir): check for illegal vector types. + if (MissingFeature::vectorType()) + llvm_unreachable("NYI"); + + if (!isAggregateTypeForABI(Ty)) { + // NOTE(cir): Enum is IntType in CIR. Skip enum handling here. 
+ + if (MissingFeature::fixedSizeIntType()) + llvm_unreachable("NYI"); + + return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS() + ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); + } + + llvm_unreachable("NYI"); +} + +std::unique_ptr +createAArch64TargetLoweringInfo(LowerModule &CGM, AArch64ABIKind Kind) { + return std::make_unique(CGM.getTypes(), Kind); +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareAArch64CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareAArch64CXXABI.cpp new file mode 100644 index 000000000000..7d43000877b7 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareAArch64CXXABI.cpp @@ -0,0 +1,362 @@ +//====- LoweringPrepareArm64CXXABI.cpp - Arm64 ABI specific code -----====// +// +// Part of the LLVM Project, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// +// +// This file provides ARM64 C++ ABI specific code that is used during LLVMIR +// lowering prepare. +// +//===------------------------------------------------------------------===// + +// TODO(cir): Refactor this to follow some level of codegen parity. + +#include "../LoweringPrepareItaniumCXXABI.h" +#include "clang/AST/CharUnits.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" + +#include + +using cir::AArch64ABIKind; + +namespace { +class LoweringPrepareAArch64CXXABI : public LoweringPrepareItaniumCXXABI { +public: + LoweringPrepareAArch64CXXABI(AArch64ABIKind k) : Kind(k) {} + mlir::Value lowerVAArg(cir::CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) override; + +private: + AArch64ABIKind Kind; + mlir::Value lowerAAPCSVAArg(cir::CIRBaseBuilderTy &builder, + mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout); + bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; } + mlir::Value lowerMSVAArg(cir::CIRBaseBuilderTy &builder, + mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) { + llvm_unreachable("MSVC ABI not supported yet"); + } + mlir::Value lowerDarwinVAArg(cir::CIRBaseBuilderTy &builder, + mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) { + llvm_unreachable("Darwin ABI not supported yet"); + } +}; +} // namespace + +cir::LoweringPrepareCXXABI * +cir::LoweringPrepareCXXABI::createAArch64ABI(AArch64ABIKind k) { + return new LoweringPrepareAArch64CXXABI(k); +} + +mlir::Value LoweringPrepareAArch64CXXABI::lowerAAPCSVAArg( + cir::CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) { + auto loc = op->getLoc(); + auto valist = op->getOperand(0); + auto opResTy = op.getType(); + // front end should not produce non-scalar type of VAArgOp + bool isSupportedType = + mlir::isa(opResTy); + + // Homogenous Aggregate type not supported and indirect arg + // passing not supported yet. And for these supported types, + // we should not have alignment greater than 8 problem. + assert(isSupportedType); + assert(!cir::MissingFeatures::classifyArgumentTypeForAArch64()); + // indirect arg passing would expect one more level of pointer dereference. 
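For the scalar cases that classifyReturnType and classifyArgumentType above handle so far, the decision reduces to: integer types narrower than 32 bits are promoted by the caller (Extend) under Apple's DarwinPCS, and passed as-is (Direct) under standard AAPCS64. A tiny standalone model of that decision; the enum and function names are illustrative, not CIR API.

#include <cassert>

enum class AArch64ABIKind { AAPCS, DarwinPCS };
enum class ArgClass { Direct, Extend };

// Mirrors isPromotableIntegerTypeForABI + isDarwinPCS for plain integers:
// only sub-32-bit integers on DarwinPCS get the caller-side extension.
static ArgClass classifyScalarInt(unsigned bitWidth, AArch64ABIKind kind) {
  bool promotable = bitWidth < 32;
  if (promotable && kind == AArch64ABIKind::DarwinPCS)
    return ArgClass::Extend;
  return ArgClass::Direct;
}

int main() {
  assert(classifyScalarInt(8, AArch64ABIKind::DarwinPCS) == ArgClass::Extend);
  assert(classifyScalarInt(8, AArch64ABIKind::AAPCS) == ArgClass::Direct);
  assert(classifyScalarInt(32, AArch64ABIKind::DarwinPCS) == ArgClass::Direct);
  return 0;
}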
+ assert(!cir::MissingFeatures::handleAArch64Indirect()); + // false as a place holder for now, as we don't have a way to query + bool isIndirect = false; + assert(!cir::MissingFeatures::supportgetCoerceToTypeForAArch64()); + // we don't convert to LLVM Type here as we are lowering to CIR here. + // so baseTy is the just type of the result of va_arg. + // but it depends on arg type indirectness and coercion defined by ABI. + auto baseTy = opResTy; + + if (mlir::isa(baseTy)) { + llvm_unreachable("ArrayType VAArg loweing NYI"); + } + // numRegs may not be 1 if ArrayType is supported. + unsigned numRegs = 1; + + if (Kind == AArch64ABIKind::AAPCSSoft) { + llvm_unreachable("AAPCSSoft cir.var_arg lowering NYI"); + } + bool IsFPR = mlir::cir::isAnyFloatingPointType(baseTy); + + // The AArch64 va_list type and handling is specified in the Procedure Call + // Standard, section B.4: + // + // struct { + // void *__stack; + // void *__gr_top; + // void *__vr_top; + // int __gr_offs; + // int __vr_offs; + // }; + auto curInsertionP = builder.saveInsertionPoint(); + auto currentBlock = builder.getInsertionBlock(); + auto boolTy = builder.getBoolTy(); + + auto maybeRegBlock = builder.createBlock(builder.getBlock()->getParent()); + auto inRegBlock = builder.createBlock(builder.getBlock()->getParent()); + auto onStackBlock = builder.createBlock(builder.getBlock()->getParent()); + + //======================================= + // Find out where argument was passed + //======================================= + + // If v/gr_offs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating regOffs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). + + assert(!cir::MissingFeatures::supportTySizeQueryForAArch64()); + assert(!cir::MissingFeatures::supportTyAlignQueryForAArch64()); + // One is just place holder for now, as we don't have a way to query + // type size and alignment. + clang::CharUnits tySize = + clang::CharUnits::fromQuantity(datalayout.getTypeStoreSize(opResTy)); + clang::CharUnits tyAlign = + clang::CharUnits::fromQuantity(datalayout.getAlignment(opResTy, true)); + + // indirectness, type size and type alignment all + // decide regSize, but they are all ABI defined + // thus need ABI lowering query system. + assert(!cir::MissingFeatures::handleAArch64Indirect()); + int regSize = isIndirect ? 8 : tySize.getQuantity(); + int regTopIndex; + mlir::Value regOffsP; + mlir::cir::LoadOp regOffs; + + builder.restoreInsertionPoint(curInsertionP); + // 3 is the field number of __gr_offs, 4 is the field number of __vr_offs + if (!IsFPR) { + regOffsP = builder.createGetMemberOp(loc, valist, "gr_offs", 3); + regOffs = builder.create(loc, regOffsP); + regTopIndex = 1; + regSize = llvm::alignTo(regSize, 8); + } else { + regOffsP = builder.createGetMemberOp(loc, valist, "vr_offs", 4); + regOffs = builder.create(loc, regOffsP); + regTopIndex = 2; + regSize = 16 * numRegs; + } + + //======================================= + // Find out where argument was passed + //======================================= + + // If regOffs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating regOffs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). 
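The control flow built here and in the block that follows implements the AAPCS64 B.4 rule quoted above: a non-negative __gr_offs/__vr_offs means earlier va_arg calls already fell back to the stack; otherwise the offset is bumped by the aligned register-save size, and the argument is taken from the save area only if the new offset is still <= 0. A standalone model of just that decision, with hypothetical names (Source, nextIntArg) and only the general-purpose register path:

#include <cassert>
#include <cstdint>

enum class Source { Register, Stack };

struct Decision {
  Source src;
  int64_t regOffsetUsed; // offset from __gr_top when src == Register
};

static int64_t alignTo(int64_t v, int64_t a) { return (v + a - 1) / a * a; }

// grOffs models __gr_offs; regSize is the argument's slice of the register
// save area (rounded up to 8 bytes for GP registers).
static Decision nextIntArg(int64_t &grOffs, int64_t regSize) {
  if (grOffs >= 0)
    return {Source::Stack, 0};          // already spilled to the stack
  int64_t oldOffs = grOffs;
  grOffs += alignTo(regSize, 8);        // consume save-area space unconditionally
  if (grOffs <= 0)
    return {Source::Register, oldOffs}; // value lives at __gr_top + oldOffs
  return {Source::Stack, 0};            // ran out of saved registers
}

int main() {
  // Two 8-byte slots left in the GP save area: register, register, then stack.
  int64_t grOffs = -16;
  assert(nextIntArg(grOffs, 8).src == Source::Register);
  assert(nextIntArg(grOffs, 8).src == Source::Register);
  assert(nextIntArg(grOffs, 8).src == Source::Stack);
  return 0;
}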
+ auto zeroValue = builder.create( + loc, regOffs.getType(), mlir::cir::IntAttr::get(regOffs.getType(), 0)); + auto usingStack = builder.create( + loc, boolTy, mlir::cir::CmpOpKind::ge, regOffs, zeroValue); + builder.create(loc, usingStack, onStackBlock, + maybeRegBlock); + + auto contBlock = currentBlock->splitBlock(op); + // now contBlock should be the block after onStackBlock in CFG. + // This is essential, considering the case where originally currentBlock + // was the only block in the region. By splitting the block, and added + // above blocks, really the rear block in the region should be contBlock, + // not onStackBlock, but splitBlock would just insert contBlock after + // currentBlock, so we need to move it. + auto contBlockIter = contBlock->getIterator(); + contBlock->getParent()->getBlocks().remove(contBlockIter); + onStackBlock->getParent()->getBlocks().insertAfter( + mlir::Region::iterator(onStackBlock), contBlock); + + // Otherwise, at least some kind of argument could go in these registers, the + // question is whether this particular type is too big. + builder.setInsertionPointToEnd(maybeRegBlock); + + // Integer arguments may need to correct register alignment (for example a + // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we + // align __gr_offs to calculate the potential address. + if (!IsFPR && !isIndirect && tyAlign.getQuantity() > 8) { + assert(!cir::MissingFeatures::handleAArch64Indirect()); + assert(!cir::MissingFeatures::supportTyAlignQueryForAArch64()); + llvm_unreachable("register alignment correction NYI"); + } + + // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. + // The fact that this is done unconditionally reflects the fact that + // allocating an argument to the stack also uses up all the remaining + // registers of the appropriate kind. + auto regSizeValue = builder.create( + loc, regOffs.getType(), + mlir::cir::IntAttr::get(regOffs.getType(), regSize)); + auto newOffset = builder.create( + loc, regOffs.getType(), mlir::cir::BinOpKind::Add, regOffs, regSizeValue); + builder.createStore(loc, newOffset, regOffsP); + // Now we're in a position to decide whether this argument really was in + // registers or not. + auto inRegs = builder.create( + loc, boolTy, mlir::cir::CmpOpKind::le, newOffset, zeroValue); + builder.create(loc, inRegs, inRegBlock, onStackBlock); + + //======================================= + // Argument was in registers + //======================================= + // Now we emit the code for if the argument was originally passed in + // registers. First start the appropriate block: + builder.setInsertionPointToEnd(inRegBlock); + auto regTopP = builder.createGetMemberOp( + loc, valist, IsFPR ? 
"vr_top" : "gr_top", regTopIndex); + auto regTop = builder.create(loc, regTopP); + auto i8Ty = mlir::IntegerType::get(builder.getContext(), 8); + auto i8PtrTy = mlir::cir::PointerType::get(builder.getContext(), i8Ty); + auto castRegTop = builder.createBitcast(regTop, i8PtrTy); + auto resAsInt8P = builder.create( + loc, castRegTop.getType(), castRegTop, regOffs); + + if (isIndirect) { + assert(!cir::MissingFeatures::handleAArch64Indirect()); + llvm_unreachable("indirect arg passing NYI"); + } + + // TODO: isHFA, numMembers and base should be query result from query + uint64_t numMembers = 0; + assert(!cir::MissingFeatures::supportisHomogeneousAggregateQueryForAArch64()); + bool isHFA = false; + // though endianess can be known from datalayout, it might need an unified + // ABI lowering query system to answer the question. + assert(!cir::MissingFeatures::supportisEndianQueryForAArch64()); + bool isBigEndian = datalayout.isBigEndian(); + assert(!cir::MissingFeatures::supportisAggregateTypeForABIAArch64()); + // TODO: isAggregateTypeForABI should be query result from ABI info + bool isAggregateTypeForABI = false; + if (isHFA && numMembers > 1) { + // Homogeneous aggregates passed in registers will have their elements split + // and stored 16-bytes apart regardless of size (they're notionally in qN, + // qN+1, ...). We reload and store into a temporary local variable + // contiguously. + assert(!isIndirect && "Homogeneous aggregates should be passed directly"); + llvm_unreachable("Homogeneous aggregates NYI"); + } else { + assert(!cir::MissingFeatures::supportTyAlignQueryForAArch64()); + // TODO: slotSize should be query result about alignment. + clang::CharUnits slotSize = clang::CharUnits::fromQuantity(8); + if (isBigEndian && !isIndirect && (isHFA || isAggregateTypeForABI) && + tySize < slotSize) { + clang::CharUnits offset = slotSize - tySize; + auto offsetConst = builder.create( + loc, regOffs.getType(), + mlir::cir::IntAttr::get(regOffs.getType(), offset.getQuantity())); + + resAsInt8P = builder.create( + loc, castRegTop.getType(), resAsInt8P, offsetConst); + } + } + + auto resAsVoidP = builder.createBitcast(resAsInt8P, regTop.getType()); + + // On big-endian platforms, the value will be right-aligned in its stack slot. + // and we also need to think about other ABI lowering concerns listed below. + assert(!cir::MissingFeatures::handleBigEndian()); + assert(!cir::MissingFeatures::handleAArch64Indirect()); + assert(!cir::MissingFeatures::supportisHomogeneousAggregateQueryForAArch64()); + assert(!cir::MissingFeatures::supportTySizeQueryForAArch64()); + assert(!cir::MissingFeatures::supportTyAlignQueryForAArch64()); + + builder.create(loc, mlir::ValueRange{resAsVoidP}, contBlock); + + //======================================= + // Argument was on the stack + //======================================= + builder.setInsertionPointToEnd(onStackBlock); + auto stackP = builder.createGetMemberOp(loc, valist, "stack", 0); + + auto onStackPtr = builder.create(loc, stackP); + auto ptrDiffTy = + mlir::cir::IntType::get(builder.getContext(), 64, /*signed=*/false); + + assert(!cir::MissingFeatures::handleAArch64Indirect()); + assert(!cir::MissingFeatures::supportTyAlignQueryForAArch64()); + // Again, stack arguments may need realignment. In this case both integer and + // floating-point ones might be affected. + if (!isIndirect && tyAlign.getQuantity() > 8) { + // TODO: this algorithm requres casting from ptr type to int type, then + // back to ptr type thus needs careful handling. NYI now. 
+ llvm_unreachable("alignment greater than 8 NYI"); + } + + // All stack slots are multiples of 8 bytes. + clang::CharUnits stackSlotSize = clang::CharUnits::fromQuantity(8); + clang::CharUnits stackSize; + if (isIndirect) + stackSize = stackSlotSize; + else + stackSize = tySize.alignTo(stackSlotSize); + + // On big-endian platforms, the value will be right-aligned in its stack slot + // Also, the consideration involves type size and alignment, arg indirectness + // which are all ABI defined thus need ABI lowering query system. + // The implementation we have now supports most common cases which assumes + // no indirectness, no alignment greater than 8, and little endian. + assert(!cir::MissingFeatures::handleBigEndian()); + assert(!cir::MissingFeatures::supportTySizeQueryForAArch64()); + + auto stackSizeC = builder.create( + loc, ptrDiffTy, + mlir::cir::IntAttr::get(ptrDiffTy, stackSize.getQuantity())); + auto castStack = builder.createBitcast(onStackPtr, i8PtrTy); + // Write the new value of __stack for the next call to va_arg + auto newStackAsi8Ptr = builder.create( + loc, castStack.getType(), castStack, stackSizeC); + auto newStack = builder.createBitcast(newStackAsi8Ptr, onStackPtr.getType()); + builder.createStore(loc, newStack, stackP); + + if (isBigEndian && !isAggregateTypeForABI && tySize < stackSlotSize) { + clang::CharUnits offset = stackSlotSize - tySize; + auto offsetConst = builder.create( + loc, ptrDiffTy, + mlir::cir::IntAttr::get(ptrDiffTy, offset.getQuantity())); + auto offsetStackAsi8Ptr = builder.create( + loc, castStack.getType(), castStack, offsetConst); + auto onStackPtrBE = + builder.createBitcast(offsetStackAsi8Ptr, onStackPtr.getType()); + builder.create(loc, mlir::ValueRange{onStackPtrBE}, + contBlock); + } else { + builder.create(loc, mlir::ValueRange{onStackPtr}, + contBlock); + } + + // generate additional instructions for end block + builder.setInsertionPoint(op); + contBlock->addArgument(onStackPtr.getType(), loc); + auto resP = contBlock->getArgument(0); + assert(mlir::isa(resP.getType())); + auto opResPTy = mlir::cir::PointerType::get(builder.getContext(), opResTy); + auto castResP = builder.createBitcast(resP, opResPTy); + auto res = builder.create(loc, castResP); + // there would be another level of ptr dereference if indirect arg passing + assert(!cir::MissingFeatures::handleAArch64Indirect()); + if (isIndirect) { + res = builder.create(loc, res.getResult()); + } + return res.getResult(); +} + +mlir::Value +LoweringPrepareAArch64CXXABI::lowerVAArg(cir::CIRBaseBuilderTy &builder, + mlir::cir::VAArgOp op, + const cir::CIRDataLayout &datalayout) { + return Kind == AArch64ABIKind::Win64 ? lowerMSVAArg(builder, op, datalayout) + : isDarwinPCS() ? lowerDarwinVAArg(builder, op, datalayout) + : lowerAAPCSVAArg(builder, op, datalayout); +} diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareItaniumCXXABI.cpp new file mode 100644 index 000000000000..9d79fb7ccb43 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareItaniumCXXABI.cpp @@ -0,0 +1,173 @@ +//====- LoweringPrepareItaniumCXXABI.cpp - Itanium ABI specific code-----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with +// LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
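For the on-stack path handled above, the arithmetic is: every stack slot is a multiple of 8 bytes, __stack advances by the rounded-up size, and on big-endian targets a small scalar sits right-aligned inside its slot, so the load address gets an extra slotSize - typeSize bump. A short standalone model of those two computations; StackArg and stackVAArg are illustrative names only.

#include <cassert>
#include <cstdint>

static uint64_t alignTo(uint64_t v, uint64_t a) { return (v + a - 1) / a * a; }

struct StackArg {
  uint64_t loadOffset; // offset of the value within the current __stack slot
  uint64_t nextStack;  // how far __stack advances for this argument
};

// tySize is the argument's size in bytes; stack slots are 8-byte multiples.
static StackArg stackVAArg(uint64_t tySize, bool bigEndian) {
  const uint64_t slotSize = 8;
  uint64_t advance = alignTo(tySize, slotSize);
  uint64_t offset = 0;
  if (bigEndian && tySize < slotSize)
    offset = slotSize - tySize; // value is right-aligned in its slot
  return {offset, advance};
}

int main() {
  assert(stackVAArg(4, /*bigEndian=*/false).loadOffset == 0);
  assert(stackVAArg(4, /*bigEndian=*/true).loadOffset == 4);
  assert(stackVAArg(12, /*bigEndian=*/false).nextStack == 16);
  return 0;
}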
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------===// +// +// This file provides Itanium C++ ABI specific code +// that is used during LLVMIR lowering prepare. +// +//===--------------------------------------------------------------------===// + +// TODO(cir): Refactor this to follow some level of codegen parity. + +#include "../LoweringPrepareItaniumCXXABI.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Value.h" +#include "mlir/IR/ValueRange.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDataLayout.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/MissingFeatures.h" + +using namespace cir; + +cir::LoweringPrepareCXXABI *cir::LoweringPrepareCXXABI::createItaniumABI() { + return new LoweringPrepareItaniumCXXABI(); +} + +static void buildBadCastCall(CIRBaseBuilderTy &builder, mlir::Location loc, + mlir::FlatSymbolRefAttr badCastFuncRef) { + // TODO(cir): set the calling convention to __cxa_bad_cast. + assert(!MissingFeatures::setCallingConv()); + + builder.createCallOp(loc, badCastFuncRef, mlir::ValueRange{}); + builder.create(loc); + builder.clearInsertionPoint(); +} + +static mlir::Value buildDynamicCastAfterNullCheck(CIRBaseBuilderTy &builder, + mlir::cir::DynamicCastOp op) { + auto loc = op->getLoc(); + auto srcValue = op.getSrc(); + auto castInfo = op.getInfo().value(); + + // TODO(cir): consider address space + assert(!MissingFeatures::addressSpace()); + + auto srcPtr = builder.createBitcast(srcValue, builder.getVoidPtrTy()); + auto srcRtti = builder.getConstant(loc, castInfo.getSrcRtti()); + auto destRtti = builder.getConstant(loc, castInfo.getDestRtti()); + auto offsetHint = builder.getConstant(loc, castInfo.getOffsetHint()); + + auto dynCastFuncRef = castInfo.getRuntimeFunc(); + mlir::Value dynCastFuncArgs[4] = {srcPtr, srcRtti, destRtti, offsetHint}; + + // TODO(cir): set the calling convention for __dynamic_cast. + assert(!MissingFeatures::setCallingConv()); + mlir::Value castedPtr = + builder + .createCallOp(loc, dynCastFuncRef, builder.getVoidPtrTy(), + dynCastFuncArgs) + .getResult(); + + assert(mlir::isa(castedPtr.getType()) && + "the return value of __dynamic_cast should be a ptr"); + + /// C++ [expr.dynamic.cast]p9: + /// A failed cast to reference type throws std::bad_cast + if (op.isRefcast()) { + // Emit a cir.if that checks the casted value. + mlir::Value castedValueIsNull = builder.createPtrIsNull(castedPtr); + builder.create( + loc, castedValueIsNull, false, [&](mlir::OpBuilder &, mlir::Location) { + buildBadCastCall(builder, loc, castInfo.getBadCastFunc()); + }); + } + + // Note that castedPtr is a void*. Cast it to a pointer to the destination + // type before return. + return builder.createBitcast(castedPtr, op.getType()); +} + +static mlir::Value +buildDynamicCastToVoidAfterNullCheck(CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + mlir::cir::DynamicCastOp op) { + auto loc = op.getLoc(); + bool vtableUsesRelativeLayout = op.getRelativeLayout(); + + // TODO(cir): consider address space in this function. 
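buildDynamicCastAfterNullCheck and buildBadCastCall above have to preserve the language rule they lower: a failed pointer dynamic_cast yields a null pointer, while a failed reference dynamic_cast reaches __cxa_bad_cast, i.e. throws std::bad_cast. A standalone program exercising both observable outcomes:

#include <iostream>
#include <typeinfo>

struct Base { virtual ~Base() = default; };
struct Derived : Base {};

int main() {
  Base b;
  Base *pb = &b;
  // Pointer form: failure yields a null pointer (the null-check path).
  if (dynamic_cast<Derived *>(pb) == nullptr)
    std::cout << "pointer cast failed -> nullptr\n";
  // Reference form: failure throws std::bad_cast (the __cxa_bad_cast path).
  try {
    (void)dynamic_cast<Derived &>(*pb);
  } catch (const std::bad_cast &) {
    std::cout << "reference cast failed -> std::bad_cast\n";
  }
  return 0;
}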
+ assert(!MissingFeatures::addressSpace()); + + mlir::Type vtableElemTy; + uint64_t vtableElemAlign; + if (vtableUsesRelativeLayout) { + vtableElemTy = builder.getSIntNTy(32); + vtableElemAlign = 4; + } else { + const auto &targetInfo = astCtx.getTargetInfo(); + auto ptrdiffTy = targetInfo.getPtrDiffType(clang::LangAS::Default); + auto ptrdiffTyIsSigned = clang::TargetInfo::isTypeSigned(ptrdiffTy); + auto ptrdiffTyWidth = targetInfo.getTypeWidth(ptrdiffTy); + + vtableElemTy = mlir::cir::IntType::get(builder.getContext(), ptrdiffTyWidth, + ptrdiffTyIsSigned); + vtableElemAlign = + llvm::divideCeil(targetInfo.getPointerAlign(clang::LangAS::Default), 8); + } + + // Access vtable to get the offset from the given object to its containing + // complete object. + auto vtablePtrTy = builder.getPointerTo(vtableElemTy); + auto vtablePtrPtr = + builder.createBitcast(op.getSrc(), builder.getPointerTo(vtablePtrTy)); + auto vtablePtr = builder.createLoad(loc, vtablePtrPtr); + auto offsetToTopSlotPtr = builder.create( + loc, vtablePtrTy, mlir::FlatSymbolRefAttr{}, vtablePtr, + /*vtable_index=*/0, -2ULL); + auto offsetToTop = + builder.createAlignedLoad(loc, offsetToTopSlotPtr, vtableElemAlign); + + // Add the offset to the given pointer to get the cast result. + // Cast the input pointer to a uint8_t* to allow pointer arithmetic. + auto u8PtrTy = builder.getPointerTo(builder.getUIntNTy(8)); + auto srcBytePtr = builder.createBitcast(op.getSrc(), u8PtrTy); + auto dstBytePtr = builder.create( + loc, u8PtrTy, srcBytePtr, offsetToTop); + // Cast the result to a void*. + return builder.createBitcast(dstBytePtr, builder.getVoidPtrTy()); +} + +mlir::Value +LoweringPrepareItaniumCXXABI::lowerDynamicCast(CIRBaseBuilderTy &builder, + clang::ASTContext &astCtx, + mlir::cir::DynamicCastOp op) { + auto loc = op->getLoc(); + auto srcValue = op.getSrc(); + + assert(!MissingFeatures::buildTypeCheck()); + + if (op.isRefcast()) + return buildDynamicCastAfterNullCheck(builder, op); + + auto srcValueIsNotNull = builder.createPtrToBoolCast(srcValue); + return builder + .create( + loc, srcValueIsNotNull, + [&](mlir::OpBuilder &, mlir::Location) { + mlir::Value castedValue = + op.isCastToVoid() + ? buildDynamicCastToVoidAfterNullCheck(builder, astCtx, op) + : buildDynamicCastAfterNullCheck(builder, op); + builder.createYield(loc, castedValue); + }, + [&](mlir::OpBuilder &, mlir::Location) { + builder.createYield( + loc, builder.getNullPtr(op.getType(), loc).getResult()); + }) + .getResult(); +} + +mlir::Value LoweringPrepareItaniumCXXABI::lowerVAArg( + CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op, + const ::cir::CIRDataLayout &datalayout) { + // There is no generic cir lowering for var_arg, here we fail + // so to prevent attempt of calling lowerVAArg for ItaniumCXXABI + llvm_unreachable("NYI"); +} diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/SPIR.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/SPIR.cpp new file mode 100644 index 000000000000..f5a7250dffd0 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/SPIR.cpp @@ -0,0 +1,73 @@ +//===- SPIR.cpp - TargetInfo for SPIR and SPIR-V --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
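buildDynamicCastToVoidAfterNullCheck above leans on the Itanium vtable layout: the offset-to-top entry (two slots before the address point, hence the -2 index used there) records how far the current subobject is from the most-derived object, and adding it to the source pointer produces the cast-to-void* result. The observable behaviour is plain C++; the 16-byte offset mentioned in the comment assumes a typical 64-bit Itanium-ABI target.

#include <iostream>

struct A { virtual ~A() = default; long a = 0; };
struct B { virtual ~B() = default; long b = 0; };
struct C : A, B {};

int main() {
  C c;
  B *pb = &c; // points at the B subobject, 16 bytes into c on typical targets
  // dynamic_cast<void*> recovers the most-derived object by adding the
  // (negative) offset-to-top value read from B-in-C's vtable.
  void *mostDerived = dynamic_cast<void *>(pb);
  std::cout << (mostDerived == static_cast<void *>(&c)) << '\n'; // prints 1
  return 0;
}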
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIInfoImpl.h"
+#include "LowerFunctionInfo.h"
+#include "LowerTypes.h"
+#include "TargetInfo.h"
+#include "TargetLoweringInfo.h"
+#include "clang/CIR/ABIArgInfo.h"
+#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using ABIArgInfo = ::cir::ABIArgInfo;
+using MissingFeature = ::cir::MissingFeatures;
+
+namespace mlir {
+namespace cir {
+
+//===----------------------------------------------------------------------===//
+// SPIR-V ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class SPIRVABIInfo : public ABIInfo {
+public:
+  SPIRVABIInfo(LowerTypes &LT) : ABIInfo(LT) {}
+
+private:
+  void computeInfo(LowerFunctionInfo &FI) const override {
+    llvm_unreachable("ABI NYI");
+  }
+};
+
+class SPIRVTargetLoweringInfo : public TargetLoweringInfo {
+public:
+  SPIRVTargetLoweringInfo(LowerTypes &LT)
+      : TargetLoweringInfo(std::make_unique<SPIRVABIInfo>(LT)) {}
+
+  unsigned getTargetAddrSpaceFromCIRAddrSpace(
+      mlir::cir::AddressSpaceAttr addressSpaceAttr) const override {
+    using Kind = mlir::cir::AddressSpaceAttr::Kind;
+    switch (addressSpaceAttr.getValue()) {
+    case Kind::offload_private:
+      return 0;
+    case Kind::offload_local:
+      return 3;
+    case Kind::offload_global:
+      return 1;
+    case Kind::offload_constant:
+      return 2;
+    case Kind::offload_generic:
+      return 4;
+    default:
+      llvm_unreachable("Unknown CIR address space for this target");
+    }
+  }
+};
+
+} // namespace
+
+std::unique_ptr<TargetLoweringInfo>
+createSPIRVTargetLoweringInfo(LowerModule &lowerModule) {
+  return std::make_unique<SPIRVTargetLoweringInfo>(lowerModule.getTypes());
+}
+
+} // namespace cir
+} // namespace mlir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
new file mode 100644
index 000000000000..38501f7c3124
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
@@ -0,0 +1,729 @@
+
+#include "clang/CIR/Target/x86.h"
+#include "ABIInfo.h"
+#include "ABIInfoImpl.h"
+#include "LowerModule.h"
+#include "LowerTypes.h"
+#include "TargetInfo.h"
+#include "clang/CIR/ABIArgInfo.h"
+#include "clang/CIR/Dialect/IR/CIRDataLayout.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/ErrorHandling.h"
+#include
+
+using X86AVXABILevel = ::cir::X86AVXABILevel;
+using ABIArgInfo = ::cir::ABIArgInfo;
+
+namespace mlir {
+namespace cir {
+
+namespace {
+
+/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
+unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
+  switch (AVXLevel) {
+  case X86AVXABILevel::AVX512:
+    return 512;
+  case X86AVXABILevel::AVX:
+    return 256;
+  case X86AVXABILevel::None:
+    return 128;
+  }
+  llvm_unreachable("Unknown AVXLevel");
+}
+
+/// Return true if the specified [start,end) bit range is known to either be
+/// off the end of the specified type or being in alignment padding. The user
+/// type specified is known to be at most 128 bits in size, and have passed
+/// through X86_64ABIInfo::classify with a successful classification that put
+/// one of the two halves in the INTEGER class.
+///
+/// It is conservatively correct to return false.
+static bool BitsContainNoUserData(Type Ty, unsigned StartBit, unsigned EndBit, + CIRLowerContext &Context) { + // If the bytes being queried are off the end of the type, there is no user + // data hiding here. This handles analysis of builtins, vectors and other + // types that don't contain interesting padding. + unsigned TySize = (unsigned)Context.getTypeSize(Ty); + if (TySize <= StartBit) + return true; + + if (auto arrTy = llvm::dyn_cast(Ty)) { + llvm_unreachable("NYI"); + } + + if (auto structTy = llvm::dyn_cast(Ty)) { + const CIRRecordLayout &Layout = Context.getCIRRecordLayout(Ty); + + // If this is a C++ record, check the bases first. + if (::cir::MissingFeatures::isCXXRecordDecl() || + ::cir::MissingFeatures::getCXXRecordBases()) { + llvm_unreachable("NYI"); + } + + // Verify that no field has data that overlaps the region of interest. Yes + // this could be sped up a lot by being smarter about queried fields, + // however we're only looking at structs up to 16 bytes, so we don't care + // much. + unsigned idx = 0; + for (auto type : structTy.getMembers()) { + unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx); + + // If we found a field after the region we care about, then we're done. + if (FieldOffset >= EndBit) + break; + + unsigned FieldStart = FieldOffset < StartBit ? StartBit - FieldOffset : 0; + if (!BitsContainNoUserData(type, FieldStart, EndBit - FieldOffset, + Context)) + return false; + + ++idx; + } + + // If nothing in this record overlapped the area of interest, we're good. + return true; + } + + return false; +} + +/// Return a floating point type at the specified offset. +Type getFPTypeAtOffset(Type IRType, unsigned IROffset, + const ::cir::CIRDataLayout &TD) { + if (IROffset == 0 && isa(IRType)) + return IRType; + + llvm_unreachable("NYI"); +} + +} // namespace + +class X86_64ABIInfo : public ABIInfo { + using Class = ::cir::X86ArgClass; + + /// Implement the X86_64 ABI merging algorithm. + /// + /// Merge an accumulating classification \arg Accum with a field + /// classification \arg Field. + /// + /// \param Accum - The accumulating classification. This should + /// always be either NoClass or the result of a previous merge + /// call. In addition, this should never be Memory (the caller + /// should just return Memory for the aggregate). + static Class merge(Class Accum, Class Field); + + /// Implement the X86_64 ABI post merging algorithm. + /// + /// Post merger cleanup, reduces a malformed Hi and Lo pair to + /// final MEMORY or SSE classes when necessary. + /// + /// \param AggregateSize - The size of the current aggregate in + /// the classification process. + /// + /// \param Lo - The classification for the parts of the type + /// residing in the low word of the containing object. + /// + /// \param Hi - The classification for the parts of the type + /// residing in the higher words of the containing object. + /// + void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; + + /// Determine the x86_64 register classes in which the given type T should be + /// passed. + /// + /// \param Lo - The classification for the parts of the type + /// residing in the low word of the containing object. + /// + /// \param Hi - The classification for the parts of the type + /// residing in the high word of the containing object. + /// + /// \param OffsetBase - The bit offset of this type in the + /// containing object. Some parameters are classified different + /// depending on whether they straddle an eightbyte boundary. 
+ /// + /// \param isNamedArg - Whether the argument in question is a "named" + /// argument, as used in AMD64-ABI 3.5.7. + /// + /// \param IsRegCall - Whether the calling conversion is regcall. + /// + /// If a word is unused its result will be NoClass; if a type should + /// be passed in Memory then at least the classification of \arg Lo + /// will be Memory. + /// + /// The \arg Lo class will be NoClass iff the argument is ignored. + /// + /// If the \arg Lo class is ComplexX87, then the \arg Hi class will + /// also be ComplexX87. + void classify(Type T, uint64_t OffsetBase, Class &Lo, Class &Hi, + bool isNamedArg, bool IsRegCall = false) const; + + Type GetSSETypeAtOffset(Type IRType, unsigned IROffset, Type SourceTy, + unsigned SourceOffset) const; + + Type GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, Type SourceTy, + unsigned SourceOffset) const; + + /// The 0.98 ABI revision clarified a lot of ambiguities, + /// unfortunately in ways that were not always consistent with + /// certain previous compilers. In particular, platforms which + /// required strict binary compatibility with older versions of GCC + /// may need to exempt themselves. + bool honorsRevision0_98() const { + return !getTarget().getTriple().isOSDarwin(); + } + + X86AVXABILevel AVXLevel; + +public: + X86_64ABIInfo(LowerTypes &CGT, X86AVXABILevel AVXLevel) + : ABIInfo(CGT), AVXLevel(AVXLevel) {} + + ::cir::ABIArgInfo classifyReturnType(Type RetTy) const; + + ABIArgInfo classifyArgumentType(Type Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, + bool isNamedArg, bool IsRegCall) const; + + void computeInfo(LowerFunctionInfo &FI) const override; +}; + +class X86_64TargetLoweringInfo : public TargetLoweringInfo { +public: + X86_64TargetLoweringInfo(LowerTypes &LM, X86AVXABILevel AVXLevel) + : TargetLoweringInfo(std::make_unique(LM, AVXLevel)) { + assert(!::cir::MissingFeatures::swift()); + } + + unsigned getTargetAddrSpaceFromCIRAddrSpace( + mlir::cir::AddressSpaceAttr addressSpaceAttr) const override { + using Kind = mlir::cir::AddressSpaceAttr::Kind; + switch (addressSpaceAttr.getValue()) { + case Kind::offload_private: + case Kind::offload_local: + case Kind::offload_global: + case Kind::offload_constant: + case Kind::offload_generic: + return 0; + default: + llvm_unreachable("Unknown CIR address space for this target"); + } + } +}; + +void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi, + bool isNamedArg, bool IsRegCall) const { + // FIXME: This code can be simplified by introducing a simple value class + // for Class pairs with appropriate constructor methods for the various + // situations. + + // FIXME: Some of the split computations are wrong; unaligned vectors + // shouldn't be passed in registers for example, so there is no chance they + // can straddle an eightbyte. Verify & simplify. + + Lo = Hi = Class::NoClass; + + Class &Current = OffsetBase < 64 ? Lo : Hi; + Current = Class::Memory; + + // FIXME(cir): There's currently no direct way to identify if a type is a + // builtin. + if (/*isBuitinType=*/true) { + if (isa(Ty)) { + Current = Class::NoClass; + } else if (isa(Ty)) { + + // FIXME(cir): Clang's BuiltinType::Kind allow comparisons (GT, LT, etc). + // We should implement this in CIR to simplify the conditions below. + // Hence, Comparisons below might not be truly equivalent to the ones in + // Clang. 
+ if (isa(Ty)) { + Current = Class::Integer; + } + return; + + } else if (isa(Ty) || isa(Ty)) { + Current = Class::SSE; + return; + + } else if (isa(Ty)) { + Current = Class::Integer; + } else if (const auto RT = dyn_cast(Ty)) { + uint64_t Size = getContext().getTypeSize(Ty); + + // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) + llvm_unreachable("NYI"); + + // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial + // copy constructor or a non-trivial destructor, it is passed by invisible + // reference. + if (getRecordArgABI(RT, getCXXABI())) + llvm_unreachable("NYI"); + + // Assume variable sized types are passed in memory. + if (::cir::MissingFeatures::recordDeclHasFlexibleArrayMember()) + llvm_unreachable("NYI"); + + const auto &Layout = getContext().getCIRRecordLayout(Ty); + + // Reset Lo class, this will be recomputed. + Current = Class::NoClass; + + // If this is a C++ record, classify the bases first. + assert(!::cir::MissingFeatures::isCXXRecordDecl() && + !::cir::MissingFeatures::getCXXRecordBases()); + + // Classify the fields one at a time, merging the results. + bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <= + clang::LangOptions::ClangABI::Ver11 || + getContext().getTargetInfo().getTriple().isPS(); + bool IsUnion = RT.isUnion() && !UseClang11Compat; + + // FIXME(cir): An interface to handle field declaration might be needed. + assert(!::cir::MissingFeatures::fieldDeclAbstraction()); + for (auto [idx, FT] : llvm::enumerate(RT.getMembers())) { + uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); + assert(!::cir::MissingFeatures::fieldDeclIsBitfield()); + bool BitField = false; + + // Ignore padding bit-fields. + if (BitField && !::cir::MissingFeatures::fieldDeclisUnnamedBitField()) + llvm_unreachable("NYI"); + + // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than + // eight eightbytes, or it contains unaligned fields, it has class + // MEMORY. + // + // The only case a 256-bit or a 512-bit wide vector could be used is + // when the struct contains a single 256-bit or 512-bit element. Early + // check and fallback to memory. + // + // FIXME: Extended the Lo and Hi logic properly to work for size wider + // than 128. + if (Size > 128 && ((!IsUnion && Size != getContext().getTypeSize(FT)) || + Size > getNativeVectorSizeForAVXABI(AVXLevel))) { + llvm_unreachable("NYI"); + } + // Note, skip this test for bit-fields, see below. + if (!BitField && Offset % getContext().getTypeAlign(RT)) { + llvm_unreachable("NYI"); + } + + // Classify this field. + // + // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate + // exceeds a single eightbyte, each is classified + // separately. Each eightbyte gets initialized to class + // NO_CLASS. + Class FieldLo, FieldHi; + + // Bit-fields require special handling, they do not force the + // structure to be passed in memory even if unaligned, and + // therefore they can straddle an eightbyte. + if (BitField) { + llvm_unreachable("NYI"); + } else { + classify(FT, Offset, FieldLo, FieldHi, isNamedArg); + } + Lo = merge(Lo, FieldLo); + Hi = merge(Hi, FieldHi); + if (Lo == Class::Memory || Hi == Class::Memory) + break; + } + + postMerge(Size, Lo, Hi); + } else { + llvm::outs() << "Missing X86 classification for type " << Ty << "\n"; + llvm_unreachable("NYI"); + } + // FIXME: _Decimal32 and _Decimal64 are SSE. + // FIXME: _float128 and _Decimal128 are (SSE, SSEUp). 
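To make the preceding classification loop concrete, here is how the merge rules land for two small aggregates on x86-64 (hypothetical structs, shown purely for illustration; they do not appear in this patch):

// 8 bytes total, so both fields share the single (low) eightbyte:
// classify(int) -> INTEGER, classify(float) -> SSE, and
// merge(INTEGER, SSE) -> INTEGER, so the whole struct travels in one GPR.
struct IntAndFloat { int a; float b; };

// 16 bytes total, one field per eightbyte:
// Lo = SSE (the double), Hi = INTEGER (the long), i.e. one XMM plus one GPR.
struct DoubleAndLong { double d; long l; };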
+ return; + } + + llvm::outs() << "Missing X86 classification for non-builtin types\n"; + llvm_unreachable("NYI"); +} + +/// Return a type that will be passed by the backend in the low 8 bytes of an +/// XMM register, corresponding to the SSE class. +Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset, + Type SourceTy, + unsigned SourceOffset) const { + const ::cir::CIRDataLayout &TD = getDataLayout(); + unsigned SourceSize = + (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset; + Type T0 = getFPTypeAtOffset(IRType, IROffset, TD); + if (!T0 || isa(T0)) + return T0; // NOTE(cir): Not sure if this is correct. + + Type T1 = {}; + unsigned T0Size = TD.getTypeAllocSize(T0); + if (SourceSize > T0Size) + llvm_unreachable("NYI"); + if (T1 == nullptr) { + // Check if IRType is a half/bfloat + float. float type will be in + // IROffset+4 due to its alignment. + if (isa(T0) && SourceSize > 4) + llvm_unreachable("NYI"); + // If we can't get a second FP type, return a simple half or float. + // avx512fp16-abi.c:pr51813_2 shows it works to return float for + // {float, i8} too. + if (T1 == nullptr) + return T0; + } + + llvm_unreachable("NYI"); +} + +/// The ABI specifies that a value should be passed in an 8-byte GPR. This +/// means that we either have a scalar or we are talking about the high or low +/// part of an up-to-16-byte struct. This routine picks the best CIR type +/// to represent this, which may be i64 or may be anything else that the +/// backend will pass in a GPR that works better (e.g. i8, %foo*, etc). +/// +/// PrefType is an CIR type that corresponds to (part of) the IR type for +/// the source type. IROffset is an offset in bytes into the CIR type that +/// the 8-byte value references. PrefType may be null. +/// +/// SourceTy is the source-level type for the entire argument. SourceOffset +/// is an offset into this that we're processing (which is always either 0 or +/// 8). +/// +Type X86_64ABIInfo::GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, + Type SourceTy, + unsigned SourceOffset) const { + // If we're dealing with an un-offset CIR type, then it means that we're + // returning an 8-byte unit starting with it. See if we can safely use it. + if (IROffset == 0) { + // Pointers and int64's always fill the 8-byte unit. + assert(!isa(DestTy) && "Ptrs are NYI"); + + // If we have a 1/2/4-byte integer, we can use it only if the rest of the + // goodness in the source type is just tail padding. This is allowed to + // kick in for struct {double,int} on the int, but not on + // struct{double,int,int} because we wouldn't return the second int. We + // have to do this analysis on the source type because we can't depend on + // unions being lowered a specific way etc. + if (auto intTy = dyn_cast(DestTy)) { + if (intTy.getWidth() == 8 || intTy.getWidth() == 16 || + intTy.getWidth() == 32) { + unsigned BitWidth = intTy.getWidth(); + if (BitsContainNoUserData(SourceTy, SourceOffset * 8 + BitWidth, + SourceOffset * 8 + 64, getContext())) + return DestTy; + } + } + } + + if (auto RT = dyn_cast(DestTy)) { + // If this is a struct, recurse into the field at the specified offset. 
+    const ::cir::StructLayout *SL = getDataLayout().getStructLayout(RT);
+    if (IROffset < SL->getSizeInBytes()) {
+      unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
+      IROffset -= SL->getElementOffset(FieldIdx);
+
+      return GetINTEGERTypeAtOffset(RT.getMembers()[FieldIdx], IROffset,
+                                    SourceTy, SourceOffset);
+    }
+  }
+
+  // Okay, we don't have any better idea of what to pass, so we pass this in
+  // an integer register that isn't too big to fit the rest of the struct.
+  unsigned TySizeInBytes =
+      (unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
+
+  assert(TySizeInBytes != SourceOffset && "Empty field?");
+
+  // It is always safe to classify this as an integer type up to i64 that
+  // isn't larger than the structure.
+  // FIXME(cir): Perhaps we should have the concept of signless integers in
+  // CIR, mostly because coerced types should carry sign. On the other hand,
+  // this might not make a difference in practice. For now, we just preserve
+  // the sign as is to avoid unnecessary bitcasts.
+  bool isSigned = false;
+  if (auto intTy = dyn_cast(SourceTy))
+    isSigned = intTy.isSigned();
+  return IntType::get(LT.getMLIRContext(),
+                      std::min(TySizeInBytes - SourceOffset, 8U) * 8, isSigned);
+}
+
+::cir::ABIArgInfo X86_64ABIInfo::classifyReturnType(Type RetTy) const {
+  // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
+  // classification algorithm.
+  X86_64ABIInfo::Class Lo, Hi;
+  classify(RetTy, 0, Lo, Hi, true);
+
+  // Check some invariants.
+  assert((Hi != Class::Memory || Lo == Class::Memory) &&
+         "Invalid memory classification.");
+  assert((Hi != Class::SSEUp || Lo == Class::SSE) &&
+         "Invalid SSEUp classification.");
+
+  Type resType = {};
+  switch (Lo) {
+  case Class::NoClass:
+    if (Hi == Class::NoClass)
+      return ABIArgInfo::getIgnore();
+    break;
+
+  case Class::Integer:
+    resType = GetINTEGERTypeAtOffset(RetTy, 0, RetTy, 0);
+
+    // If we have a sign or zero extended integer, make sure to return Extend
+    // so that the parameter gets the right LLVM IR attributes.
+    if (Hi == Class::NoClass && isa(resType)) {
+      // NOTE(cir): We skip enum types handling here since CIR represents
+      // enums directly as their underlying integer types. NOTE(cir): For some
+      // reason, Clang does not set the coerce type here and delays it to
+      // arrangeLLVMFunctionInfo. We do the same to keep parity.
+      if (isa(RetTy) && isPromotableIntegerTypeForABI(RetTy))
+        return ABIArgInfo::getExtend(RetTy);
+    }
+    break;
+
+  // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
+  // available SSE register of the sequence %xmm0, %xmm1 is used.
+  case Class::SSE:
+    resType = GetSSETypeAtOffset(RetTy, 0, RetTy, 0);
+    break;
+
+  default:
+    llvm_unreachable("NYI");
+  }
+
+  Type HighPart = {};
+  switch (Hi) {
+
+  case Class::NoClass:
+    break;
+
+  default:
+    llvm_unreachable("NYI");
+  }
+
+  // If a high part was specified, merge it together with the low part. It is
+  // known to pass in the high eightbyte of the result. We do this by forming
+  // a first class struct aggregate with the high and low part: {low, high}
+  if (HighPart)
+    llvm_unreachable("NYI");
+
+  return ABIArgInfo::getDirect(resType);
+}
+
+ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
+                                               unsigned &neededInt,
+                                               unsigned &neededSSE,
+                                               bool isNamedArg,
+                                               bool IsRegCall = false) const {
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  X86_64ABIInfo::Class Lo, Hi;
+  classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall);
+
+  // Check some invariants.
+  // FIXME: Enforce these by construction.
+ assert((Hi != Class::Memory || Lo == Class::Memory) && + "Invalid memory classification."); + assert((Hi != Class::SSEUp || Lo == Class::SSE) && + "Invalid SSEUp classification."); + + neededInt = 0; + neededSSE = 0; + Type ResType = {}; + switch (Lo) { + // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next + // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 + // and %r9 is used. + case Class::Integer: + ++neededInt; + + // Pick an 8-byte type based on the preferred type. + ResType = GetINTEGERTypeAtOffset(Ty, 0, Ty, 0); + + // If we have a sign or zero extended integer, make sure to return Extend + // so that the parameter gets the right LLVM IR attributes. + if (Hi == Class::NoClass && isa(ResType)) { + // NOTE(cir): We skip enum types handling here since CIR represents + // enums directly as their unerlying integer types. NOTE(cir): For some + // reason, Clang does not set the coerce type here and delays it to + // arrangeLLVMFunctionInfo. We do the same to keep parity. + if (isa(Ty) && isPromotableIntegerTypeForABI(Ty)) + return ABIArgInfo::getExtend(Ty); + } + + break; + + // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next + // available SSE register is used, the registers are taken in the + // order from %xmm0 to %xmm7. + case Class::SSE: { + ResType = GetSSETypeAtOffset(Ty, 0, Ty, 0); + ++neededSSE; + break; + } + default: + llvm_unreachable("NYI"); + } + + Type HighPart = {}; + switch (Hi) { + case Class::NoClass: + break; + default: + llvm_unreachable("NYI"); + } + + if (HighPart) + llvm_unreachable("NYI"); + + return ABIArgInfo::getDirect(ResType); +} + +void X86_64ABIInfo::computeInfo(LowerFunctionInfo &FI) const { + const unsigned CallingConv = FI.getCallingConvention(); + // It is possible to force Win64 calling convention on any x86_64 target by + // using __attribute__((ms_abi)). In such case to correctly emit Win64 + // compatible code delegate this call to WinX86_64ABIInfo::computeInfo. + if (CallingConv == llvm::CallingConv::Win64) { + llvm_unreachable("Win64 CC is NYI"); + } + + bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall; + + // Keep track of the number of assigned registers. + unsigned FreeIntRegs = IsRegCall ? 11 : 6; + unsigned FreeSSERegs = IsRegCall ? 16 : 8; + unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; + + if (!::mlir::cir::classifyReturnType(getCXXABI(), FI, *this)) { + if (IsRegCall || ::cir::MissingFeatures::regCall()) { + llvm_unreachable("RegCall is NYI"); + } else + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + } + + // If the return value is indirect, then the hidden argument is consuming + // one integer register. + if (FI.getReturnInfo().isIndirect()) + llvm_unreachable("NYI"); + else if (NeededSSE && MaxVectorWidth) + llvm_unreachable("NYI"); + + // The chain argument effectively gives us another free register. + if (::cir::MissingFeatures::chainCall()) + llvm_unreachable("NYI"); + + unsigned NumRequiredArgs = FI.getNumRequiredArgs(); + // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers + // get assigned (in left-to-right order) for passing as follows... 
+ unsigned ArgNo = 0; + for (LowerFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it, ++ArgNo) { + bool IsNamedArg = ArgNo < NumRequiredArgs; + + if (IsRegCall && ::cir::MissingFeatures::regCall()) + llvm_unreachable("NYI"); + else + it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, + NeededSSE, IsNamedArg); + + // AMD64-ABI 3.2.3p3: If there are no registers available for any + // eightbyte of an argument, the whole argument is passed on the + // stack. If registers have already been assigned for some + // eightbytes of such an argument, the assignments get reverted. + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + if (::cir::MissingFeatures::vectorType()) + llvm_unreachable("NYI"); + } else { + llvm_unreachable("Indirect results are NYI"); + } + } +} + +X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) { + // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is + // classified recursively so that always two fields are + // considered. The resulting class is calculated according to + // the classes of the fields in the eightbyte: + // + // (a) If both classes are equal, this is the resulting class. + // + // (b) If one of the classes is NO_CLASS, the resulting class is + // the other class. + // + // (c) If one of the classes is MEMORY, the result is the MEMORY + // class. + // + // (d) If one of the classes is INTEGER, the result is the + // INTEGER. + // + // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, + // MEMORY is used as class. + // + // (f) Otherwise class SSE is used. + + // Accum should never be memory (we should have returned) or + // ComplexX87 (because this cannot be passed in a structure). + assert((Accum != Class::Memory && Accum != Class::ComplexX87) && + "Invalid accumulated classification during merge."); + if (Accum == Field || Field == Class::NoClass) + return Accum; + if (Field == Class::Memory) + return Class::Memory; + if (Accum == Class::NoClass) + return Field; + if (Accum == Class::Integer || Field == Class::Integer) + return Class::Integer; + if (Field == Class::X87 || Field == Class::X87Up || + Field == Class::ComplexX87 || Accum == Class::X87 || + Accum == Class::X87Up) + return Class::Memory; + return Class::SSE; +} + +void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo, + Class &Hi) const { + // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: + // + // (a) If one of the classes is Memory, the whole argument is passed in + // memory. + // + // (b) If X87UP is not preceded by X87, the whole argument is passed in + // memory. + // + // (c) If the size of the aggregate exceeds two eightbytes and the first + // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole + // argument is passed in memory. NOTE: This is necessary to keep the + // ABI working for processors that don't support the __m256 type. + // + // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. + // + // Some of these are enforced by the merging logic. Others can arise + // only with unions; for example: + // union { _Complex double; unsigned; } + // + // Note that clauses (b) and (c) were added in 0.98. 
+ // + if (Hi == Class::Memory) + Lo = Class::Memory; + if (Hi == Class::X87Up && Lo != Class::X87 && honorsRevision0_98()) + Lo = Class::Memory; + if (AggregateSize > 128 && (Lo != Class::SSE || Hi != Class::SSEUp)) + Lo = Class::Memory; + if (Hi == Class::SSEUp && Lo != Class::SSE) + Hi = Class::SSE; +} + +std::unique_ptr +createX86_64TargetLoweringInfo(LowerModule &LM, X86AVXABILevel AVXLevel) { + return std::make_unique(LM.getTypes(), AVXLevel); +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp new file mode 100644 index 000000000000..8e3dbb0d35f7 --- /dev/null +++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp @@ -0,0 +1,524 @@ +//===--- CIRGenAction.cpp - LLVM Code generation Frontend Action ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/CIRFrontendAction/CIRGenAction.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/Parser/Parser.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclGroup.h" +#include "clang/Basic/DiagnosticFrontend.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/LangStandard.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/CIR/CIRGenerator.h" +#include "clang/CIR/CIRToCIRPasses.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/LowerToLLVM.h" +#include "clang/CIR/Passes.h" +#include "clang/CodeGen/BackendUtil.h" +#include "clang/CodeGen/ModuleBuilder.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LLVMRemarkStreamer.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/LTO/LTOBackend.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Pass.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Transforms/IPO/Internalize.h" + +#include + +using namespace cir; +using namespace clang; + +static std::string sanitizePassOptions(llvm::StringRef o) { + if (o.empty()) + return ""; + std::string opts{o}; + // MLIR pass options are space separated, but we use ';' in clang since + // space aren't well supported, switch it back. 
+ for (unsigned i = 0, e = opts.size(); i < e; ++i) + if (opts[i] == ';') + opts[i] = ' '; + // If arguments are surrounded with '"', trim them off + return llvm::StringRef(opts).trim('"').str(); +} + +namespace cir { + +static BackendAction +getBackendActionFromOutputType(CIRGenAction::OutputType action) { + switch (action) { + case CIRGenAction::OutputType::EmitAssembly: + return BackendAction::Backend_EmitAssembly; + case CIRGenAction::OutputType::EmitBC: + return BackendAction::Backend_EmitBC; + case CIRGenAction::OutputType::EmitLLVM: + return BackendAction::Backend_EmitLL; + case CIRGenAction::OutputType::EmitObj: + return BackendAction::Backend_EmitObj; + default: + llvm_unreachable("Unsupported action"); + } +} + +static std::unique_ptr +lowerFromCIRToLLVMIR(const clang::FrontendOptions &feOptions, + mlir::ModuleOp mlirMod, + std::unique_ptr mlirCtx, + llvm::LLVMContext &llvmCtx, bool disableVerifier = false) { + if (feOptions.ClangIRDirectLowering) + return direct::lowerDirectlyFromCIRToLLVMIR(mlirMod, llvmCtx, + disableVerifier); + else + return lowerFromCIRToMLIRToLLVMIR(mlirMod, std::move(mlirCtx), llvmCtx); +} + +class CIRGenConsumer : public clang::ASTConsumer { + + virtual void anchor(); + + CIRGenAction::OutputType action; + + DiagnosticsEngine &diagnosticsEngine; + const HeaderSearchOptions &headerSearchOptions; + const CodeGenOptions &codeGenOptions; + const TargetOptions &targetOptions; + const LangOptions &langOptions; + const FrontendOptions &feOptions; + + std::unique_ptr outputStream; + + ASTContext *astContext{nullptr}; + IntrusiveRefCntPtr FS; + std::unique_ptr gen; + +public: + CIRGenConsumer(CIRGenAction::OutputType action, + DiagnosticsEngine &diagnosticsEngine, + IntrusiveRefCntPtr VFS, + const HeaderSearchOptions &headerSearchOptions, + const CodeGenOptions &codeGenOptions, + const TargetOptions &targetOptions, + const LangOptions &langOptions, + const FrontendOptions &feOptions, + std::unique_ptr os) + : action(action), diagnosticsEngine(diagnosticsEngine), + headerSearchOptions(headerSearchOptions), + codeGenOptions(codeGenOptions), targetOptions(targetOptions), + langOptions(langOptions), feOptions(feOptions), + outputStream(std::move(os)), FS(VFS), + gen(std::make_unique(diagnosticsEngine, std::move(VFS), + codeGenOptions)) {} + + void Initialize(ASTContext &ctx) override { + assert(!astContext && "initialized multiple times"); + + astContext = &ctx; + + gen->Initialize(ctx); + } + + bool HandleTopLevelDecl(DeclGroupRef D) override { + PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(), + astContext->getSourceManager(), + "LLVM IR generation of declaration"); + gen->HandleTopLevelDecl(D); + return true; + } + + void HandleCXXStaticMemberVarInstantiation(clang::VarDecl *VD) override { + gen->HandleCXXStaticMemberVarInstantiation(VD); + } + + void HandleInlineFunctionDefinition(FunctionDecl *D) override { + gen->HandleInlineFunctionDefinition(D); + } + + void HandleInterestingDecl(DeclGroupRef D) override { + llvm_unreachable("NYI"); + } + + void HandleTranslationUnit(ASTContext &C) override { + // Note that this method is called after `HandleTopLevelDecl` has already + // ran all over the top level decls. Here clang mostly wraps defered and + // global codegen, followed by running CIR passes. 
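Before the translation-unit handling below runs the CIR pipeline, it funnels the ';'-separated option strings through the sanitizePassOptions helper defined earlier in this file. Its contract is small enough to pin down exactly; the option strings here are made-up examples, not real pass options:

// ';' separators become spaces and one surrounding pair of '"' is trimmed,
// which is the shape the MLIR pass-option parser expects. Illustrative only:
// sanitizePassOptions is static to this file, so checks like these would
// have to live in the same translation unit.
assert(sanitizePassOptions("\"opt-a=1;opt-b=2\"") == "opt-a=1 opt-b=2");
assert(sanitizePassOptions("") == "");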
+ gen->HandleTranslationUnit(C); + + if (!feOptions.ClangIRDisableCIRVerifier) + if (!gen->verifyModule()) { + llvm::report_fatal_error( + "CIR codegen: module verification error before running CIR passes"); + return; + } + + auto mlirMod = gen->getModule(); + auto mlirCtx = gen->takeContext(); + + auto setupCIRPipelineAndExecute = [&] { + // Sanitize passes options. MLIR uses spaces between pass options + // and since that's hard to fly in clang, we currently use ';'. + std::string lifetimeOpts, idiomRecognizerOpts, libOptOpts; + if (feOptions.ClangIRLifetimeCheck) + lifetimeOpts = sanitizePassOptions(feOptions.ClangIRLifetimeCheckOpts); + if (feOptions.ClangIRIdiomRecognizer) + idiomRecognizerOpts = + sanitizePassOptions(feOptions.ClangIRIdiomRecognizerOpts); + if (feOptions.ClangIRLibOpt) + libOptOpts = sanitizePassOptions(feOptions.ClangIRLibOptOpts); + + // Setup and run CIR pipeline. + std::string passOptParsingFailure; + if (runCIRToCIRPasses( + mlirMod, mlirCtx.get(), C, !feOptions.ClangIRDisableCIRVerifier, + feOptions.ClangIRLifetimeCheck, lifetimeOpts, + feOptions.ClangIRIdiomRecognizer, idiomRecognizerOpts, + feOptions.ClangIRLibOpt, libOptOpts, passOptParsingFailure, + codeGenOptions.OptimizationLevel > 0, + action == CIRGenAction::OutputType::EmitCIRFlat, + action == CIRGenAction::OutputType::EmitMLIR, + feOptions.ClangIREnableCallConvLowering, + feOptions.ClangIREnableMem2Reg) + .failed()) { + if (!passOptParsingFailure.empty()) + diagnosticsEngine.Report(diag::err_drv_cir_pass_opt_parsing) + << feOptions.ClangIRLifetimeCheckOpts; + else + llvm::report_fatal_error("CIR codegen: MLIR pass manager fails " + "when running CIR passes!"); + return; + } + }; + + if (!feOptions.ClangIRDisablePasses) { + // Handle source manager properly given that lifetime analysis + // might emit warnings and remarks. + auto &clangSourceMgr = C.getSourceManager(); + FileID MainFileID = clangSourceMgr.getMainFileID(); + + std::unique_ptr FileBuf = + llvm::MemoryBuffer::getMemBuffer( + clangSourceMgr.getBufferOrFake(MainFileID)); + + llvm::SourceMgr mlirSourceMgr; + mlirSourceMgr.AddNewSourceBuffer(std::move(FileBuf), llvm::SMLoc()); + + if (feOptions.ClangIRVerifyDiags) { + mlir::SourceMgrDiagnosticVerifierHandler sourceMgrHandler( + mlirSourceMgr, mlirCtx.get()); + mlirCtx->printOpOnDiagnostic(false); + setupCIRPipelineAndExecute(); + + // Verify the diagnostic handler to make sure that each of the + // diagnostics matched. + if (sourceMgrHandler.verify().failed()) { + // FIXME: we fail ungracefully, there's probably a better way + // to communicate non-zero return so tests can actually fail. + llvm::sys::RunInterruptHandlers(); + exit(1); + } + } else { + mlir::SourceMgrDiagnosticHandler sourceMgrHandler(mlirSourceMgr, + mlirCtx.get()); + setupCIRPipelineAndExecute(); + } + } + + switch (action) { + case CIRGenAction::OutputType::EmitCIR: + case CIRGenAction::OutputType::EmitCIRFlat: + if (outputStream && mlirMod) { + // Emit remaining defaulted C++ methods + if (!feOptions.ClangIRDisableEmitCXXDefault) + gen->buildDefaultMethods(); + + // FIXME: we cannot roundtrip prettyForm=true right now. + mlir::OpPrintingFlags flags; + flags.enableDebugInfo(/*enable=*/true, /*prettyForm=*/false); + mlirMod->print(*outputStream, flags); + } + break; + case CIRGenAction::OutputType::EmitMLIR: { + auto loweredMlirModule = lowerFromCIRToMLIR(mlirMod, mlirCtx.get()); + assert(outputStream && "Why are we here without an output stream?"); + // FIXME: we cannot roundtrip prettyForm=true right now. 
+ mlir::OpPrintingFlags flags; + flags.enableDebugInfo(/*enable=*/true, /*prettyForm=*/false); + loweredMlirModule->print(*outputStream, flags); + break; + } + case CIRGenAction::OutputType::EmitLLVM: + case CIRGenAction::OutputType::EmitBC: + case CIRGenAction::OutputType::EmitObj: + case CIRGenAction::OutputType::EmitAssembly: { + llvm::LLVMContext llvmCtx; + auto llvmModule = + lowerFromCIRToLLVMIR(feOptions, mlirMod, std::move(mlirCtx), llvmCtx, + feOptions.ClangIRDisableCIRVerifier); + + llvmModule->setTargetTriple(targetOptions.Triple); + + BackendAction backendAction = getBackendActionFromOutputType(action); + + EmitBackendOutput( + diagnosticsEngine, headerSearchOptions, codeGenOptions, targetOptions, + langOptions, C.getTargetInfo().getDataLayoutString(), + llvmModule.get(), backendAction, FS, std::move(outputStream)); + break; + } + case CIRGenAction::OutputType::None: + break; + } + } + + void HandleTagDeclDefinition(TagDecl *D) override { + PrettyStackTraceDecl CrashInfo(D, SourceLocation(), + astContext->getSourceManager(), + "CIR generation of declaration"); + gen->HandleTagDeclDefinition(D); + } + + void HandleTagDeclRequiredDefinition(const TagDecl *D) override { + gen->HandleTagDeclRequiredDefinition(D); + } + + void CompleteTentativeDefinition(VarDecl *D) override { + gen->CompleteTentativeDefinition(D); + } + + void CompleteExternalDeclaration(VarDecl *D) override { + llvm_unreachable("NYI"); + } + + void AssignInheritanceModel(CXXRecordDecl *RD) override { + llvm_unreachable("NYI"); + } + + void HandleVTable(CXXRecordDecl *RD) override { gen->HandleVTable(RD); } +}; +} // namespace cir + +void CIRGenConsumer::anchor() {} + +CIRGenAction::CIRGenAction(OutputType act, mlir::MLIRContext *_MLIRContext) + : mlirContext(_MLIRContext ? _MLIRContext : new mlir::MLIRContext), + action(act) {} + +CIRGenAction::~CIRGenAction() { mlirModule.reset(); } + +void CIRGenAction::EndSourceFileAction() { + // If the consumer creation failed, do nothing. 
+  if (!getCompilerInstance().hasASTConsumer())
+    return;
+
+  // TODO: pass the module around
+  // module = cgConsumer->takeModule();
+}
+
+static std::unique_ptr<llvm::raw_pwrite_stream>
+getOutputStream(CompilerInstance &ci, StringRef inFile,
+                CIRGenAction::OutputType action) {
+  switch (action) {
+  case CIRGenAction::OutputType::EmitAssembly:
+    return ci.createDefaultOutputFile(false, inFile, "s");
+  case CIRGenAction::OutputType::EmitCIR:
+    return ci.createDefaultOutputFile(false, inFile, "cir");
+  case CIRGenAction::OutputType::EmitCIRFlat:
+    return ci.createDefaultOutputFile(false, inFile, "cir");
+  case CIRGenAction::OutputType::EmitMLIR:
+    return ci.createDefaultOutputFile(false, inFile, "mlir");
+  case CIRGenAction::OutputType::EmitLLVM:
+    return ci.createDefaultOutputFile(false, inFile, "ll");
+  case CIRGenAction::OutputType::EmitBC:
+    return ci.createDefaultOutputFile(true, inFile, "bc");
+  case CIRGenAction::OutputType::EmitObj:
+    return ci.createDefaultOutputFile(true, inFile, "o");
+  case CIRGenAction::OutputType::None:
+    return nullptr;
+  }
+
+  llvm_unreachable("Invalid action!");
+}
+
+std::unique_ptr<ASTConsumer>
+CIRGenAction::CreateASTConsumer(CompilerInstance &ci, StringRef inputFile) {
+  auto out = ci.takeOutputStream();
+  if (!out)
+    out = getOutputStream(ci, inputFile, action);
+
+  auto Result = std::make_unique<CIRGenConsumer>(
+      action, ci.getDiagnostics(), &ci.getVirtualFileSystem(),
+      ci.getHeaderSearchOpts(), ci.getCodeGenOpts(), ci.getTargetOpts(),
+      ci.getLangOpts(), ci.getFrontendOpts(), std::move(out));
+  cgConsumer = Result.get();
+
+  // Enable generating macro debug info only when debug info is not disabled
+  // and macro debug info is enabled.
+  if (ci.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo &&
+      ci.getCodeGenOpts().MacroDebugInfo) {
+    llvm_unreachable("NYI");
+  }
+
+  return std::move(Result);
+}
+
+mlir::OwningOpRef<mlir::ModuleOp>
+CIRGenAction::loadModule(llvm::MemoryBufferRef mbRef) {
+  auto module =
+      mlir::parseSourceString<mlir::ModuleOp>(mbRef.getBuffer(), mlirContext);
+  assert(module && "Failed to parse ClangIR module");
+  return module;
+}
+
+void CIRGenAction::ExecuteAction() {
+  if (getCurrentFileKind().getLanguage() != Language::CIR) {
+    this->ASTFrontendAction::ExecuteAction();
+    return;
+  }
+
+  // If this is a CIR file we have to treat it specially.
+  // TODO: This could be done more logically. This is just modeled at the
+  // moment mimicking CodeGenAction, but this is clearly suboptimal.
+  auto &ci = getCompilerInstance();
+  std::unique_ptr<llvm::raw_pwrite_stream> outstream =
+      getOutputStream(ci, getCurrentFile(), action);
+  if (action != OutputType::None && !outstream)
+    return;
+
+  auto &sourceManager = ci.getSourceManager();
+  auto fileID = sourceManager.getMainFileID();
+  auto mainFile = sourceManager.getBufferOrNone(fileID);
+
+  if (!mainFile)
+    return;
+
+  mlirContext->getOrLoadDialect();
+  mlirContext->getOrLoadDialect();
+  mlirContext->getOrLoadDialect();
+
+  // TODO: unwrap this -- this exists because including the `OwningModuleRef` in
+  // CIRGenAction's header would require linking the Frontend against MLIR.
+  // Let's avoid that for now.
+ auto mlirModule = loadModule(*mainFile); + if (!mlirModule) + return; + + llvm::LLVMContext llvmCtx; + auto llvmModule = lowerFromCIRToLLVMIR( + ci.getFrontendOpts(), mlirModule.release(), + std::unique_ptr(mlirContext), llvmCtx); + + if (outstream) + llvmModule->print(*outstream, nullptr); +} + +namespace cir { +void EmitAssemblyAction::anchor() {} +EmitAssemblyAction::EmitAssemblyAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitAssembly, _MLIRContext) {} + +void EmitCIRAction::anchor() {} +EmitCIRAction::EmitCIRAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitCIR, _MLIRContext) {} + +void EmitCIRFlatAction::anchor() {} +EmitCIRFlatAction::EmitCIRFlatAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitCIRFlat, _MLIRContext) {} + +void EmitCIROnlyAction::anchor() {} +EmitCIROnlyAction::EmitCIROnlyAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::None, _MLIRContext) {} + +void EmitMLIRAction::anchor() {} +EmitMLIRAction::EmitMLIRAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitMLIR, _MLIRContext) {} + +void EmitLLVMAction::anchor() {} +EmitLLVMAction::EmitLLVMAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitLLVM, _MLIRContext) {} + +void EmitBCAction::anchor() {} +EmitBCAction::EmitBCAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitBC, _MLIRContext) {} + +void EmitObjAction::anchor() {} +EmitObjAction::EmitObjAction(mlir::MLIRContext *_MLIRContext) + : CIRGenAction(OutputType::EmitObj, _MLIRContext) {} +} // namespace cir + +// Used for -fclangir-analysis-only: use CIR analysis but still use original LLVM codegen path +void AnalysisOnlyActionBase::anchor() {} +AnalysisOnlyActionBase::AnalysisOnlyActionBase(unsigned _Act, + llvm::LLVMContext *_VMContext) + : clang::CodeGenAction(_Act, _VMContext) {} + +std::unique_ptr +AnalysisOnlyActionBase::CreateASTConsumer(clang::CompilerInstance &ci, + llvm::StringRef inFile) { + std::vector> Consumers; + Consumers.push_back(clang::CodeGenAction::CreateASTConsumer(ci, inFile)); + Consumers.push_back(std::make_unique( + CIRGenAction::OutputType::None, ci.getDiagnostics(), + &ci.getVirtualFileSystem(), ci.getHeaderSearchOpts(), ci.getCodeGenOpts(), + ci.getTargetOpts(), ci.getLangOpts(), ci.getFrontendOpts(), nullptr)); + return std::make_unique(std::move(Consumers)); +} + +void AnalysisOnlyAndEmitAssemblyAction::anchor() {} +AnalysisOnlyAndEmitAssemblyAction::AnalysisOnlyAndEmitAssemblyAction( + llvm::LLVMContext *_VMContext) + : AnalysisOnlyActionBase(Backend_EmitAssembly, _VMContext) {} + +void AnalysisOnlyAndEmitBCAction::anchor() {} +AnalysisOnlyAndEmitBCAction::AnalysisOnlyAndEmitBCAction( + llvm::LLVMContext *_VMContext) + : AnalysisOnlyActionBase(Backend_EmitBC, _VMContext) {} + +void AnalysisOnlyAndEmitLLVMAction::anchor() {} +AnalysisOnlyAndEmitLLVMAction::AnalysisOnlyAndEmitLLVMAction( + llvm::LLVMContext *_VMContext) + : AnalysisOnlyActionBase(Backend_EmitLL, _VMContext) {} + +void AnalysisOnlyAndEmitLLVMOnlyAction::anchor() {} +AnalysisOnlyAndEmitLLVMOnlyAction::AnalysisOnlyAndEmitLLVMOnlyAction( + llvm::LLVMContext *_VMContext) + : AnalysisOnlyActionBase(Backend_EmitNothing, _VMContext) {} + +void AnalysisOnlyAndEmitCodeGenOnlyAction::anchor() {} +AnalysisOnlyAndEmitCodeGenOnlyAction::AnalysisOnlyAndEmitCodeGenOnlyAction( + llvm::LLVMContext *_VMContext) + : AnalysisOnlyActionBase(Backend_EmitMCNull, _VMContext) {} + +void AnalysisOnlyAndEmitObjAction::anchor() {} 
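As a usage sketch for the action classes above (host-side boilerplate is assumed: a clang::CompilerInstance already configured with an invocation, diagnostics, and inputs; none of this is part of the patch):

#include "clang/CIRFrontendAction/CIRGenAction.h"
#include "clang/Frontend/CompilerInstance.h"

// Runs CIRGen over the CompilerInstance's inputs and, unless the instance
// already supplies an output stream, writes the module to the default
// "<input>.cir" file chosen by getOutputStream() above.
static bool emitCIR(clang::CompilerInstance &ci) {
  // Passing nullptr lets the action allocate its own MLIRContext, exactly as
  // the CIRGenAction constructor above does.
  ::cir::EmitCIRAction action(/*_MLIRContext=*/nullptr);
  return ci.ExecuteAction(action);
}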
+AnalysisOnlyAndEmitObjAction::AnalysisOnlyAndEmitObjAction( + llvm::LLVMContext *_VMContext) + : AnalysisOnlyActionBase(Backend_EmitObj, _VMContext) {} diff --git a/clang/lib/CIR/FrontendAction/CMakeLists.txt b/clang/lib/CIR/FrontendAction/CMakeLists.txt new file mode 100644 index 000000000000..077bd733cbd8 --- /dev/null +++ b/clang/lib/CIR/FrontendAction/CMakeLists.txt @@ -0,0 +1,38 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_clang_library(clangCIRFrontendAction + CIRGenAction.cpp + + DEPENDS + MLIRCIROpsIncGen + MLIRCIRASTAttrInterfacesIncGen + MLIRCIROpInterfacesIncGen + MLIRCIRLoopOpInterfaceIncGen + MLIRBuiltinLocationAttributesIncGen + MLIRBuiltinTypeInterfacesIncGen + MLIRFunctionInterfacesIncGen + + LINK_LIBS + clangAST + clangBasic + clangCodeGen + clangLex + clangFrontend + clangCIR + clangCIRLoweringDirectToLLVM + clangCIRLoweringThroughMLIR + ${dialect_libs} + MLIRCIR + MLIRAnalysis + MLIRIR + MLIRParser + MLIRSideEffectInterfaces + MLIRTransforms + MLIRSupport + MLIRMemRefDialect + ) diff --git a/clang/lib/CIR/Interfaces/ASTAttrInterfaces.cpp b/clang/lib/CIR/Interfaces/ASTAttrInterfaces.cpp new file mode 100644 index 000000000000..a3f525dd65a3 --- /dev/null +++ b/clang/lib/CIR/Interfaces/ASTAttrInterfaces.cpp @@ -0,0 +1,15 @@ +//====- ASTAttrInterfaces.cpp - Interface to AST Attributes ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/CIR/Interfaces/ASTAttrInterfaces.h" + +#include "llvm/ADT/SmallVector.h" + +using namespace mlir::cir; + +/// Include the generated type qualifiers interfaces. +#include "clang/CIR/Interfaces/ASTAttrInterfaces.cpp.inc" diff --git a/clang/lib/CIR/Interfaces/CIRFPTypeInterface.cpp b/clang/lib/CIR/Interfaces/CIRFPTypeInterface.cpp new file mode 100644 index 000000000000..6062a39be7fa --- /dev/null +++ b/clang/lib/CIR/Interfaces/CIRFPTypeInterface.cpp @@ -0,0 +1,14 @@ +//====- CIRFPTypeInterface.cpp - Interface for floating-point types -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/CIR/Interfaces/CIRFPTypeInterface.h" + +using namespace mlir::cir; + +/// Include the generated interfaces. +#include "clang/CIR/Interfaces/CIRFPTypeInterface.cpp.inc" diff --git a/clang/lib/CIR/Interfaces/CIRLoopOpInterface.cpp b/clang/lib/CIR/Interfaces/CIRLoopOpInterface.cpp new file mode 100644 index 000000000000..8b1708fa815c --- /dev/null +++ b/clang/lib/CIR/Interfaces/CIRLoopOpInterface.cpp @@ -0,0 +1,57 @@ +//===- CIRLoopOpInterface.cpp - Interface for CIR loop-like ops *- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "clang/CIR/Interfaces/CIRLoopOpInterface.h" + +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Interfaces/CIRLoopOpInterface.cpp.inc" +#include "llvm/Support/ErrorHandling.h" + +namespace mlir { +namespace cir { + +void LoopOpInterface::getLoopOpSuccessorRegions( + LoopOpInterface op, RegionBranchPoint point, + SmallVectorImpl ®ions) { + assert(point.isParent() || point.getRegionOrNull()); + + // Branching to first region: go to condition or body (do-while). + if (point.isParent()) { + regions.emplace_back(&op.getEntry(), op.getEntry().getArguments()); + } + // Branching from condition: go to body or exit. + else if (&op.getCond() == point.getRegionOrNull()) { + regions.emplace_back(RegionSuccessor(op->getResults())); + regions.emplace_back(&op.getBody(), op.getBody().getArguments()); + } + // Branching from body: go to step (for) or condition. + else if (&op.getBody() == point.getRegionOrNull()) { + // FIXME(cir): Should we consider break/continue statements here? + auto *afterBody = (op.maybeGetStep() ? op.maybeGetStep() : &op.getCond()); + regions.emplace_back(afterBody, afterBody->getArguments()); + } + // Branching from step: go to condition. + else if (op.maybeGetStep() == point.getRegionOrNull()) { + regions.emplace_back(&op.getCond(), op.getCond().getArguments()); + } else { + llvm_unreachable("unexpected branch origin"); + } +} + +/// Verify invariants of the LoopOpInterface. +LogicalResult detail::verifyLoopOpInterface(Operation *op) { + // FIXME: fix this so the conditionop isn't requiring MLIRCIR + // auto loopOp = cast(op); + // if (!isa(loopOp.getCond().back().getTerminator())) + // return op->emitOpError( + // "expected condition region to terminate with 'cir.condition'"); + return success(); +} + +} // namespace cir +} // namespace mlir diff --git a/clang/lib/CIR/Interfaces/CIROpInterfaces.cpp b/clang/lib/CIR/Interfaces/CIROpInterfaces.cpp new file mode 100644 index 000000000000..93ab428d5f13 --- /dev/null +++ b/clang/lib/CIR/Interfaces/CIROpInterfaces.cpp @@ -0,0 +1,37 @@ +//====- CIROpInterfaces.cpp - Interface to AST Attributes ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/CIR/Interfaces/CIROpInterfaces.h" + +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "llvm/ADT/SmallVector.h" + +using namespace mlir::cir; + +/// Include the generated type qualifiers interfaces. +#include "clang/CIR/Interfaces/CIROpInterfaces.cpp.inc" + +#include "clang/CIR/MissingFeatures.h" + +bool CIRGlobalValueInterface::hasDefaultVisibility() { + assert(!::cir::MissingFeatures::hiddenVisibility()); + assert(!::cir::MissingFeatures::protectedVisibility()); + return isPublic() || isPrivate(); +} + +bool CIRGlobalValueInterface::canBenefitFromLocalAlias() { + assert(!::cir::MissingFeatures::supportIFuncAttr()); + // hasComdat here should be isDeduplicateComdat, but as far as clang codegen + // is concerned, there is no case for Comdat::NoDeduplicate as all comdat + // would be Comdat::Any or Comdat::Largest (in the case of MS ABI). 
And CIRGen + // wouldn't even generate Comdat::Largest comdat as it tries to leave ABI + // specifics to LLVM lowering stage, thus here we don't need test Comdat + // selectionKind. + return hasDefaultVisibility() && isExternalLinkage() && !isDeclaration() && + !hasComdat(); + return false; +} diff --git a/clang/lib/CIR/Interfaces/CMakeLists.txt b/clang/lib/CIR/Interfaces/CMakeLists.txt new file mode 100644 index 000000000000..dee0a1408250 --- /dev/null +++ b/clang/lib/CIR/Interfaces/CMakeLists.txt @@ -0,0 +1,21 @@ +add_clang_library(MLIRCIRInterfaces + ASTAttrInterfaces.cpp + CIROpInterfaces.cpp + CIRLoopOpInterface.cpp + CIRFPTypeInterface.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces + + DEPENDS + MLIRCIRASTAttrInterfacesIncGen + MLIRCIREnumsGen + MLIRCIRFPTypeInterfaceIncGen + MLIRCIRLoopOpInterfaceIncGen + MLIRCIROpInterfacesIncGen + + LINK_LIBS + ${dialect_libs} + MLIRIR + MLIRSupport + ) diff --git a/clang/lib/CIR/Lowering/CMakeLists.txt b/clang/lib/CIR/Lowering/CMakeLists.txt new file mode 100644 index 000000000000..e34884ce21bd --- /dev/null +++ b/clang/lib/CIR/Lowering/CMakeLists.txt @@ -0,0 +1,42 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_clang_library(clangCIRLoweringHelpers + LoweringHelpers.cpp + + DEPENDS + MLIRCIROpsIncGen + MLIRCIREnumsGen + MLIRCIRASTAttrInterfacesIncGen + MLIRCIROpInterfacesIncGen + MLIRCIRLoopOpInterfaceIncGen + MLIRBuiltinLocationAttributesIncGen + MLIRBuiltinTypeInterfacesIncGen + MLIRFunctionInterfacesIncGen + + LINK_LIBS + clangAST + clangBasic + clangCodeGen + clangLex + clangFrontend + clangCIR + ${dialect_libs} + MLIRCIR + MLIRAnalysis + MLIRBuiltinToLLVMIRTranslation + MLIRLLVMToLLVMIRTranslation + MLIRIR + MLIRParser + MLIRSideEffectInterfaces + MLIRTransforms + MLIRSupport + MLIRMemRefDialect + ) + +add_subdirectory(DirectToLLVM) +add_subdirectory(ThroughMLIR) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt new file mode 100644 index 000000000000..ace5166afdf9 --- /dev/null +++ b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt @@ -0,0 +1,48 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_clang_library(clangCIRLoweringDirectToLLVM + LowerToLLVMIR.cpp + LowerToLLVM.cpp + + DEPENDS + MLIRCIREnumsGen + MLIRCIROpsIncGen + MLIRCIRASTAttrInterfacesIncGen + MLIRCIROpInterfacesIncGen + MLIRCIRLoopOpInterfaceIncGen + MLIRBuiltinLocationAttributesIncGen + MLIRBuiltinTypeInterfacesIncGen + MLIRFunctionInterfacesIncGen + + LINK_LIBS + clangAST + clangBasic + clangCodeGen + clangLex + clangFrontend + clangCIR + clangCIRLoweringHelpers + ${dialect_libs} + MLIRCIR + MLIRAnalysis + MLIRBuiltinToLLVMIRTranslation + MLIRLLVMToLLVMIRTranslation + MLIRCIRTransforms + MLIRIR + MLIRParser + MLIRSideEffectInterfaces + MLIRTransforms + MLIRSupport + MLIRMemRefDialect + MLIROpenMPDialect + MLIROpenMPToLLVMIRTranslation + ) + +target_include_directories(clangCIRLoweringDirectToLLVM PRIVATE + ${CLANG_SOURCE_DIR}/lib/CIR/Dialect/Transforms/TargetLowering + ) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp new file mode 100644 index 000000000000..b38c4608f875 --- /dev/null +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -0,0 +1,4448 @@ +//====- LowerToLLVM.cpp - Lowering from CIR to LLVMIR ---------------------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering of CIR operations to LLVMIR. +// +//===----------------------------------------------------------------------===// +#include "LoweringHelpers.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/DLTI/DLTI.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Dialect/LLVMIR/Transforms/Passes.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/IR/Visitors.h" +#include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIR/LoweringHelpers.h" +#include "clang/CIR/MissingFeatures.h" +#include "clang/CIR/Passes.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include + +#include "LowerModule.h" + +using namespace cir; +using namespace llvm; + +namespace cir { +namespace direct { + +//===----------------------------------------------------------------------===// +// Helper Methods +//===----------------------------------------------------------------------===// + +namespace { + +/// Walks a region while skipping operations of type `Ops`. This ensures the +/// callback is not applied to said operations and its children. 
+template +void walkRegionSkipping(mlir::Region ®ion, + mlir::function_ref callback) { + region.walk([&](mlir::Operation *op) { + if (isa(op)) + return mlir::WalkResult::skip(); + callback(op); + return mlir::WalkResult::advance(); + }); +} + +/// Convert from a CIR comparison kind to an LLVM IR integral comparison kind. +mlir::LLVM::ICmpPredicate +convertCmpKindToICmpPredicate(mlir::cir::CmpOpKind kind, bool isSigned) { + using CIR = mlir::cir::CmpOpKind; + using LLVMICmp = mlir::LLVM::ICmpPredicate; + switch (kind) { + case CIR::eq: + return LLVMICmp::eq; + case CIR::ne: + return LLVMICmp::ne; + case CIR::lt: + return (isSigned ? LLVMICmp::slt : LLVMICmp::ult); + case CIR::le: + return (isSigned ? LLVMICmp::sle : LLVMICmp::ule); + case CIR::gt: + return (isSigned ? LLVMICmp::sgt : LLVMICmp::ugt); + case CIR::ge: + return (isSigned ? LLVMICmp::sge : LLVMICmp::uge); + } + llvm_unreachable("Unknown CmpOpKind"); +} + +/// Convert from a CIR comparison kind to an LLVM IR floating-point comparison +/// kind. +mlir::LLVM::FCmpPredicate +convertCmpKindToFCmpPredicate(mlir::cir::CmpOpKind kind) { + using CIR = mlir::cir::CmpOpKind; + using LLVMFCmp = mlir::LLVM::FCmpPredicate; + switch (kind) { + case CIR::eq: + return LLVMFCmp::oeq; + case CIR::ne: + return LLVMFCmp::une; + case CIR::lt: + return LLVMFCmp::olt; + case CIR::le: + return LLVMFCmp::ole; + case CIR::gt: + return LLVMFCmp::ogt; + case CIR::ge: + return LLVMFCmp::oge; + } + llvm_unreachable("Unknown CmpOpKind"); +} + +/// If the given type is a vector type, return the vector's element type. +/// Otherwise return the given type unchanged. +mlir::Type elementTypeIfVector(mlir::Type type) { + if (auto VecType = mlir::dyn_cast(type)) { + return VecType.getEltType(); + } + return type; +} + +mlir::LLVM::Visibility +lowerCIRVisibilityToLLVMVisibility(mlir::cir::VisibilityKind visibilityKind) { + switch (visibilityKind) { + case mlir::cir::VisibilityKind::Default: + return ::mlir::LLVM::Visibility::Default; + case mlir::cir::VisibilityKind::Hidden: + return ::mlir::LLVM::Visibility::Hidden; + case mlir::cir::VisibilityKind::Protected: + return ::mlir::LLVM::Visibility::Protected; + } +} + +// Make sure the LLVM function we are about to create a call for actually +// exists, if not create one. Returns a function +void getOrCreateLLVMFuncOp(mlir::ConversionPatternRewriter &rewriter, + mlir::Operation *srcOp, llvm::StringRef fnName, + mlir::Type fnTy) { + auto modOp = srcOp->getParentOfType(); + auto enclosingFnOp = srcOp->getParentOfType(); + auto *sourceSymbol = mlir::SymbolTable::lookupSymbolIn(modOp, fnName); + if (!sourceSymbol) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(enclosingFnOp); + rewriter.create(srcOp->getLoc(), fnName, fnTy); + } +} + +} // namespace + +//===----------------------------------------------------------------------===// +// Visitors for Lowering CIR Const Attributes +//===----------------------------------------------------------------------===// + +/// Switches on the type of attribute and calls the appropriate conversion. +inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::Attribute attr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter); + +/// IntAttr visitor. 
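+///
+/// For instance, a signed 32-bit CIR integer constant with value 42 is
+/// materialized as roughly:
+///
+///   %0 = llvm.mlir.constant(42 : i32) : i32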
+inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::cir::IntAttr intAttr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto loc = parentOp->getLoc(); + return rewriter.create( + loc, converter->convertType(intAttr.getType()), intAttr.getValue()); +} + +/// BoolAttr visitor. +inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::cir::BoolAttr boolAttr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto loc = parentOp->getLoc(); + return rewriter.create( + loc, converter->convertType(boolAttr.getType()), boolAttr.getValue()); +} + +/// ConstPtrAttr visitor. +inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::cir::ConstPtrAttr ptrAttr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto loc = parentOp->getLoc(); + if (ptrAttr.isNullValue()) { + return rewriter.create( + loc, converter->convertType(ptrAttr.getType())); + } + mlir::DataLayout layout(parentOp->getParentOfType()); + mlir::Value ptrVal = rewriter.create( + loc, rewriter.getIntegerType(layout.getTypeSizeInBits(ptrAttr.getType())), + ptrAttr.getValue().getInt()); + return rewriter.create( + loc, converter->convertType(ptrAttr.getType()), ptrVal); +} + +/// FPAttr visitor. +inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::cir::FPAttr fltAttr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto loc = parentOp->getLoc(); + return rewriter.create( + loc, converter->convertType(fltAttr.getType()), fltAttr.getValue()); +} + +/// ZeroAttr visitor. +inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::cir::ZeroAttr zeroAttr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto loc = parentOp->getLoc(); + return rewriter.create( + loc, converter->convertType(zeroAttr.getType())); +} + +/// ConstStruct visitor. +mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, + mlir::cir::ConstStructAttr constStruct, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto llvmTy = converter->convertType(constStruct.getType()); + auto loc = parentOp->getLoc(); + mlir::Value result = rewriter.create(loc, llvmTy); + + // Iteratively lower each constant element of the struct. + for (auto [idx, elt] : llvm::enumerate(constStruct.getMembers())) { + mlir::Value init = lowerCirAttrAsValue(parentOp, elt, rewriter, converter); + result = rewriter.create(loc, result, init, idx); + } + + return result; +} + +// VTableAttr visitor. +mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, + mlir::cir::VTableAttr vtableArr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto llvmTy = converter->convertType(vtableArr.getType()); + auto loc = parentOp->getLoc(); + mlir::Value result = rewriter.create(loc, llvmTy); + + for (auto [idx, elt] : llvm::enumerate(vtableArr.getVtableData())) { + mlir::Value init = lowerCirAttrAsValue(parentOp, elt, rewriter, converter); + result = rewriter.create(loc, result, init, idx); + } + + return result; +} + +// TypeInfoAttr visitor. 
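+// Lowered with the same recipe as a constant struct: start from an
+// llvm.mlir.undef of the converted record type and llvm.insertvalue each
+// member, e.g. roughly (member values and the struct shape are illustrative):
+//
+//   %0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr)>
+//   %1 = llvm.insertvalue %vtable_ptr, %0[0] : !llvm.struct<(ptr, ptr)>
+//   %2 = llvm.insertvalue %name_ptr, %1[1] : !llvm.struct<(ptr, ptr)>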
+mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, + mlir::cir::TypeInfoAttr typeinfoArr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto llvmTy = converter->convertType(typeinfoArr.getType()); + auto loc = parentOp->getLoc(); + mlir::Value result = rewriter.create(loc, llvmTy); + + for (auto [idx, elt] : llvm::enumerate(typeinfoArr.getData())) { + mlir::Value init = lowerCirAttrAsValue(parentOp, elt, rewriter, converter); + result = rewriter.create(loc, result, init, idx); + } + + return result; +} + +// ConstArrayAttr visitor +mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, + mlir::cir::ConstArrayAttr constArr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto llvmTy = converter->convertType(constArr.getType()); + auto loc = parentOp->getLoc(); + mlir::Value result; + + if (auto zeros = constArr.getTrailingZerosNum()) { + auto arrayTy = constArr.getType(); + result = rewriter.create( + loc, converter->convertType(arrayTy)); + } else { + result = rewriter.create(loc, llvmTy); + } + + // Iteratively lower each constant element of the array. + if (auto arrayAttr = mlir::dyn_cast(constArr.getElts())) { + for (auto [idx, elt] : llvm::enumerate(arrayAttr)) { + mlir::Value init = + lowerCirAttrAsValue(parentOp, elt, rewriter, converter); + result = + rewriter.create(loc, result, init, idx); + } + } + // TODO(cir): this diverges from traditional lowering. Normally the string + // would be a global constant that is memcopied. + else if (auto strAttr = + mlir::dyn_cast(constArr.getElts())) { + auto arrayTy = mlir::dyn_cast(strAttr.getType()); + assert(arrayTy && "String attribute must have an array type"); + auto eltTy = arrayTy.getEltType(); + for (auto [idx, elt] : llvm::enumerate(strAttr)) { + auto init = rewriter.create( + loc, converter->convertType(eltTy), elt); + result = + rewriter.create(loc, result, init, idx); + } + } else { + llvm_unreachable("unexpected ConstArrayAttr elements"); + } + + return result; +} + +// ConstVectorAttr visitor. +mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, + mlir::cir::ConstVectorAttr constVec, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto llvmTy = converter->convertType(constVec.getType()); + auto loc = parentOp->getLoc(); + SmallVector mlirValues; + for (auto elementAttr : constVec.getElts()) { + mlir::Attribute mlirAttr; + if (auto intAttr = mlir::dyn_cast(elementAttr)) { + mlirAttr = rewriter.getIntegerAttr( + converter->convertType(intAttr.getType()), intAttr.getValue()); + } else if (auto floatAttr = + mlir::dyn_cast(elementAttr)) { + mlirAttr = rewriter.getFloatAttr( + converter->convertType(floatAttr.getType()), floatAttr.getValue()); + } else { + llvm_unreachable( + "vector constant with an element that is neither an int nor a float"); + } + mlirValues.push_back(mlirAttr); + } + return rewriter.create( + loc, llvmTy, + mlir::DenseElementsAttr::get(mlir::cast(llvmTy), + mlirValues)); +} + +// GlobalViewAttr visitor. 
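+// A global view lowers to the address of the referenced symbol, plus an
+// inbounds GEP when indices are present and a bitcast when the viewed type
+// differs, e.g. roughly (the symbol and array type are illustrative):
+//
+//   %0 = llvm.mlir.addressof @some_global : !llvm.ptr
+//   %1 = llvm.getelementptr inbounds %0[0, 2]
+//          : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i32>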
+mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, + mlir::cir::GlobalViewAttr globalAttr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + auto module = parentOp->getParentOfType(); + mlir::Type sourceType; + llvm::StringRef symName; + auto *sourceSymbol = + mlir::SymbolTable::lookupSymbolIn(module, globalAttr.getSymbol()); + if (auto llvmSymbol = dyn_cast(sourceSymbol)) { + sourceType = llvmSymbol.getType(); + symName = llvmSymbol.getSymName(); + } else if (auto cirSymbol = dyn_cast(sourceSymbol)) { + sourceType = converter->convertType(cirSymbol.getSymType()); + symName = cirSymbol.getSymName(); + } else if (auto llvmFun = dyn_cast(sourceSymbol)) { + sourceType = llvmFun.getFunctionType(); + symName = llvmFun.getSymName(); + } else if (auto fun = dyn_cast(sourceSymbol)) { + sourceType = converter->convertType(fun.getFunctionType()); + symName = fun.getSymName(); + } else { + llvm_unreachable("Unexpected GlobalOp type"); + } + + auto loc = parentOp->getLoc(); + mlir::Value addrOp = rewriter.create( + loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), symName); + + if (globalAttr.getIndices()) { + llvm::SmallVector indices; + for (auto idx : globalAttr.getIndices()) { + auto intAttr = dyn_cast(idx); + assert(intAttr && "index must be integers"); + indices.push_back(intAttr.getValue().getSExtValue()); + } + auto resTy = addrOp.getType(); + auto eltTy = converter->convertType(sourceType); + addrOp = rewriter.create(loc, resTy, eltTy, addrOp, + indices, true); + } + + auto ptrTy = mlir::dyn_cast(globalAttr.getType()); + assert(ptrTy && "Expecting pointer type in GlobalViewAttr"); + auto llvmEltTy = converter->convertType(ptrTy.getPointee()); + + if (llvmEltTy == sourceType) + return addrOp; + + auto llvmDstTy = converter->convertType(globalAttr.getType()); + return rewriter.create(parentOp->getLoc(), llvmDstTy, + addrOp); +} + +/// Switches on the type of attribute and calls the appropriate conversion. 
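+/// This is the entry point used whenever an initializer cannot be expressed
+/// as a plain MLIR attribute and has to be materialized as a sequence of ops
+/// instead, e.g. (sketch):
+///
+///   mlir::Value init =
+///       lowerCirAttrAsValue(op, op.getValue(), rewriter, getTypeConverter());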
+inline mlir::Value +lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::Attribute attr, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter) { + if (const auto intAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, intAttr, rewriter, converter); + if (const auto fltAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, fltAttr, rewriter, converter); + if (const auto ptrAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, ptrAttr, rewriter, converter); + if (const auto constStruct = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, constStruct, rewriter, converter); + if (const auto constArr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, constArr, rewriter, converter); + if (const auto constVec = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, constVec, rewriter, converter); + if (const auto boolAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, boolAttr, rewriter, converter); + if (const auto zeroAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, zeroAttr, rewriter, converter); + if (const auto globalAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, globalAttr, rewriter, converter); + if (const auto vtableAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, vtableAttr, rewriter, converter); + if (const auto typeinfoAttr = mlir::dyn_cast(attr)) + return lowerCirAttrAsValue(parentOp, typeinfoAttr, rewriter, converter); + + llvm_unreachable("unhandled attribute type"); +} + +//===----------------------------------------------------------------------===// + +mlir::LLVM::Linkage convertLinkage(mlir::cir::GlobalLinkageKind linkage) { + using CIR = mlir::cir::GlobalLinkageKind; + using LLVM = mlir::LLVM::Linkage; + + switch (linkage) { + case CIR::AvailableExternallyLinkage: + return LLVM::AvailableExternally; + case CIR::CommonLinkage: + return LLVM::Common; + case CIR::ExternalLinkage: + return LLVM::External; + case CIR::ExternalWeakLinkage: + return LLVM::ExternWeak; + case CIR::InternalLinkage: + return LLVM::Internal; + case CIR::LinkOnceAnyLinkage: + return LLVM::Linkonce; + case CIR::LinkOnceODRLinkage: + return LLVM::LinkonceODR; + case CIR::PrivateLinkage: + return LLVM::Private; + case CIR::WeakAnyLinkage: + return LLVM::Weak; + case CIR::WeakODRLinkage: + return LLVM::WeakODR; + }; +} + +mlir::LLVM::CConv convertCallingConv(mlir::cir::CallingConv callinvConv) { + using CIR = mlir::cir::CallingConv; + using LLVM = mlir::LLVM::CConv; + + switch (callinvConv) { + case CIR::C: + return LLVM::C; + case CIR::SpirKernel: + return LLVM::SPIR_KERNEL; + case CIR::SpirFunction: + return LLVM::SPIR_FUNC; + } + llvm_unreachable("Unknown calling convention"); +} + +class CIRCopyOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CopyOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + const mlir::Value length = rewriter.create( + op.getLoc(), rewriter.getI32Type(), op.getLength()); + rewriter.replaceOpWithNewOp( + op, adaptor.getDst(), adaptor.getSrc(), length, op.getIsVolatile()); + return mlir::success(); + } +}; + +class CIRMemCpyOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::MemCpyOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter 
&rewriter) const override { + rewriter.replaceOpWithNewOp( + op, adaptor.getDst(), adaptor.getSrc(), adaptor.getLen(), + /*isVolatile=*/false); + return mlir::success(); + } +}; + +static mlir::Value getLLVMIntCast(mlir::ConversionPatternRewriter &rewriter, + mlir::Value llvmSrc, + mlir::IntegerType llvmDstIntTy, + bool isUnsigned, uint64_t cirDstIntWidth) { + auto cirSrcWidth = + mlir::cast(llvmSrc.getType()).getWidth(); + if (cirSrcWidth == cirDstIntWidth) + return llvmSrc; + + auto loc = llvmSrc.getLoc(); + if (cirSrcWidth < cirDstIntWidth) { + if (isUnsigned) + return rewriter.create(loc, llvmDstIntTy, llvmSrc); + return rewriter.create(loc, llvmDstIntTy, llvmSrc); + } + + // Otherwise truncate + return rewriter.create(loc, llvmDstIntTy, llvmSrc); +} + +class CIRPtrStrideOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::PtrStrideOp ptrStrideOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto *tc = getTypeConverter(); + const auto resultTy = tc->convertType(ptrStrideOp.getType()); + auto elementTy = tc->convertType(ptrStrideOp.getElementTy()); + auto *ctx = elementTy.getContext(); + + // void and function types doesn't really have a layout to use in GEPs, + // make it i8 instead. + if (mlir::isa(elementTy) || + mlir::isa(elementTy)) + elementTy = mlir::IntegerType::get(elementTy.getContext(), 8, + mlir::IntegerType::Signless); + + // Zero-extend, sign-extend or trunc the pointer value. + auto index = adaptor.getStride(); + auto width = mlir::cast(index.getType()).getWidth(); + mlir::DataLayout LLVMLayout(ptrStrideOp->getParentOfType()); + auto layoutWidth = + LLVMLayout.getTypeIndexBitwidth(adaptor.getBase().getType()); + auto indexOp = index.getDefiningOp(); + if (indexOp && layoutWidth && width != *layoutWidth) { + // If the index comes from a subtraction, make sure the extension happens + // before it. To achieve that, look at unary minus, which already got + // lowered to "sub 0, x". 
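+      // For example, with 64-bit pointers and an unsigned 32-bit stride, the
+      // negated value `0 - n` wraps in 32 bits, and widening that wrapped
+      // value would turn a small negative stride into a huge positive GEP
+      // offset. Extending `n` first and re-emitting the `sub` at the wider
+      // width keeps the offset negative, roughly:
+      //   %ext = llvm.zext %n : i32 to i64
+      //   %idx = llvm.sub %c0_i64, %ext : i64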
+ auto sub = dyn_cast(indexOp); + auto unary = + dyn_cast(ptrStrideOp.getStride().getDefiningOp()); + bool rewriteSub = + unary && unary.getKind() == mlir::cir::UnaryOpKind::Minus && sub; + if (rewriteSub) + index = indexOp->getOperand(1); + + // Handle the cast + auto llvmDstType = mlir::IntegerType::get(ctx, *layoutWidth); + index = getLLVMIntCast(rewriter, index, llvmDstType, + ptrStrideOp.getStride().getType().isUnsigned(), + *layoutWidth); + + // Rewrite the sub in front of extensions/trunc + if (rewriteSub) { + index = rewriter.create( + index.getLoc(), index.getType(), + rewriter.create( + index.getLoc(), index.getType(), + mlir::IntegerAttr::get(index.getType(), 0)), + index); + rewriter.eraseOp(sub); + } + } + + rewriter.replaceOpWithNewOp( + ptrStrideOp, resultTy, elementTy, adaptor.getBase(), index); + return mlir::success(); + } +}; + +class CIRBrCondOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BrCondOp brOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Value i1Condition; + + auto hasOneUse = false; + + if (auto defOp = brOp.getCond().getDefiningOp()) + hasOneUse = defOp->getResult(0).hasOneUse(); + + if (auto defOp = adaptor.getCond().getDefiningOp()) { + if (auto zext = dyn_cast(defOp)) { + if (zext->use_empty() && + zext->getOperand(0).getType() == rewriter.getI1Type()) { + i1Condition = zext->getOperand(0); + if (hasOneUse) + rewriter.eraseOp(zext); + } + } + } + + if (!i1Condition) + i1Condition = rewriter.create( + brOp.getLoc(), rewriter.getI1Type(), adaptor.getCond()); + + rewriter.replaceOpWithNewOp( + brOp, i1Condition, brOp.getDestTrue(), adaptor.getDestOperandsTrue(), + brOp.getDestFalse(), adaptor.getDestOperandsFalse()); + + return mlir::success(); + } +}; + +class CIRCastOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + inline mlir::Type convertTy(mlir::Type ty) const { + return getTypeConverter()->convertType(ty); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CastOp castOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // For arithmetic conversions, LLVM IR uses the same instruction to convert + // both individual scalars and entire vectors. This lowering pass handles + // both situations. 
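+    // For example, an integral widening cast over a vector such as
+    //   %r = cir.cast(integral, %v : !cir.vector<!s16i x 4>), !cir.vector<!s32i x 4>
+    // lowers to the same instruction as the scalar case, just at vector type:
+    //   %r = llvm.sext %v : vector<4xi16> to vector<4xi32>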
+ + auto src = adaptor.getSrc(); + + switch (castOp.getKind()) { + case mlir::cir::CastKind::array_to_ptrdecay: { + const auto ptrTy = mlir::cast(castOp.getType()); + auto sourceValue = adaptor.getOperands().front(); + auto targetType = convertTy(ptrTy); + auto elementTy = convertTy(ptrTy.getPointee()); + auto offset = llvm::SmallVector{0}; + rewriter.replaceOpWithNewOp( + castOp, targetType, elementTy, sourceValue, offset); + break; + } + case mlir::cir::CastKind::int_to_bool: { + auto zero = rewriter.create( + src.getLoc(), castOp.getSrc().getType(), + mlir::cir::IntAttr::get(castOp.getSrc().getType(), 0)); + rewriter.replaceOpWithNewOp( + castOp, mlir::cir::BoolType::get(getContext()), + mlir::cir::CmpOpKind::ne, castOp.getSrc(), zero); + break; + } + case mlir::cir::CastKind::integral: { + auto srcType = castOp.getSrc().getType(); + auto dstType = castOp.getResult().getType(); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstType = getTypeConverter()->convertType(dstType); + mlir::cir::IntType srcIntType = + mlir::cast(elementTypeIfVector(srcType)); + mlir::cir::IntType dstIntType = + mlir::cast(elementTypeIfVector(dstType)); + rewriter.replaceOp( + castOp, + getLLVMIntCast(rewriter, llvmSrcVal, + mlir::cast(llvmDstType), + srcIntType.isUnsigned(), dstIntType.getWidth())); + break; + } + case mlir::cir::CastKind::floating: { + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = + getTypeConverter()->convertType(castOp.getResult().getType()); + + auto srcTy = elementTypeIfVector(castOp.getSrc().getType()); + auto dstTy = elementTypeIfVector(castOp.getResult().getType()); + + if (!mlir::isa(dstTy) || + !mlir::isa(srcTy)) + return castOp.emitError() + << "NYI cast from " << srcTy << " to " << dstTy; + + auto getFloatWidth = [](mlir::Type ty) -> unsigned { + return mlir::cast(ty).getWidth(); + }; + + if (getFloatWidth(srcTy) > getFloatWidth(dstTy)) + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + else + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::int_to_ptr: { + auto dstTy = mlir::cast(castOp.getType()); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::ptr_to_int: { + auto dstTy = mlir::cast(castOp.getType()); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::float_to_bool: { + auto dstTy = mlir::cast(castOp.getType()); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + auto kind = mlir::LLVM::FCmpPredicate::une; + + // Check if float is not equal to zero. + auto zeroFloat = rewriter.create( + castOp.getLoc(), llvmSrcVal.getType(), + mlir::FloatAttr::get(llvmSrcVal.getType(), 0.0)); + + // Extend comparison result to either bool (C++) or int (C). 
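+      // That is, for `float f; if (f) ...` the test becomes roughly:
+      //   %cmp = llvm.fcmp "une" %f, %zero : f32
+      //   %res = llvm.zext %cmp : i1 to i8
+      // with i8 standing in for the converted bool type.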
+ mlir::Value cmpResult = rewriter.create( + castOp.getLoc(), kind, llvmSrcVal, zeroFloat); + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + cmpResult); + return mlir::success(); + } + case mlir::cir::CastKind::bool_to_int: { + auto dstTy = mlir::cast(castOp.getType()); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmSrcTy = mlir::cast(llvmSrcVal.getType()); + auto llvmDstTy = + mlir::cast(getTypeConverter()->convertType(dstTy)); + if (llvmSrcTy.getWidth() == llvmDstTy.getWidth()) + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + else + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::bool_to_float: { + auto dstTy = castOp.getType(); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::int_to_float: { + auto dstTy = castOp.getType(); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + if (mlir::cast( + elementTypeIfVector(castOp.getSrc().getType())) + .isSigned()) + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + else + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::float_to_int: { + auto dstTy = castOp.getType(); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + if (mlir::cast( + elementTypeIfVector(castOp.getResult().getType())) + .isSigned()) + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + else + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::bitcast: { + auto dstTy = castOp.getType(); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + rewriter.replaceOpWithNewOp(castOp, llvmDstTy, + llvmSrcVal); + return mlir::success(); + } + case mlir::cir::CastKind::ptr_to_bool: { + auto zero = + mlir::IntegerAttr::get(mlir::IntegerType::get(getContext(), 64), 0); + auto null = rewriter.create( + src.getLoc(), castOp.getSrc().getType(), + mlir::cir::ConstPtrAttr::get(getContext(), castOp.getSrc().getType(), + zero)); + rewriter.replaceOpWithNewOp( + castOp, mlir::cir::BoolType::get(getContext()), + mlir::cir::CmpOpKind::ne, castOp.getSrc(), null); + break; + } + case mlir::cir::CastKind::address_space: { + auto dstTy = castOp.getType(); + auto llvmSrcVal = adaptor.getOperands().front(); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + rewriter.replaceOpWithNewOp( + castOp, llvmDstTy, llvmSrcVal); + break; + } + default: { + return castOp.emitError("Unhandled cast kind: ") + << castOp.getKindAttrName(); + } + } + + return mlir::success(); + } +}; + +class CIRReturnLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ReturnOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, + adaptor.getOperands()); + return mlir::LogicalResult::success(); + } +}; + +struct ConvertCIRToLLVMPass + : public mlir::PassWrapper> { + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; + + void buildGlobalAnnotationsVar(); + + 
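+  // buildGlobalAnnotationsVar is expected to gather the annotations collected
+  // during lowering into the module-level @llvm.global.annotations array,
+  // emitted in `annotationSection` below.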
virtual StringRef getArgument() const override { return "cir-flat-to-llvm"; } + static constexpr StringRef annotationSection = "llvm.metadata"; +}; + +mlir::LogicalResult +rewriteToCallOrInvoke(mlir::Operation *op, mlir::ValueRange callOperands, + mlir::ConversionPatternRewriter &rewriter, + const mlir::TypeConverter *converter, + mlir::FlatSymbolRefAttr calleeAttr, + mlir::Block *continueBlock = nullptr, + mlir::Block *landingPadBlock = nullptr) { + llvm::SmallVector llvmResults; + auto cirResults = op->getResultTypes(); + auto callIf = cast(op); + + if (converter->convertTypes(cirResults, llvmResults).failed()) + return mlir::failure(); + + auto cconv = convertCallingConv(callIf.getCallingConv()); + + if (calleeAttr) { // direct call + if (landingPadBlock) { + auto newOp = rewriter.replaceOpWithNewOp( + op, llvmResults, calleeAttr, callOperands, continueBlock, + mlir::ValueRange{}, landingPadBlock, mlir::ValueRange{}); + newOp.setCConv(cconv); + } else { + auto newOp = rewriter.replaceOpWithNewOp( + op, llvmResults, calleeAttr, callOperands); + newOp.setCConv(cconv); + } + } else { // indirect call + assert(op->getOperands().size() && + "operands list must no be empty for the indirect call"); + auto typ = op->getOperands().front().getType(); + assert(isa(typ) && "expected pointer type"); + auto ptyp = dyn_cast(typ); + auto ftyp = dyn_cast(ptyp.getPointee()); + assert(ftyp && "expected a pointer to a function as the first operand"); + + if (landingPadBlock) { + auto llvmFnTy = + dyn_cast(converter->convertType(ftyp)); + auto newOp = rewriter.replaceOpWithNewOp( + op, llvmFnTy, mlir::FlatSymbolRefAttr{}, callOperands, continueBlock, + mlir::ValueRange{}, landingPadBlock, mlir::ValueRange{}); + newOp.setCConv(cconv); + } else { + auto newOp = rewriter.replaceOpWithNewOp( + op, + dyn_cast(converter->convertType(ftyp)), + callOperands); + newOp.setCConv(cconv); + } + } + return mlir::success(); +} + +class CIRCallLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CallOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + return rewriteToCallOrInvoke(op.getOperation(), adaptor.getOperands(), + rewriter, getTypeConverter(), + op.getCalleeAttr()); + } +}; + +class CIRTryCallLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::TryCallOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + if (op.getCallingConv() != mlir::cir::CallingConv::C) { + return op.emitError( + "non-C calling convention is not implemented for try_call"); + } + return rewriteToCallOrInvoke( + op.getOperation(), adaptor.getOperands(), rewriter, getTypeConverter(), + op.getCalleeAttr(), op.getCont(), op.getLandingPad()); + } +}; + +static mlir::LLVM::LLVMStructType +getLLVMLandingPadStructTy(mlir::ConversionPatternRewriter &rewriter) { + // Create the landing pad type: struct { ptr, i32 } + mlir::MLIRContext *ctx = rewriter.getContext(); + auto llvmPtr = mlir::LLVM::LLVMPointerType::get(ctx); + llvm::SmallVector structFields; + structFields.push_back(llvmPtr); + structFields.push_back(rewriter.getI32Type()); + + return mlir::LLVM::LLVMStructType::getLiteral(ctx, structFields); +} + +class CIREhInflightOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + 
matchAndRewrite(mlir::cir::EhInflightOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Location loc = op.getLoc(); + auto llvmLandingPadStructTy = getLLVMLandingPadStructTy(rewriter); + mlir::ArrayAttr symListAttr = op.getSymTypeListAttr(); + mlir::SmallVector symAddrs; + + auto llvmFn = op->getParentOfType(); + assert(llvmFn && "expected LLVM function parent"); + mlir::Block *entryBlock = &llvmFn.getRegion().front(); + assert(entryBlock->isEntryBlock()); + + // %x = landingpad { ptr, i32 } + // Note that since llvm.landingpad has to be the first operation on the + // block, any needed value for its operands has to be added somewhere else. + if (symListAttr) { + // catch ptr @_ZTIi + // catch ptr @_ZTIPKc + for (mlir::Attribute attr : op.getSymTypeListAttr()) { + auto symAttr = cast(attr); + // Generate `llvm.mlir.addressof` for each symbol, and place those + // operations in the LLVM function entry basic block. + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(entryBlock); + mlir::Value addrOp = rewriter.create( + loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), + symAttr.getValue()); + symAddrs.push_back(addrOp); + } + } else { + if (!op.getCleanup()) { + // catch ptr null + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(entryBlock); + mlir::Value nullOp = rewriter.create( + loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext())); + symAddrs.push_back(nullOp); + } + } + + // %slot = extractvalue { ptr, i32 } %x, 0 + // %selector = extractvalue { ptr, i32 } %x, 1 + auto padOp = rewriter.create( + loc, llvmLandingPadStructTy, symAddrs); + SmallVector slotIdx = {0}; + SmallVector selectorIdx = {1}; + + if (op.getCleanup()) + padOp.setCleanup(true); + + mlir::Value slot = + rewriter.create(loc, padOp, slotIdx); + mlir::Value selector = + rewriter.create(loc, padOp, selectorIdx); + + rewriter.replaceOp(op, mlir::ValueRange{slot, selector}); + + // Landing pads are required to be in LLVM functions with personality + // attribute. FIXME: for now hardcode personality creation in order to start + // adding exception tests, once we annotate CIR with such information, + // change it to be in FuncOp lowering instead. + { + mlir::OpBuilder::InsertionGuard guard(rewriter); + // Insert personality decl before the current function. + rewriter.setInsertionPoint(llvmFn); + auto personalityFnTy = + mlir::LLVM::LLVMFunctionType::get(rewriter.getI32Type(), {}, + /*isVarArg=*/true); + // Get or create `__gxx_personality_v0` + StringRef fnName = "__gxx_personality_v0"; + getOrCreateLLVMFuncOp(rewriter, op, fnName, personalityFnTy); + llvmFn.setPersonality(fnName); + } + return mlir::success(); + } +}; + +class CIRAllocaLowering + : public mlir::OpConversionPattern { + mlir::DataLayout const &dataLayout; + +public: + CIRAllocaLowering(mlir::TypeConverter const &typeConverter, + mlir::DataLayout const &dataLayout, + mlir::MLIRContext *context) + : OpConversionPattern(typeConverter, context), + dataLayout(dataLayout) {} + + mlir::LogicalResult + matchAndRewrite(mlir::cir::AllocaOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Value size = + op.isDynamic() + ? 
adaptor.getDynAllocSize() + : rewriter.create( + op.getLoc(), + typeConverter->convertType(rewriter.getIndexType()), + rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); + auto elementTy = getTypeConverter()->convertType(op.getAllocaType()); + auto resultTy = getTypeConverter()->convertType(op.getResult().getType()); + // Verification between the CIR alloca AS and the one from data layout. + { + auto resPtrTy = mlir::cast(resultTy); + auto dlAllocaASAttr = mlir::cast_if_present( + dataLayout.getAllocaMemorySpace()); + // Absence means 0 + // TODO: The query for the alloca AS should be done through CIRDataLayout + // instead to reuse the logic of interpret null attr as 0. + auto dlAllocaAS = dlAllocaASAttr ? dlAllocaASAttr.getInt() : 0; + if (dlAllocaAS != resPtrTy.getAddressSpace()) { + return op.emitError() << "alloca address space doesn't match the one " + "from the target data layout: " + << dlAllocaAS; + } + } + rewriter.replaceOpWithNewOp( + op, resultTy, elementTy, size, op.getAlignmentAttr().getInt()); + return mlir::success(); + } +}; + +static mlir::LLVM::AtomicOrdering +getLLVMMemOrder(std::optional &memorder) { + if (!memorder) + return mlir::LLVM::AtomicOrdering::not_atomic; + switch (*memorder) { + case mlir::cir::MemOrder::Relaxed: + return mlir::LLVM::AtomicOrdering::monotonic; + case mlir::cir::MemOrder::Consume: + case mlir::cir::MemOrder::Acquire: + return mlir::LLVM::AtomicOrdering::acquire; + case mlir::cir::MemOrder::Release: + return mlir::LLVM::AtomicOrdering::release; + case mlir::cir::MemOrder::AcquireRelease: + return mlir::LLVM::AtomicOrdering::acq_rel; + case mlir::cir::MemOrder::SequentiallyConsistent: + return mlir::LLVM::AtomicOrdering::seq_cst; + } + llvm_unreachable("unknown memory order"); +} + +class CIRLoadLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::LoadOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + const auto llvmTy = + getTypeConverter()->convertType(op.getResult().getType()); + auto memorder = op.getMemOrder(); + auto ordering = getLLVMMemOrder(memorder); + auto alignOpt = op.getAlignment(); + unsigned alignment = 0; + if (!alignOpt) { + mlir::DataLayout layout(op->getParentOfType()); + alignment = (unsigned)layout.getTypeABIAlignment(llvmTy); + } else { + alignment = *alignOpt; + } + + // TODO: nontemporal, invariant, syncscope. + rewriter.replaceOpWithNewOp( + op, llvmTy, adaptor.getAddr(), /* alignment */ alignment, + op.getIsVolatile(), /* nontemporal */ false, + /* invariant */ false, ordering); + return mlir::LogicalResult::success(); + } +}; + +class CIRStoreLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::StoreOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto memorder = op.getMemOrder(); + auto ordering = getLLVMMemOrder(memorder); + auto alignOpt = op.getAlignment(); + unsigned alignment = 0; + if (!alignOpt) { + const auto llvmTy = + getTypeConverter()->convertType(op.getValue().getType()); + mlir::DataLayout layout(op->getParentOfType()); + alignment = (unsigned)layout.getTypeABIAlignment(llvmTy); + } else { + alignment = *alignOpt; + } + + // TODO: nontemporal, syncscope. 
+ rewriter.replaceOpWithNewOp( + op, adaptor.getValue(), adaptor.getAddr(), alignment, + op.getIsVolatile(), /* nontemporal */ false, ordering); + return mlir::LogicalResult::success(); + } +}; + +bool hasTrailingZeros(mlir::cir::ConstArrayAttr attr) { + auto array = mlir::dyn_cast(attr.getElts()); + return attr.hasTrailingZeros() || + (array && std::count_if(array.begin(), array.end(), [](auto elt) { + auto ar = dyn_cast(elt); + return ar && hasTrailingZeros(ar); + })); +} + +static mlir::Attribute +lowerDataMemberAttr(mlir::ModuleOp moduleOp, mlir::cir::DataMemberAttr attr, + const mlir::TypeConverter &typeConverter) { + mlir::DataLayout layout{moduleOp}; + + uint64_t memberOffset; + if (attr.isNullPtr()) { + // TODO(cir): the numerical value of a null data member pointer is + // ABI-specific and should be queried through ABI. + assert(!MissingFeatures::targetCodeGenInfoGetNullPointer()); + memberOffset = -1ull; + } else { + auto memberIndex = attr.getMemberIndex().value(); + memberOffset = + attr.getType().getClsTy().getElementOffset(layout, memberIndex); + } + + auto underlyingIntTy = mlir::IntegerType::get( + moduleOp->getContext(), layout.getTypeSizeInBits(attr.getType())); + return mlir::IntegerAttr::get(underlyingIntTy, memberOffset); +} + +class CIRConstantLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ConstantOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Attribute attr = op.getValue(); + + if (mlir::isa(op.getType())) { + int value = + (op.getValue() == + mlir::cir::BoolAttr::get( + getContext(), ::mlir::cir::BoolType::get(getContext()), true)); + attr = rewriter.getIntegerAttr(typeConverter->convertType(op.getType()), + value); + } else if (mlir::isa(op.getType())) { + attr = rewriter.getIntegerAttr( + typeConverter->convertType(op.getType()), + mlir::cast(op.getValue()).getValue()); + } else if (mlir::isa(op.getType())) { + attr = rewriter.getFloatAttr( + typeConverter->convertType(op.getType()), + mlir::cast(op.getValue()).getValue()); + } else if (auto complexTy = + mlir::dyn_cast(op.getType())) { + auto complexAttr = mlir::cast(op.getValue()); + auto complexElemTy = complexTy.getElementTy(); + auto complexElemLLVMTy = typeConverter->convertType(complexElemTy); + + mlir::Attribute components[2]; + if (mlir::isa(complexElemTy)) { + components[0] = rewriter.getIntegerAttr( + complexElemLLVMTy, + mlir::cast(complexAttr.getReal()).getValue()); + components[1] = rewriter.getIntegerAttr( + complexElemLLVMTy, + mlir::cast(complexAttr.getImag()).getValue()); + } else { + components[0] = rewriter.getFloatAttr( + complexElemLLVMTy, + mlir::cast(complexAttr.getReal()).getValue()); + components[1] = rewriter.getFloatAttr( + complexElemLLVMTy, + mlir::cast(complexAttr.getImag()).getValue()); + } + + attr = rewriter.getArrayAttr(components); + } else if (mlir::isa(op.getType())) { + // Optimize with dedicated LLVM op for null pointers. 
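+      // That is, a null pointer constant such as `cir.const #cir.ptr<null>`
+      // becomes roughly:
+      //   %p = llvm.mlir.zero : !llvm.ptr
+      // instead of an inttoptr of a zero integer.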
+ if (mlir::isa(op.getValue())) { + if (mlir::cast(op.getValue()).isNullValue()) { + rewriter.replaceOpWithNewOp( + op, typeConverter->convertType(op.getType())); + return mlir::success(); + } + } + // Lower GlobalViewAttr to llvm.mlir.addressof + if (auto gv = mlir::dyn_cast(op.getValue())) { + auto newOp = lowerCirAttrAsValue(op, gv, rewriter, getTypeConverter()); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } + attr = op.getValue(); + } else if (mlir::isa(op.getType())) { + auto dataMember = mlir::cast(op.getValue()); + attr = lowerDataMemberAttr(op->getParentOfType(), + dataMember, *typeConverter); + } + // TODO(cir): constant arrays are currently just pushed into the stack using + // the store instruction, instead of being stored as global variables and + // then memcopyied into the stack (as done in Clang). + else if (auto arrTy = mlir::dyn_cast(op.getType())) { + // Fetch operation constant array initializer. + + auto constArr = mlir::dyn_cast(op.getValue()); + if (!constArr && !isa(op.getValue())) + return op.emitError() << "array does not have a constant initializer"; + + std::optional denseAttr; + if (constArr && hasTrailingZeros(constArr)) { + auto newOp = + lowerCirAttrAsValue(op, constArr, rewriter, getTypeConverter()); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } else if (constArr && + (denseAttr = lowerConstArrayAttr(constArr, typeConverter))) { + attr = denseAttr.value(); + } else { + auto initVal = + lowerCirAttrAsValue(op, op.getValue(), rewriter, typeConverter); + rewriter.replaceAllUsesWith(op, initVal); + rewriter.eraseOp(op); + return mlir::success(); + } + } else if (const auto structAttr = + mlir::dyn_cast(op.getValue())) { + // TODO(cir): this diverges from traditional lowering. Normally the + // initializer would be a global constant that is memcopied. Here we just + // define a local constant with llvm.undef that will be stored into the + // stack. + auto initVal = + lowerCirAttrAsValue(op, structAttr, rewriter, typeConverter); + rewriter.replaceAllUsesWith(op, initVal); + rewriter.eraseOp(op); + return mlir::success(); + } else if (auto strTy = + mlir::dyn_cast(op.getType())) { + if (auto zero = mlir::dyn_cast(op.getValue())) { + auto initVal = lowerCirAttrAsValue(op, zero, rewriter, typeConverter); + rewriter.replaceAllUsesWith(op, initVal); + rewriter.eraseOp(op); + return mlir::success(); + } + + return op.emitError() << "unsupported lowering for struct constant type " + << op.getType(); + } else if (const auto vecTy = + mlir::dyn_cast(op.getType())) { + rewriter.replaceOp(op, lowerCirAttrAsValue(op, op.getValue(), rewriter, + getTypeConverter())); + return mlir::success(); + } else + return op.emitError() << "unsupported constant type " << op.getType(); + + rewriter.replaceOpWithNewOp( + op, getTypeConverter()->convertType(op.getType()), attr); + + return mlir::success(); + } +}; + +class CIRVectorCreateLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecCreateOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Start with an 'undef' value for the vector. Then 'insertelement' for + // each of the vector elements. 
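+    // For a four-element cir.vec.create this produces roughly:
+    //   %v0 = llvm.mlir.undef : vector<4xi32>
+    //   %v1 = llvm.insertelement %e0, %v0[%c0 : i64] : vector<4xi32>
+    //   %v2 = llvm.insertelement %e1, %v1[%c1 : i64] : vector<4xi32>
+    //   ... and so on for the remaining elements.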
+ auto vecTy = mlir::dyn_cast(op.getType()); + assert(vecTy && "result type of cir.vec.create op is not VectorType"); + auto llvmTy = typeConverter->convertType(vecTy); + auto loc = op.getLoc(); + mlir::Value result = rewriter.create(loc, llvmTy); + assert(vecTy.getSize() == op.getElements().size() && + "cir.vec.create op count doesn't match vector type elements count"); + for (uint64_t i = 0; i < vecTy.getSize(); ++i) { + mlir::Value indexValue = rewriter.create( + loc, rewriter.getI64Type(), i); + result = rewriter.create( + loc, result, adaptor.getElements()[i], indexValue); + } + rewriter.replaceOp(op, result); + return mlir::success(); + } +}; + +class CIRVectorCmpOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecCmpOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + assert(mlir::isa(op.getType()) && + mlir::isa(op.getLhs().getType()) && + mlir::isa(op.getRhs().getType()) && + "Vector compare with non-vector type"); + // LLVM IR vector comparison returns a vector of i1. This one-bit vector + // must be sign-extended to the correct result type. + auto elementType = elementTypeIfVector(op.getLhs().getType()); + mlir::Value bitResult; + if (auto intType = mlir::dyn_cast(elementType)) { + bitResult = rewriter.create( + op.getLoc(), + convertCmpKindToICmpPredicate(op.getKind(), intType.isSigned()), + adaptor.getLhs(), adaptor.getRhs()); + } else if (mlir::isa(elementType)) { + bitResult = rewriter.create( + op.getLoc(), convertCmpKindToFCmpPredicate(op.getKind()), + adaptor.getLhs(), adaptor.getRhs()); + } else { + return op.emitError() << "unsupported type for VecCmpOp: " << elementType; + } + rewriter.replaceOpWithNewOp( + op, typeConverter->convertType(op.getType()), bitResult); + return mlir::success(); + } +}; + +class CIRVectorSplatLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecSplatOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Vector splat can be implemented with an `insertelement` and a + // `shufflevector`, which is better than an `insertelement` for each + // element in the vector. Start with an undef vector. Insert the value into + // the first element. Then use a `shufflevector` with a mask of all 0 to + // fill out the entire vector with that value. 
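+    // For a four-element splat of %x this produces roughly:
+    //   %u = llvm.mlir.undef : vector<4xi32>
+    //   %v = llvm.insertelement %x, %u[%c0 : i64] : vector<4xi32>
+    //   %r = llvm.shufflevector %v, %u [0, 0, 0, 0] : vector<4xi32>
+    // i.e. one insert plus one shuffle instead of one insert per element.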
+ auto vecTy = mlir::dyn_cast(op.getType()); + assert(vecTy && "result type of cir.vec.splat op is not VectorType"); + auto llvmTy = typeConverter->convertType(vecTy); + auto loc = op.getLoc(); + mlir::Value undef = rewriter.create(loc, llvmTy); + mlir::Value indexValue = + rewriter.create(loc, rewriter.getI64Type(), 0); + mlir::Value elementValue = adaptor.getValue(); + mlir::Value oneElement = rewriter.create( + loc, undef, elementValue, indexValue); + SmallVector zeroValues(vecTy.getSize(), 0); + mlir::Value shuffled = rewriter.create( + loc, oneElement, undef, zeroValues); + rewriter.replaceOp(op, shuffled); + return mlir::success(); + } +}; + +class CIRVectorTernaryLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecTernaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + assert(mlir::isa(op.getType()) && + mlir::isa(op.getCond().getType()) && + mlir::isa(op.getVec1().getType()) && + mlir::isa(op.getVec2().getType()) && + "Vector ternary op with non-vector type"); + // Convert `cond` into a vector of i1, then use that in a `select` op. + mlir::Value bitVec = rewriter.create( + op.getLoc(), mlir::LLVM::ICmpPredicate::ne, adaptor.getCond(), + rewriter.create( + op.getCond().getLoc(), + typeConverter->convertType(op.getCond().getType()))); + rewriter.replaceOpWithNewOp( + op, bitVec, adaptor.getVec1(), adaptor.getVec2()); + return mlir::success(); + } +}; + +class CIRVectorShuffleIntsLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecShuffleOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // LLVM::ShuffleVectorOp takes an ArrayRef of int for the list of indices. + // Convert the ClangIR ArrayAttr of IntAttr constants into a + // SmallVector. + SmallVector indices; + std::transform(op.getIndices().begin(), op.getIndices().end(), + std::back_inserter(indices), [](mlir::Attribute intAttr) { + return mlir::cast(intAttr) + .getValue() + .getSExtValue(); + }); + rewriter.replaceOpWithNewOp( + op, adaptor.getVec1(), adaptor.getVec2(), indices); + return mlir::success(); + } +}; + +class CIRVectorShuffleVecLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern< + mlir::cir::VecShuffleDynamicOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecShuffleDynamicOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // LLVM IR does not have an operation that corresponds to this form of + // the built-in. 
+ // __builtin_shufflevector(V, I) + // is implemented as this pseudocode, where the for loop is unrolled + // and N is the number of elements: + // masked = I & (N-1) + // for (i in 0 <= i < N) + // result[i] = V[masked[i]] + auto loc = op.getLoc(); + mlir::Value input = adaptor.getVec(); + mlir::Type llvmIndexVecType = + getTypeConverter()->convertType(op.getIndices().getType()); + mlir::Type llvmIndexType = getTypeConverter()->convertType( + elementTypeIfVector(op.getIndices().getType())); + uint64_t numElements = + mlir::cast(op.getVec().getType()).getSize(); + mlir::Value maskValue = rewriter.create( + loc, llvmIndexType, + mlir::IntegerAttr::get(llvmIndexType, numElements - 1)); + mlir::Value maskVector = + rewriter.create(loc, llvmIndexVecType); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = rewriter.create( + loc, rewriter.getI64Type(), i); + maskVector = rewriter.create( + loc, maskVector, maskValue, iValue); + } + mlir::Value maskedIndices = rewriter.create( + loc, llvmIndexVecType, adaptor.getIndices(), maskVector); + mlir::Value result = rewriter.create( + loc, getTypeConverter()->convertType(op.getVec().getType())); + for (uint64_t i = 0; i < numElements; ++i) { + mlir::Value iValue = rewriter.create( + loc, rewriter.getI64Type(), i); + mlir::Value indexValue = rewriter.create( + loc, maskedIndices, iValue); + mlir::Value valueAtIndex = + rewriter.create(loc, input, indexValue); + result = rewriter.create( + loc, result, valueAtIndex, iValue); + } + rewriter.replaceOp(op, result); + return mlir::success(); + } +}; + +class CIRVAStartLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VAStartOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto opaquePtr = mlir::LLVM::LLVMPointerType::get(getContext()); + auto vaList = rewriter.create( + op.getLoc(), opaquePtr, adaptor.getOperands().front()); + rewriter.replaceOpWithNewOp(op, vaList); + return mlir::success(); + } +}; + +class CIRVAEndLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VAEndOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto opaquePtr = mlir::LLVM::LLVMPointerType::get(getContext()); + auto vaList = rewriter.create( + op.getLoc(), opaquePtr, adaptor.getOperands().front()); + rewriter.replaceOpWithNewOp(op, vaList); + return mlir::success(); + } +}; + +class CIRVACopyLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VACopyOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto opaquePtr = mlir::LLVM::LLVMPointerType::get(getContext()); + auto dstList = rewriter.create( + op.getLoc(), opaquePtr, adaptor.getOperands().front()); + auto srcList = rewriter.create( + op.getLoc(), opaquePtr, adaptor.getOperands().back()); + rewriter.replaceOpWithNewOp(op, dstList, srcList); + return mlir::success(); + } +}; + +class CIRVAArgLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VAArgOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + return op.emitError("cir.vaarg lowering is NYI"); + } +}; + +class CIRFuncLowering : 
public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + /// Returns the name used for the linkage attribute. This *must* correspond + /// to the name of the attribute in ODS. + static StringRef getLinkageAttrNameString() { return "linkage"; } + + /// Convert the `cir.func` attributes to `llvm.func` attributes. + /// Only retain those attributes that are not constructed by + /// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out + /// argument attributes. + void + lowerFuncAttributes(mlir::cir::FuncOp func, bool filterArgAndResAttrs, + SmallVectorImpl &result) const { + for (auto attr : func->getAttrs()) { + if (attr.getName() == mlir::SymbolTable::getSymbolAttrName() || + attr.getName() == func.getFunctionTypeAttrName() || + attr.getName() == getLinkageAttrNameString() || + attr.getName() == func.getCallingConvAttrName() || + (filterArgAndResAttrs && + (attr.getName() == func.getArgAttrsAttrName() || + attr.getName() == func.getResAttrsAttrName()))) + continue; + + // `CIRDialectLLVMIRTranslationInterface` requires "cir." prefix for + // dialect specific attributes, rename them. + if (attr.getName() == func.getExtraAttrsAttrName()) { + std::string cirName = "cir." + func.getExtraAttrsAttrName().str(); + attr.setName(mlir::StringAttr::get(getContext(), cirName)); + + lowerFuncOpenCLKernelMetadata(attr); + } + result.push_back(attr); + } + } + + /// When do module translation, we can only translate LLVM-compatible types. + /// Here we lower possible OpenCLKernelMetadataAttr to use the converted type. + void + lowerFuncOpenCLKernelMetadata(mlir::NamedAttribute &extraAttrsEntry) const { + const auto attrKey = mlir::cir::OpenCLKernelMetadataAttr::getMnemonic(); + auto oldExtraAttrs = + cast(extraAttrsEntry.getValue()); + if (!oldExtraAttrs.getElements().contains(attrKey)) + return; + + mlir::NamedAttrList newExtraAttrs; + for (auto entry : oldExtraAttrs.getElements()) { + if (entry.getName() == attrKey) { + auto clKernelMetadata = + cast(entry.getValue()); + if (auto vecTypeHint = clKernelMetadata.getVecTypeHint()) { + auto newType = typeConverter->convertType(vecTypeHint.getValue()); + auto newTypeHint = mlir::TypeAttr::get(newType); + auto newCLKMAttr = mlir::cir::OpenCLKernelMetadataAttr::get( + getContext(), clKernelMetadata.getWorkGroupSizeHint(), + clKernelMetadata.getReqdWorkGroupSize(), newTypeHint, + clKernelMetadata.getVecTypeHintSignedness(), + clKernelMetadata.getIntelReqdSubGroupSize()); + entry.setValue(newCLKMAttr); + } + } + newExtraAttrs.push_back(entry); + } + extraAttrsEntry.setValue(mlir::cir::ExtraFuncAttributesAttr::get( + getContext(), newExtraAttrs.getDictionary(getContext()))); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::FuncOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + auto fnType = op.getFunctionType(); + auto isDsoLocal = op.getDsolocal(); + mlir::TypeConverter::SignatureConversion signatureConversion( + fnType.getNumInputs()); + + for (const auto &argType : enumerate(fnType.getInputs())) { + auto convertedType = typeConverter->convertType(argType.value()); + if (!convertedType) + return mlir::failure(); + signatureConversion.addInputs(argType.index(), convertedType); + } + + mlir::Type resultType = + getTypeConverter()->convertType(fnType.getReturnType()); + + // Create the LLVM function operation. + auto llvmFnTy = mlir::LLVM::LLVMFunctionType::get( + resultType ? 
resultType : mlir::LLVM::LLVMVoidType::get(getContext()), + signatureConversion.getConvertedTypes(), + /*isVarArg=*/fnType.isVarArg()); + // LLVMFuncOp expects a single FileLine Location instead of a fused + // location. + auto Loc = op.getLoc(); + if (mlir::isa(Loc)) { + auto FusedLoc = mlir::cast(Loc); + Loc = FusedLoc.getLocations()[0]; + } + assert((mlir::isa(Loc) || + mlir::isa(Loc)) && + "expected single location or unknown location here"); + + auto linkage = convertLinkage(op.getLinkage()); + auto cconv = convertCallingConv(op.getCallingConv()); + SmallVector attributes; + lowerFuncAttributes(op, /*filterArgAndResAttrs=*/false, attributes); + + auto fn = rewriter.create( + Loc, op.getName(), llvmFnTy, linkage, isDsoLocal, cconv, + mlir::SymbolRefAttr(), attributes); + + fn.setVisibility_Attr(mlir::LLVM::VisibilityAttr::get( + getContext(), lowerCIRVisibilityToLLVMVisibility( + op.getGlobalVisibilityAttr().getValue()))); + + rewriter.inlineRegionBefore(op.getBody(), fn.getBody(), fn.end()); + if (failed(rewriter.convertRegionTypes(&fn.getBody(), *typeConverter, + &signatureConversion))) + return mlir::failure(); + + rewriter.eraseOp(op); + + return mlir::LogicalResult::success(); + } +}; + +class CIRGetGlobalOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::GetGlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // FIXME(cir): Premature DCE to avoid lowering stuff we're not using. + // CIRGen should mitigate this and not emit the get_global. + if (op->getUses().empty()) { + rewriter.eraseOp(op); + return mlir::success(); + } + + auto type = getTypeConverter()->convertType(op.getType()); + auto symbol = op.getName(); + mlir::Operation *newop = + rewriter.create(op.getLoc(), type, symbol); + + if (op.getTls()) { + // Handle access to TLS via intrinsic. 
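+      // That is, wrap the symbol address in the thread-local address
+      // intrinsic, roughly (@tls_var is illustrative):
+      //   %g = llvm.mlir.addressof @tls_var : !llvm.ptr
+      //   %p = llvm.intr.threadlocal.address %g : !llvm.ptr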
+ newop = rewriter.create( + op.getLoc(), type, newop->getResult(0)); + } + + rewriter.replaceOp(op, newop); + return mlir::success(); + } +}; + +class CIRComplexCreateOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ComplexCreateOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto complexLLVMTy = + getTypeConverter()->convertType(op.getResult().getType()); + auto initialComplex = + rewriter.create(op->getLoc(), complexLLVMTy); + + int64_t position[1]{0}; + auto realComplex = rewriter.create( + op->getLoc(), initialComplex, adaptor.getReal(), position); + + position[0] = 1; + auto complex = rewriter.create( + op->getLoc(), realComplex, adaptor.getImag(), position); + + rewriter.replaceOp(op, complex); + return mlir::success(); + } +}; + +class CIRComplexRealOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ComplexRealOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resultLLVMTy = + getTypeConverter()->convertType(op.getResult().getType()); + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, adaptor.getOperand(), + llvm::ArrayRef{0}); + return mlir::success(); + } +}; + +class CIRComplexImagOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ComplexImagOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resultLLVMTy = + getTypeConverter()->convertType(op.getResult().getType()); + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, adaptor.getOperand(), + llvm::ArrayRef{1}); + return mlir::success(); + } +}; + +class CIRComplexRealPtrOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ComplexRealPtrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto operandTy = + mlir::cast(op.getOperand().getType()); + auto resultLLVMTy = + getTypeConverter()->convertType(op.getResult().getType()); + auto elementLLVMTy = + getTypeConverter()->convertType(operandTy.getPointee()); + + mlir::LLVM::GEPArg gepIndices[2]{{0}, {0}}; + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, elementLLVMTy, adaptor.getOperand(), gepIndices, + /*inbounds=*/true); + + return mlir::success(); + } +}; + +class CIRComplexImagPtrOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ComplexImagPtrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto operandTy = + mlir::cast(op.getOperand().getType()); + auto resultLLVMTy = + getTypeConverter()->convertType(op.getResult().getType()); + auto elementLLVMTy = + getTypeConverter()->convertType(operandTy.getPointee()); + + mlir::LLVM::GEPArg gepIndices[2]{{0}, {1}}; + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, elementLLVMTy, adaptor.getOperand(), gepIndices, + /*inbounds=*/true); + + return mlir::success(); + } +}; + +class CIRSwitchFlatOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::SwitchFlatOp op, OpAdaptor adaptor, 
+ mlir::ConversionPatternRewriter &rewriter) const override { + + llvm::SmallVector caseValues; + if (op.getCaseValues()) { + for (auto val : op.getCaseValues()) { + auto intAttr = dyn_cast(val); + caseValues.push_back(intAttr.getValue()); + } + } + + llvm::SmallVector caseDestinations; + llvm::SmallVector caseOperands; + + for (auto x : op.getCaseDestinations()) { + caseDestinations.push_back(x); + } + + for (auto x : op.getCaseOperands()) { + caseOperands.push_back(x); + } + + // Set switch op to branch to the newly created blocks. + rewriter.setInsertionPoint(op); + rewriter.replaceOpWithNewOp( + op, adaptor.getCondition(), op.getDefaultDestination(), + op.getDefaultOperands(), caseValues, caseDestinations, caseOperands); + return mlir::success(); + } +}; + +class CIRGlobalOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + // Get addrspace by converting a pointer type. + // TODO: The approach here is a little hacky. We should access the target info + // directly to convert the address space of global op, similar to what we do + // for type converter. + unsigned getGlobalOpTargetAddrSpace(mlir::cir::GlobalOp op) const { + auto tempPtrTy = mlir::cir::PointerType::get(getContext(), op.getSymType(), + op.getAddrSpaceAttr()); + return cast( + typeConverter->convertType(tempPtrTy)) + .getAddressSpace(); + } + + /// Replace CIR global with a region initialized LLVM global and update + /// insertion point to the end of the initializer block. + inline void setupRegionInitializedLLVMGlobalOp( + mlir::cir::GlobalOp op, mlir::ConversionPatternRewriter &rewriter) const { + const auto llvmType = getTypeConverter()->convertType(op.getSymType()); + SmallVector attributes; + auto newGlobalOp = rewriter.replaceOpWithNewOp( + op, llvmType, op.getConstant(), convertLinkage(op.getLinkage()), + op.getSymName(), nullptr, + /*alignment*/ op.getAlignment().value_or(0), + /*addrSpace*/ getGlobalOpTargetAddrSpace(op), + /*dsoLocal*/ false, /*threadLocal*/ (bool)op.getTlsModelAttr(), + /*comdat*/ mlir::SymbolRefAttr(), attributes); + newGlobalOp.getRegion().push_back(new mlir::Block()); + rewriter.setInsertionPointToEnd(newGlobalOp.getInitializerBlock()); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::GlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + // Fetch required values to create LLVM op. + const auto llvmType = getTypeConverter()->convertType(op.getSymType()); + const auto isConst = op.getConstant(); + const auto isDsoLocal = op.getDsolocal(); + const auto linkage = convertLinkage(op.getLinkage()); + const auto symbol = op.getSymName(); + const auto loc = op.getLoc(); + std::optional section = op.getSection(); + std::optional init = op.getInitialValue(); + mlir::LLVM::VisibilityAttr visibility = mlir::LLVM::VisibilityAttr::get( + getContext(), lowerCIRVisibilityToLLVMVisibility( + op.getGlobalVisibilityAttr().getValue())); + + SmallVector attributes; + if (section.has_value()) + attributes.push_back(rewriter.getNamedAttr( + "section", rewriter.getStringAttr(section.value()))); + + attributes.push_back(rewriter.getNamedAttr("visibility_", visibility)); + + // Check for missing funcionalities. 
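    // Illustrative example (syntax approximate): a declaration-only global
    // such as
    //   cir.global external @ext_var : !s32i
    // has no initial value and takes the branch below, producing an LLVM
    // dialect global with no initializer, roughly:
    //   llvm.mlir.global external @ext_var() : i32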
+ if (!init.has_value()) { + rewriter.replaceOpWithNewOp( + op, llvmType, isConst, linkage, symbol, mlir::Attribute(), + /*alignment*/ 0, /*addrSpace*/ getGlobalOpTargetAddrSpace(op), + /*dsoLocal*/ isDsoLocal, /*threadLocal*/ (bool)op.getTlsModelAttr(), + /*comdat*/ mlir::SymbolRefAttr(), attributes); + return mlir::success(); + } + + // Initializer is a constant array: convert it to a compatible llvm init. + if (auto constArr = + mlir::dyn_cast(init.value())) { + if (auto attr = mlir::dyn_cast(constArr.getElts())) { + init = rewriter.getStringAttr(attr.getValue()); + } else if (auto attr = + mlir::dyn_cast(constArr.getElts())) { + // Failed to use a compact attribute as an initializer: + // initialize elements individually. + if (!(init = lowerConstArrayAttr(constArr, getTypeConverter()))) { + setupRegionInitializedLLVMGlobalOp(op, rewriter); + rewriter.create( + op->getLoc(), + lowerCirAttrAsValue(op, constArr, rewriter, typeConverter)); + return mlir::success(); + } + } else { + op.emitError() + << "unsupported lowering for #cir.const_array with value " + << constArr.getElts(); + return mlir::failure(); + } + } else if (auto fltAttr = mlir::dyn_cast(init.value())) { + // Initializer is a constant floating-point number: convert to MLIR + // builtin constant. + init = rewriter.getFloatAttr(llvmType, fltAttr.getValue()); + } + // Initializer is a constant integer: convert to MLIR builtin constant. + else if (auto intAttr = mlir::dyn_cast(init.value())) { + init = rewriter.getIntegerAttr(llvmType, intAttr.getValue()); + } else if (auto boolAttr = + mlir::dyn_cast(init.value())) { + init = rewriter.getBoolAttr(boolAttr.getValue()); + } else if (isa( + init.value())) { + // TODO(cir): once LLVM's dialect has a proper zeroinitializer attribute + // this should be updated. For now, we use a custom op to initialize + // globals to zero. + setupRegionInitializedLLVMGlobalOp(op, rewriter); + auto value = + lowerCirAttrAsValue(op, init.value(), rewriter, typeConverter); + rewriter.create(loc, value); + return mlir::success(); + } else if (auto dataMemberAttr = + mlir::dyn_cast(init.value())) { + init = lowerDataMemberAttr(op->getParentOfType(), + dataMemberAttr, *typeConverter); + } else if (const auto structAttr = + mlir::dyn_cast(init.value())) { + setupRegionInitializedLLVMGlobalOp(op, rewriter); + rewriter.create( + op->getLoc(), + lowerCirAttrAsValue(op, structAttr, rewriter, typeConverter)); + return mlir::success(); + } else if (auto attr = + mlir::dyn_cast(init.value())) { + setupRegionInitializedLLVMGlobalOp(op, rewriter); + rewriter.create( + loc, lowerCirAttrAsValue(op, attr, rewriter, typeConverter)); + return mlir::success(); + } else if (const auto vtableAttr = + mlir::dyn_cast(init.value())) { + setupRegionInitializedLLVMGlobalOp(op, rewriter); + rewriter.create( + op->getLoc(), + lowerCirAttrAsValue(op, vtableAttr, rewriter, typeConverter)); + return mlir::success(); + } else if (const auto typeinfoAttr = + mlir::dyn_cast(init.value())) { + setupRegionInitializedLLVMGlobalOp(op, rewriter); + rewriter.create( + op->getLoc(), + lowerCirAttrAsValue(op, typeinfoAttr, rewriter, typeConverter)); + return mlir::success(); + } else { + op.emitError() << "usupported initializer '" << init.value() << "'"; + return mlir::failure(); + } + + // Rewrite op. 
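    // Illustrative example (syntax approximate): a simple scalar initializer
    // reaches this point already rewritten into a builtin attribute, e.g.
    //   cir.global external @i = #cir.int<42> : !s32i
    // has had its #cir.int initializer converted to `42 : i32` above and is
    // now emitted roughly as
    //   llvm.mlir.global external @i(42 : i32) : i32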
+ auto llvmGlobalOp = rewriter.replaceOpWithNewOp( + op, llvmType, isConst, linkage, symbol, init.value(), + /*alignment*/ op.getAlignment().value_or(0), + /*addrSpace*/ getGlobalOpTargetAddrSpace(op), + /*dsoLocal*/ false, /*threadLocal*/ (bool)op.getTlsModelAttr(), + /*comdat*/ mlir::SymbolRefAttr(), attributes); + + auto mod = op->getParentOfType(); + if (op.getComdat()) + addComdat(llvmGlobalOp, comdatOp, rewriter, mod); + + return mlir::success(); + } + +private: + mutable mlir::LLVM::ComdatOp comdatOp = nullptr; + static void addComdat(mlir::LLVM::GlobalOp &op, + mlir::LLVM::ComdatOp &comdatOp, + mlir::OpBuilder &builder, mlir::ModuleOp &module) { + StringRef comdatName("__llvm_comdat_globals"); + if (!comdatOp) { + builder.setInsertionPointToStart(module.getBody()); + comdatOp = + builder.create(module.getLoc(), comdatName); + } + builder.setInsertionPointToStart(&comdatOp.getBody().back()); + auto selectorOp = builder.create( + comdatOp.getLoc(), op.getSymName(), mlir::LLVM::comdat::Comdat::Any); + op.setComdatAttr(mlir::SymbolRefAttr::get( + builder.getContext(), comdatName, + mlir::FlatSymbolRefAttr::get(selectorOp.getSymNameAttr()))); + } +}; + +class CIRUnaryOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::UnaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + assert(op.getType() == op.getInput().getType() && + "Unary operation's operand type and result type are different"); + mlir::Type type = op.getType(); + mlir::Type elementType = elementTypeIfVector(type); + bool IsVector = mlir::isa(type); + auto llvmType = getTypeConverter()->convertType(type); + auto loc = op.getLoc(); + + // Integer unary operations: + - ~ ++ -- + if (mlir::isa(elementType)) { + switch (op.getKind()) { + case mlir::cir::UnaryOpKind::Inc: { + assert(!IsVector && "++ not allowed on vector types"); + auto One = rewriter.create( + loc, llvmType, mlir::IntegerAttr::get(llvmType, 1)); + rewriter.replaceOpWithNewOp(op, llvmType, + adaptor.getInput(), One); + return mlir::success(); + } + case mlir::cir::UnaryOpKind::Dec: { + assert(!IsVector && "-- not allowed on vector types"); + auto One = rewriter.create( + loc, llvmType, mlir::IntegerAttr::get(llvmType, 1)); + rewriter.replaceOpWithNewOp(op, llvmType, + adaptor.getInput(), One); + return mlir::success(); + } + case mlir::cir::UnaryOpKind::Plus: { + rewriter.replaceOp(op, adaptor.getInput()); + return mlir::success(); + } + case mlir::cir::UnaryOpKind::Minus: { + mlir::Value Zero; + if (IsVector) + Zero = rewriter.create(loc, llvmType); + else + Zero = rewriter.create( + loc, llvmType, mlir::IntegerAttr::get(llvmType, 0)); + rewriter.replaceOpWithNewOp(op, llvmType, Zero, + adaptor.getInput()); + return mlir::success(); + } + case mlir::cir::UnaryOpKind::Not: { + // bit-wise compliment operator, implemented as an XOR with -1. + mlir::Value MinusOne; + if (IsVector) { + // Creating a vector object with all -1 values is easier said than + // done. It requires a series of insertelement ops. 
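          // For a hypothetical 4 x !s32i vector operand the loop below
          // produces, roughly:
          //   %m1 = llvm.mlir.constant(-1 : i32)
          //   %v0 = llvm.mlir.undef : vector<4xi32>
          //   %v1 ... %v4 = llvm.insertelement %m1 into lane 0..3
          //   llvm.xor %v4, %input
          // i.e. an all-ones vector assembled lane by lane, then a single xor.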
+ mlir::Type llvmElementType = + getTypeConverter()->convertType(elementType); + auto MinusOneInt = rewriter.create( + loc, llvmElementType, + mlir::IntegerAttr::get(llvmElementType, -1)); + MinusOne = rewriter.create(loc, llvmType); + auto NumElements = + mlir::dyn_cast(type).getSize(); + for (uint64_t i = 0; i < NumElements; ++i) { + mlir::Value indexValue = rewriter.create( + loc, rewriter.getI64Type(), i); + MinusOne = rewriter.create( + loc, MinusOne, MinusOneInt, indexValue); + } + } else { + MinusOne = rewriter.create( + loc, llvmType, mlir::IntegerAttr::get(llvmType, -1)); + } + rewriter.replaceOpWithNewOp(op, llvmType, MinusOne, + adaptor.getInput()); + return mlir::success(); + } + } + } + + // Floating point unary operations: + - ++ -- + if (mlir::isa(elementType)) { + switch (op.getKind()) { + case mlir::cir::UnaryOpKind::Inc: { + assert(!IsVector && "++ not allowed on vector types"); + auto oneAttr = rewriter.getFloatAttr(llvmType, 1.0); + auto oneConst = + rewriter.create(loc, llvmType, oneAttr); + rewriter.replaceOpWithNewOp(op, llvmType, oneConst, + adaptor.getInput()); + return mlir::success(); + } + case mlir::cir::UnaryOpKind::Dec: { + assert(!IsVector && "-- not allowed on vector types"); + auto negOneAttr = rewriter.getFloatAttr(llvmType, -1.0); + auto negOneConst = + rewriter.create(loc, llvmType, negOneAttr); + rewriter.replaceOpWithNewOp( + op, llvmType, negOneConst, adaptor.getInput()); + return mlir::success(); + } + case mlir::cir::UnaryOpKind::Plus: + rewriter.replaceOp(op, adaptor.getInput()); + return mlir::success(); + case mlir::cir::UnaryOpKind::Minus: { + rewriter.replaceOpWithNewOp(op, llvmType, + adaptor.getInput()); + return mlir::success(); + } + default: + return op.emitError() + << "Unknown floating-point unary operation during CIR lowering"; + } + } + + // Boolean unary operations: ! only. (For all others, the operand has + // already been promoted to int.) + if (mlir::isa(elementType)) { + switch (op.getKind()) { + case mlir::cir::UnaryOpKind::Not: + assert(!IsVector && "NYI: op! on vector mask"); + rewriter.replaceOpWithNewOp( + op, llvmType, adaptor.getInput(), + rewriter.create( + loc, llvmType, mlir::IntegerAttr::get(llvmType, 1))); + return mlir::success(); + default: + return op.emitError() + << "Unknown boolean unary operation during CIR lowering"; + } + } + + // Pointer unary operations: + only. (++ and -- of pointers are implemented + // with cir.ptr_stride, not cir.unary.) 
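    // For illustration: given C code like `p++`, CIRGen is expected to emit a
    // cir.ptr_stride (lowered by its own pattern, typically to an
    // llvm.getelementptr), so the only pointer form reaching the switch below
    // is unary plus, which simply forwards the operand.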
+ if (mlir::isa(elementType)) { + switch (op.getKind()) { + case mlir::cir::UnaryOpKind::Plus: + rewriter.replaceOp(op, adaptor.getInput()); + return mlir::success(); + default: + op.emitError() << "Unknown pointer unary operation during CIR lowering"; + return mlir::failure(); + } + } + + return op.emitError() << "Unary operation has unsupported type: " + << elementType; + } +}; + +class CIRBinOpLowering : public mlir::OpConversionPattern { + + mlir::LLVM::IntegerOverflowFlags + getIntOverflowFlag(mlir::cir::BinOp op) const { + if (op.getNoUnsignedWrap()) + return mlir::LLVM::IntegerOverflowFlags::nuw; + + if (op.getNoSignedWrap()) + return mlir::LLVM::IntegerOverflowFlags::nsw; + + return mlir::LLVM::IntegerOverflowFlags::none; + } + +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BinOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + assert((op.getLhs().getType() == op.getRhs().getType()) && + "inconsistent operands' types not supported yet"); + mlir::Type type = op.getRhs().getType(); + assert((mlir::isa(type)) && + "operand type not supported yet"); + + auto llvmTy = getTypeConverter()->convertType(op.getType()); + auto rhs = adaptor.getRhs(); + auto lhs = adaptor.getLhs(); + + type = elementTypeIfVector(type); + + switch (op.getKind()) { + case mlir::cir::BinOpKind::Add: + if (mlir::isa(type)) + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs, + getIntOverflowFlag(op)); + else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::Sub: + if (mlir::isa(type)) + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs, + getIntOverflowFlag(op)); + else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::Mul: + if (mlir::isa(type)) + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs, + getIntOverflowFlag(op)); + else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::Div: + if (auto ty = mlir::dyn_cast(type)) { + if (ty.isUnsigned()) + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + } else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::Rem: + if (auto ty = mlir::dyn_cast(type)) { + if (ty.isUnsigned()) + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + } else + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::And: + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::Or: + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + case mlir::cir::BinOpKind::Xor: + rewriter.replaceOpWithNewOp(op, llvmTy, lhs, rhs); + break; + } + + return mlir::LogicalResult::success(); + } +}; + +class CIRBinOpOverflowOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BinOpOverflowOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + auto arithKind = op.getKind(); + auto operandTy = op.getLhs().getType(); + auto resultTy = op.getResult().getType(); + + auto encompassedTyInfo = computeEncompassedTypeWidth(operandTy, resultTy); + auto encompassedLLVMTy = rewriter.getIntegerType(encompassedTyInfo.width); + + auto lhs = adaptor.getLhs(); + auto rhs = adaptor.getRhs(); + if 
(operandTy.getWidth() < encompassedTyInfo.width) { + if (operandTy.isSigned()) { + lhs = rewriter.create(loc, encompassedLLVMTy, lhs); + rhs = rewriter.create(loc, encompassedLLVMTy, rhs); + } else { + lhs = rewriter.create(loc, encompassedLLVMTy, lhs); + rhs = rewriter.create(loc, encompassedLLVMTy, rhs); + } + } + + auto intrinName = getLLVMIntrinName(arithKind, encompassedTyInfo.sign, + encompassedTyInfo.width); + auto intrinNameAttr = mlir::StringAttr::get(op.getContext(), intrinName); + + auto overflowLLVMTy = rewriter.getI1Type(); + auto intrinRetTy = mlir::LLVM::LLVMStructType::getLiteral( + rewriter.getContext(), {encompassedLLVMTy, overflowLLVMTy}); + + auto callLLVMIntrinOp = rewriter.create( + loc, intrinRetTy, intrinNameAttr, mlir::ValueRange{lhs, rhs}); + auto intrinRet = callLLVMIntrinOp.getResult(0); + + auto result = rewriter + .create(loc, intrinRet, + ArrayRef{0}) + .getResult(); + auto overflow = rewriter + .create( + loc, intrinRet, ArrayRef{1}) + .getResult(); + + if (resultTy.getWidth() < encompassedTyInfo.width) { + auto resultLLVMTy = getTypeConverter()->convertType(resultTy); + auto truncResult = + rewriter.create(loc, resultLLVMTy, result); + + // Extend the truncated result back to the encompassing type to check for + // any overflows during the truncation. + mlir::Value truncResultExt; + if (resultTy.isSigned()) + truncResultExt = rewriter.create( + loc, encompassedLLVMTy, truncResult); + else + truncResultExt = rewriter.create( + loc, encompassedLLVMTy, truncResult); + auto truncOverflow = rewriter.create( + loc, mlir::LLVM::ICmpPredicate::ne, truncResultExt, result); + + result = truncResult; + overflow = + rewriter.create(loc, overflow, truncOverflow); + } + + auto boolLLVMTy = + getTypeConverter()->convertType(op.getOverflow().getType()); + if (boolLLVMTy != rewriter.getI1Type()) + overflow = rewriter.create(loc, boolLLVMTy, overflow); + + rewriter.replaceOp(op, mlir::ValueRange{result, overflow}); + + return mlir::success(); + } + +private: + static std::string getLLVMIntrinName(mlir::cir::BinOpOverflowKind opKind, + bool isSigned, unsigned width) { + // The intrinsic name is `@llvm.{s|u}{opKind}.with.overflow.i{width}` + + std::string name = "llvm."; + + if (isSigned) + name.push_back('s'); + else + name.push_back('u'); + + switch (opKind) { + case mlir::cir::BinOpOverflowKind::Add: + name.append("add."); + break; + case mlir::cir::BinOpOverflowKind::Sub: + name.append("sub."); + break; + case mlir::cir::BinOpOverflowKind::Mul: + name.append("mul."); + break; + } + + name.append("with.overflow.i"); + name.append(std::to_string(width)); + + return name; + } + + struct EncompassedTypeInfo { + bool sign; + unsigned width; + }; + + static EncompassedTypeInfo + computeEncompassedTypeWidth(mlir::cir::IntType operandTy, + mlir::cir::IntType resultTy) { + auto sign = operandTy.getIsSigned() || resultTy.getIsSigned(); + auto width = + std::max(operandTy.getWidth() + (sign && operandTy.isUnsigned()), + resultTy.getWidth() + (sign && resultTy.isUnsigned())); + return {sign, width}; + } +}; + +class CIRShiftOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ShiftOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto cirAmtTy = + mlir::dyn_cast(op.getAmount().getType()); + auto cirValTy = mlir::dyn_cast(op.getValue().getType()); + auto llvmTy = getTypeConverter()->convertType(op.getType()); + mlir::Value amt = 
adaptor.getAmount(); + mlir::Value val = adaptor.getValue(); + + assert(cirValTy && cirAmtTy && "non-integer shift is NYI"); + assert(cirValTy == op.getType() && "inconsistent operands' types NYI"); + + // Ensure shift amount is the same type as the value. Some undefined + // behavior might occur in the casts below as per [C99 6.5.7.3]. + amt = getLLVMIntCast(rewriter, amt, mlir::cast(llvmTy), + !cirAmtTy.isSigned(), cirValTy.getWidth()); + + // Lower to the proper LLVM shift operation. + if (op.getIsShiftleft()) + rewriter.replaceOpWithNewOp(op, llvmTy, val, amt); + else { + if (cirValTy.isUnsigned()) + rewriter.replaceOpWithNewOp(op, llvmTy, val, amt); + else + rewriter.replaceOpWithNewOp(op, llvmTy, val, amt); + } + + return mlir::success(); + } +}; + +class CIRCmpOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CmpOp cmpOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto type = cmpOp.getLhs().getType(); + mlir::Value llResult; + + // Lower to LLVM comparison op. + if (auto intTy = mlir::dyn_cast(type)) { + auto kind = + convertCmpKindToICmpPredicate(cmpOp.getKind(), intTy.isSigned()); + llResult = rewriter.create( + cmpOp.getLoc(), kind, adaptor.getLhs(), adaptor.getRhs()); + } else if (auto ptrTy = mlir::dyn_cast(type)) { + auto kind = convertCmpKindToICmpPredicate(cmpOp.getKind(), + /* isSigned=*/false); + llResult = rewriter.create( + cmpOp.getLoc(), kind, adaptor.getLhs(), adaptor.getRhs()); + } else if (mlir::isa(type)) { + auto kind = convertCmpKindToFCmpPredicate(cmpOp.getKind()); + llResult = rewriter.create( + cmpOp.getLoc(), kind, adaptor.getLhs(), adaptor.getRhs()); + } else { + return cmpOp.emitError() << "unsupported type for CmpOp: " << type; + } + + // LLVM comparison ops return i1, but cir::CmpOp returns the same type as + // the LHS value. Since this return value can be used later, we need to + // restore the type with the extension below. 
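    // For illustration: comparing two !s32i values yields an i1 from
    // llvm.icmp "slt"/"ult"/... (or llvm.fcmp for floats); the zext emitted
    // below widens that i1 to the converted result type, e.g. i32 when the
    // cir.cmp result has the same !s32i type as its operands.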
+ auto llResultTy = getTypeConverter()->convertType(cmpOp.getType()); + rewriter.replaceOpWithNewOp(cmpOp, llResultTy, + llResult); + + return mlir::success(); + } +}; + +static mlir::LLVM::CallIntrinsicOp +createCallLLVMIntrinsicOp(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, const llvm::Twine &intrinsicName, + mlir::Type resultTy, mlir::ValueRange operands) { + auto intrinsicNameAttr = + mlir::StringAttr::get(rewriter.getContext(), intrinsicName); + return rewriter.create( + loc, resultTy, intrinsicNameAttr, operands); +} + +static mlir::LLVM::CallIntrinsicOp replaceOpWithCallLLVMIntrinsicOp( + mlir::ConversionPatternRewriter &rewriter, mlir::Operation *op, + const llvm::Twine &intrinsicName, mlir::Type resultTy, + mlir::ValueRange operands) { + auto callIntrinOp = createCallLLVMIntrinsicOp( + rewriter, op->getLoc(), intrinsicName, resultTy, operands); + rewriter.replaceOp(op, callIntrinOp.getOperation()); + return callIntrinOp; +} + +class CIRIntrinsicCallLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::IntrinsicCallOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type llvmResTy = + getTypeConverter()->convertType(op->getResultTypes()[0]); + if (!llvmResTy) + return op.emitError("expected LLVM result type"); + StringRef name = op.getIntrinsicName(); + // Some llvm intrinsics require ElementType attribute to be attached to + // the argument of pointer type. That prevents us from generating LLVM IR + // because from LLVM dialect, we have LLVM IR like the below which fails + // LLVM IR verification. + // %3 = call i64 @llvm.aarch64.ldxr.p0(ptr %2) + // The expected LLVM IR should be like + // %3 = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) %2) + // TODO(cir): MLIR LLVM dialect should handle this part as CIR has no way + // to set LLVM IR attribute. + assert(!::cir::MissingFeatures::llvmIntrinsicElementTypeSupport()); + replaceOpWithCallLLVMIntrinsicOp(rewriter, op, name, llvmResTy, + adaptor.getOperands()); + return mlir::success(); + } +}; + +static mlir::Value createLLVMBitOp(mlir::Location loc, + const llvm::Twine &llvmIntrinBaseName, + mlir::Type resultTy, mlir::Value operand, + std::optional poisonZeroInputFlag, + mlir::ConversionPatternRewriter &rewriter) { + auto operandIntTy = mlir::cast(operand.getType()); + auto resultIntTy = mlir::cast(resultTy); + + std::string llvmIntrinName = + llvmIntrinBaseName.concat(".i") + .concat(std::to_string(operandIntTy.getWidth())) + .str(); + + // Note that LLVM intrinsic calls to bit intrinsics have the same type as the + // operand. 
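  // For example, a 16-bit operand with "llvm.ctlz" as the base name yields
  // the intrinsic name "llvm.ctlz.i16" (plus an i1 is-zero-poison flag when
  // requested); the i16 intrinsic result is then widened to the requested
  // result width by the integer cast at the end of this helper.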
+ mlir::LLVM::CallIntrinsicOp op; + if (poisonZeroInputFlag.has_value()) { + auto poisonZeroInputValue = rewriter.create( + loc, rewriter.getI1Type(), static_cast(*poisonZeroInputFlag)); + op = createCallLLVMIntrinsicOp(rewriter, loc, llvmIntrinName, + operand.getType(), + {operand, poisonZeroInputValue}); + } else { + op = createCallLLVMIntrinsicOp(rewriter, loc, llvmIntrinName, + operand.getType(), operand); + } + + return getLLVMIntCast(rewriter, op->getResult(0), + mlir::cast(resultTy), + /*isUnsigned=*/true, resultIntTy.getWidth()); +} + +class CIRBitClrsbOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitClrsbOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto zero = rewriter.create( + op.getLoc(), adaptor.getInput().getType(), 0); + auto isNeg = rewriter.create( + op.getLoc(), + mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(), + mlir::LLVM::ICmpPredicate::slt), + adaptor.getInput(), zero); + + auto negOne = rewriter.create( + op.getLoc(), adaptor.getInput().getType(), -1); + auto flipped = rewriter.create( + op.getLoc(), adaptor.getInput(), negOne); + + auto select = rewriter.create( + op.getLoc(), isNeg, flipped, adaptor.getInput()); + + auto resTy = getTypeConverter()->convertType(op.getType()); + auto clz = createLLVMBitOp(op.getLoc(), "llvm.ctlz", resTy, select, + /*poisonZeroInputFlag=*/false, rewriter); + + auto one = rewriter.create(op.getLoc(), resTy, 1); + auto res = rewriter.create(op.getLoc(), clz, one); + rewriter.replaceOp(op, res); + + return mlir::LogicalResult::success(); + } +}; + +class CIRObjSizeOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ObjSizeOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto llvmResTy = getTypeConverter()->convertType(op.getType()); + auto loc = op->getLoc(); + + mlir::cir::SizeInfoType kindInfo = op.getKind(); + auto falseValue = rewriter.create( + loc, rewriter.getI1Type(), false); + auto trueValue = rewriter.create( + loc, rewriter.getI1Type(), true); + + replaceOpWithCallLLVMIntrinsicOp( + rewriter, op, "llvm.objectsize", llvmResTy, + mlir::ValueRange{adaptor.getPtr(), + kindInfo == mlir::cir::SizeInfoType::max ? falseValue + : trueValue, + trueValue, op.getDynamic() ? 
trueValue : falseValue}); + + return mlir::LogicalResult::success(); + } +}; + +class CIRBitClzOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitClzOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto llvmOp = + createLLVMBitOp(op.getLoc(), "llvm.ctlz", resTy, adaptor.getInput(), + /*poisonZeroInputFlag=*/true, rewriter); + rewriter.replaceOp(op, llvmOp); + return mlir::LogicalResult::success(); + } +}; + +class CIRBitCtzOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitCtzOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto llvmOp = + createLLVMBitOp(op.getLoc(), "llvm.cttz", resTy, adaptor.getInput(), + /*poisonZeroInputFlag=*/true, rewriter); + rewriter.replaceOp(op, llvmOp); + return mlir::LogicalResult::success(); + } +}; + +class CIRBitFfsOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitFfsOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto ctz = + createLLVMBitOp(op.getLoc(), "llvm.cttz", resTy, adaptor.getInput(), + /*poisonZeroInputFlag=*/false, rewriter); + + auto one = rewriter.create(op.getLoc(), resTy, 1); + auto ctzAddOne = rewriter.create(op.getLoc(), ctz, one); + + auto zeroInputTy = rewriter.create( + op.getLoc(), adaptor.getInput().getType(), 0); + auto isZero = rewriter.create( + op.getLoc(), + mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(), + mlir::LLVM::ICmpPredicate::eq), + adaptor.getInput(), zeroInputTy); + + auto zero = rewriter.create(op.getLoc(), resTy, 0); + auto res = rewriter.create(op.getLoc(), isZero, zero, + ctzAddOne); + rewriter.replaceOp(op, res); + + return mlir::LogicalResult::success(); + } +}; + +class CIRBitParityOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitParityOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto popcnt = + createLLVMBitOp(op.getLoc(), "llvm.ctpop", resTy, adaptor.getInput(), + /*poisonZeroInputFlag=*/std::nullopt, rewriter); + + auto one = rewriter.create(op.getLoc(), resTy, 1); + auto popcntMod2 = + rewriter.create(op.getLoc(), popcnt, one); + rewriter.replaceOp(op, popcntMod2); + + return mlir::LogicalResult::success(); + } +}; + +class CIRBitPopcountOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitPopcountOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto llvmOp = + createLLVMBitOp(op.getLoc(), "llvm.ctpop", resTy, adaptor.getInput(), + /*poisonZeroInputFlag=*/std::nullopt, rewriter); + rewriter.replaceOp(op, llvmOp); + return mlir::LogicalResult::success(); + } +}; + +static mlir::LLVM::AtomicOrdering 
getLLVMAtomicOrder(mlir::cir::MemOrder memo) { + switch (memo) { + case mlir::cir::MemOrder::Relaxed: + return mlir::LLVM::AtomicOrdering::monotonic; + case mlir::cir::MemOrder::Consume: + case mlir::cir::MemOrder::Acquire: + return mlir::LLVM::AtomicOrdering::acquire; + case mlir::cir::MemOrder::Release: + return mlir::LLVM::AtomicOrdering::release; + case mlir::cir::MemOrder::AcquireRelease: + return mlir::LLVM::AtomicOrdering::acq_rel; + case mlir::cir::MemOrder::SequentiallyConsistent: + return mlir::LLVM::AtomicOrdering::seq_cst; + } + llvm_unreachable("shouldn't get here"); +} + +class CIRAtomicCmpXchgLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::AtomicCmpXchg op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto expected = adaptor.getExpected(); + auto desired = adaptor.getDesired(); + + // FIXME: add syncscope. + auto cmpxchg = rewriter.create( + op.getLoc(), adaptor.getPtr(), expected, desired, + getLLVMAtomicOrder(adaptor.getSuccOrder()), + getLLVMAtomicOrder(adaptor.getFailOrder())); + cmpxchg.setWeak(adaptor.getWeak()); + cmpxchg.setVolatile_(adaptor.getIsVolatile()); + + // Check result and apply stores accordingly. + auto old = rewriter.create( + op.getLoc(), cmpxchg.getResult(), 0); + auto cmp = rewriter.create( + op.getLoc(), cmpxchg.getResult(), 1); + + auto extCmp = rewriter.create( + op.getLoc(), rewriter.getI8Type(), cmp); + rewriter.replaceOp(op, {old, extCmp}); + return mlir::success(); + } +}; + +class CIRAtomicXchgLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::AtomicXchg op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // FIXME: add syncscope. + auto llvmOrder = getLLVMAtomicOrder(adaptor.getMemOrder()); + rewriter.replaceOpWithNewOp( + op, mlir::LLVM::AtomicBinOp::xchg, adaptor.getPtr(), adaptor.getVal(), + llvmOrder); + return mlir::success(); + } +}; + +class CIRAtomicFetchLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::Value buildPostOp(mlir::cir::AtomicFetch op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter, + mlir::Value rmwVal, bool isInt) const { + SmallVector atomicOperands = {rmwVal, adaptor.getVal()}; + SmallVector atomicResTys = {rmwVal.getType()}; + return rewriter + .create(op.getLoc(), + rewriter.getStringAttr(getLLVMBinop(op.getBinop(), isInt)), + atomicOperands, atomicResTys, {}) + ->getResult(0); + } + + mlir::Value buildMinMaxPostOp(mlir::cir::AtomicFetch op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter, + mlir::Value rmwVal, bool isSigned) const { + auto loc = op.getLoc(); + mlir::LLVM::ICmpPredicate pred; + if (op.getBinop() == mlir::cir::AtomicFetchKind::Max) { + pred = isSigned ? mlir::LLVM::ICmpPredicate::sgt + : mlir::LLVM::ICmpPredicate::ugt; + } else { // Min + pred = isSigned ? mlir::LLVM::ICmpPredicate::slt + : mlir::LLVM::ICmpPredicate::ult; + } + + auto cmp = rewriter.create( + loc, mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(), pred), + rmwVal, adaptor.getVal()); + return rewriter.create(loc, cmp, rmwVal, + adaptor.getVal()); + } + + llvm::StringLiteral getLLVMBinop(mlir::cir::AtomicFetchKind k, + bool isInt) const { + switch (k) { + case mlir::cir::AtomicFetchKind::Add: + return isInt ? 
mlir::LLVM::AddOp::getOperationName() + : mlir::LLVM::FAddOp::getOperationName(); + case mlir::cir::AtomicFetchKind::Sub: + return isInt ? mlir::LLVM::SubOp::getOperationName() + : mlir::LLVM::FSubOp::getOperationName(); + case mlir::cir::AtomicFetchKind::And: + return mlir::LLVM::AndOp::getOperationName(); + case mlir::cir::AtomicFetchKind::Xor: + return mlir::LLVM::XOrOp::getOperationName(); + case mlir::cir::AtomicFetchKind::Or: + return mlir::LLVM::OrOp::getOperationName(); + case mlir::cir::AtomicFetchKind::Nand: + // There's no nand binop in LLVM, this is later fixed with a not. + return mlir::LLVM::AndOp::getOperationName(); + case mlir::cir::AtomicFetchKind::Max: + case mlir::cir::AtomicFetchKind::Min: + llvm_unreachable("handled in buildMinMaxPostOp"); + } + llvm_unreachable("Unknown atomic fetch opcode"); + } + + mlir::LLVM::AtomicBinOp getLLVMAtomicBinOp(mlir::cir::AtomicFetchKind k, + bool isInt, + bool isSignedInt) const { + switch (k) { + case mlir::cir::AtomicFetchKind::Add: + return isInt ? mlir::LLVM::AtomicBinOp::add + : mlir::LLVM::AtomicBinOp::fadd; + case mlir::cir::AtomicFetchKind::Sub: + return isInt ? mlir::LLVM::AtomicBinOp::sub + : mlir::LLVM::AtomicBinOp::fsub; + case mlir::cir::AtomicFetchKind::And: + return mlir::LLVM::AtomicBinOp::_and; + case mlir::cir::AtomicFetchKind::Xor: + return mlir::LLVM::AtomicBinOp::_xor; + case mlir::cir::AtomicFetchKind::Or: + return mlir::LLVM::AtomicBinOp::_or; + case mlir::cir::AtomicFetchKind::Nand: + return mlir::LLVM::AtomicBinOp::nand; + case mlir::cir::AtomicFetchKind::Max: { + if (!isInt) + return mlir::LLVM::AtomicBinOp::fmax; + return isSignedInt ? mlir::LLVM::AtomicBinOp::max + : mlir::LLVM::AtomicBinOp::umax; + } + case mlir::cir::AtomicFetchKind::Min: { + if (!isInt) + return mlir::LLVM::AtomicBinOp::fmin; + return isSignedInt ? mlir::LLVM::AtomicBinOp::min + : mlir::LLVM::AtomicBinOp::umin; + } + } + llvm_unreachable("Unknown atomic fetch opcode"); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::AtomicFetch op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + bool isInt, isSignedInt = false; // otherwise it's float. + if (auto intTy = + mlir::dyn_cast(op.getVal().getType())) { + isInt = true; + isSignedInt = intTy.isSigned(); + } else if (mlir::isa( + op.getVal().getType())) + isInt = false; + else { + return op.emitError() + << "Unsupported type: " << adaptor.getVal().getType(); + } + + // FIXME: add syncscope. + auto llvmOrder = getLLVMAtomicOrder(adaptor.getMemOrder()); + auto llvmBinOpc = getLLVMAtomicBinOp(op.getBinop(), isInt, isSignedInt); + auto rmwVal = rewriter.create( + op.getLoc(), llvmBinOpc, adaptor.getPtr(), adaptor.getVal(), llvmOrder); + + mlir::Value result = rmwVal.getRes(); + if (!op.getFetchFirst()) { + if (op.getBinop() == mlir::cir::AtomicFetchKind::Max || + op.getBinop() == mlir::cir::AtomicFetchKind::Min) + result = buildMinMaxPostOp(op, adaptor, rewriter, rmwVal.getRes(), + isSignedInt); + else + result = buildPostOp(op, adaptor, rewriter, rmwVal.getRes(), isInt); + + // Compensate lack of nand binop in LLVM IR. 
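      // For illustration: atomicrmw nand stores ~(old & val) but returns the
      // old value, and buildPostOp above recomputed only (old & val); the
      // xor with -1 below flips those bits so the final result matches the
      // __atomic_nand_fetch semantics, ~(old & val).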
+ if (op.getBinop() == mlir::cir::AtomicFetchKind::Nand) { + auto negOne = rewriter.create( + op.getLoc(), result.getType(), -1); + result = + rewriter.create(op.getLoc(), result, negOne); + } + } + + rewriter.replaceOp(op, result); + return mlir::success(); + } +}; + +class CIRByteswapOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ByteswapOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Note that LLVM intrinsic calls to @llvm.bswap.i* have the same type as + // the operand. + + auto resTy = mlir::cast( + getTypeConverter()->convertType(op.getType())); + + std::string llvmIntrinName = "llvm.bswap.i"; + llvmIntrinName.append(std::to_string(resTy.getWidth())); + + rewriter.replaceOpWithNewOp(op, adaptor.getInput()); + + return mlir::LogicalResult::success(); + } +}; + +class CIRRotateOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::RotateOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Note that LLVM intrinsic calls to @llvm.fsh{r,l}.i* have the same type as + // the operand. + auto src = adaptor.getSrc(); + if (op.getLeft()) + rewriter.replaceOpWithNewOp(op, src, src, + adaptor.getAmt()); + else + rewriter.replaceOpWithNewOp(op, src, src, + adaptor.getAmt()); + return mlir::LogicalResult::success(); + } +}; + +class CIRSelectOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::SelectOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto getConstantBool = [](mlir::Value value) -> std::optional { + auto definingOp = mlir::dyn_cast_if_present( + value.getDefiningOp()); + if (!definingOp) + return std::nullopt; + + auto constValue = + mlir::dyn_cast(definingOp.getValue()); + if (!constValue) + return std::nullopt; + + return constValue.getValue(); + }; + + // Two special cases in the LLVMIR codegen of select op: + // - select %0, %1, false => and %0, %1 + // - select %0, true, %1 => or %0, %1 + auto trueValue = op.getTrueValue(); + auto falseValue = op.getFalseValue(); + if (mlir::isa(trueValue.getType())) { + if (std::optional falseValueBool = getConstantBool(falseValue); + falseValueBool.has_value() && !*falseValueBool) { + // select %0, %1, false => and %0, %1 + rewriter.replaceOpWithNewOp( + op, adaptor.getCondition(), adaptor.getTrueValue()); + return mlir::success(); + } + if (std::optional trueValueBool = getConstantBool(trueValue); + trueValueBool.has_value() && *trueValueBool) { + // select %0, true, %1 => or %0, %1 + rewriter.replaceOpWithNewOp( + op, adaptor.getCondition(), adaptor.getFalseValue()); + return mlir::success(); + } + } + + auto llvmCondition = rewriter.create( + op.getLoc(), mlir::IntegerType::get(op->getContext(), 1), + adaptor.getCondition()); + rewriter.replaceOpWithNewOp( + op, llvmCondition, adaptor.getTrueValue(), adaptor.getFalseValue()); + + return mlir::success(); + } +}; + +class CIRBrOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getOperands(), + op.getDest()); + 
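    // For illustration: an unconditional cir.br maps 1:1 onto llvm.br, with
    // any CIR block arguments forwarded as the successor operands passed
    // above.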
return mlir::LogicalResult::success(); + } +}; + +class CIRGetMemberOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::GetMemberOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto llResTy = getTypeConverter()->convertType(op.getType()); + const auto structTy = + mlir::cast(op.getAddrTy().getPointee()); + assert(structTy && "expected struct type"); + + switch (structTy.getKind()) { + case mlir::cir::StructType::Struct: + case mlir::cir::StructType::Class: { + // Since the base address is a pointer to an aggregate, the first offset + // is always zero. The second offset tell us which member it will access. + llvm::SmallVector offset{0, op.getIndex()}; + const auto elementTy = getTypeConverter()->convertType(structTy); + rewriter.replaceOpWithNewOp(op, llResTy, elementTy, + adaptor.getAddr(), offset); + return mlir::success(); + } + case mlir::cir::StructType::Union: + // Union members share the address space, so we just need a bitcast to + // conform to type-checking. + rewriter.replaceOpWithNewOp(op, llResTy, + adaptor.getAddr()); + return mlir::success(); + } + } +}; + +class CIRGetRuntimeMemberOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern< + mlir::cir::GetRuntimeMemberOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::GetRuntimeMemberOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto llvmResTy = getTypeConverter()->convertType(op.getType()); + auto llvmElementTy = mlir::IntegerType::get(op.getContext(), 8); + + rewriter.replaceOpWithNewOp( + op, llvmResTy, llvmElementTy, adaptor.getAddr(), adaptor.getMember()); + return mlir::success(); + } +}; + +class CIRPtrDiffOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + uint64_t getTypeSize(mlir::Type type, mlir::Operation &op) const { + mlir::DataLayout layout(op.getParentOfType()); + // For LLVM purposes we treat void as u8. + if (isa(type)) + type = mlir::cir::IntType::get(type.getContext(), 8, /*isSigned=*/false); + return llvm::divideCeil(layout.getTypeSizeInBits(type), 8); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::PtrDiffOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto dstTy = mlir::cast(op.getType()); + auto llvmDstTy = getTypeConverter()->convertType(dstTy); + + auto lhs = rewriter.create(op.getLoc(), llvmDstTy, + adaptor.getLhs()); + auto rhs = rewriter.create(op.getLoc(), llvmDstTy, + adaptor.getRhs()); + + auto diff = + rewriter.create(op.getLoc(), llvmDstTy, lhs, rhs); + + auto ptrTy = mlir::cast(op.getLhs().getType()); + auto typeSize = getTypeSize(ptrTy.getPointee(), *op); + + // Avoid silly division by 1. 
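    // Worked example (sizes per the data layout): for two !cir.ptr<!s32i>
    // operands the lowering is roughly
    //   ptrtoint(lhs) - ptrtoint(rhs), then sdiv (or udiv for an unsigned
    //   result type) by 4, the byte size of i32;
    // for i8 or void pointees typeSize is 1 and the division is skipped.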
+ auto resultVal = diff.getResult(); + if (typeSize != 1) { + auto typeSizeVal = rewriter.create( + op.getLoc(), llvmDstTy, mlir::IntegerAttr::get(llvmDstTy, typeSize)); + + if (dstTy.isUnsigned()) + resultVal = rewriter.create(op.getLoc(), llvmDstTy, + diff, typeSizeVal); + else + resultVal = rewriter.create(op.getLoc(), llvmDstTy, + diff, typeSizeVal); + } + rewriter.replaceOp(op, resultVal); + return mlir::success(); + } +}; + +class CIRExpectOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ExpectOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + std::optional prob = op.getProb(); + if (!prob) + rewriter.replaceOpWithNewOp(op, adaptor.getVal(), + adaptor.getExpected()); + else + rewriter.replaceOpWithNewOp( + op, adaptor.getVal(), adaptor.getExpected(), prob.value()); + return mlir::success(); + } +}; + +class CIRVTableAddrPointOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VTableAddrPointOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + const auto *converter = getTypeConverter(); + auto targetType = converter->convertType(op.getType()); + mlir::Value symAddr = op.getSymAddr(); + llvm::SmallVector offsets; + mlir::Type eltType; + if (!symAddr) { + // Get the vtable address point from a global variable + auto module = op->getParentOfType(); + auto *symbol = + mlir::SymbolTable::lookupSymbolIn(module, op.getNameAttr()); + if (auto llvmSymbol = dyn_cast(symbol)) { + eltType = llvmSymbol.getType(); + } else if (auto cirSymbol = dyn_cast(symbol)) { + eltType = converter->convertType(cirSymbol.getSymType()); + } + symAddr = rewriter.create( + op.getLoc(), mlir::LLVM::LLVMPointerType::get(getContext()), + *op.getName()); + offsets = llvm::SmallVector{ + 0, op.getVtableIndex(), op.getAddressPointIndex()}; + } else { + // Get indirect vtable address point retrieval + symAddr = adaptor.getSymAddr(); + eltType = converter->convertType(symAddr.getType()); + offsets = + llvm::SmallVector{op.getAddressPointIndex()}; + } + + if (eltType) + rewriter.replaceOpWithNewOp(op, targetType, eltType, + symAddr, offsets, true); + else + llvm_unreachable("Shouldn't ever be missing an eltType here"); + + return mlir::success(); + } +}; + +class CIRStackSaveLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::StackSaveOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto ptrTy = getTypeConverter()->convertType(op.getType()); + rewriter.replaceOpWithNewOp(op, ptrTy); + return mlir::success(); + } +}; + +#define GET_BUILTIN_LOWERING_CLASSES +#include "clang/CIR/Dialect/IR/CIRBuiltinsLowering.inc" + +class CIRUnreachableLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::UnreachableOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op); + return mlir::success(); + } +}; + +class CIRTrapLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::TrapOp op, OpAdaptor adaptor, + 
mlir::ConversionPatternRewriter &rewriter) const override { + auto loc = op->getLoc(); + rewriter.eraseOp(op); + + rewriter.create(loc); + + // Note that the call to llvm.trap is not a terminator in LLVM dialect. + // So we must emit an additional llvm.unreachable to terminate the current + // block. + rewriter.create(loc); + + return mlir::success(); + } +}; + +class CIRInlineAsmOpLowering + : public mlir::OpConversionPattern { + + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::InlineAsmOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type llResTy; + if (op.getNumResults()) + llResTy = getTypeConverter()->convertType(op.getType(0)); + + auto dialect = op.getAsmFlavor(); + auto llDialect = dialect == mlir::cir::AsmFlavor::x86_att + ? mlir::LLVM::AsmDialect::AD_ATT + : mlir::LLVM::AsmDialect::AD_Intel; + + std::vector opAttrs; + auto llvmAttrName = mlir::LLVM::InlineAsmOp::getElementTypeAttrName(); + + // this is for the lowering to LLVM from LLVm dialect. Otherwise, if we + // don't have the result (i.e. void type as a result of operation), the + // element type attribute will be attached to the whole instruction, but not + // to the operand + if (!op.getNumResults()) + opAttrs.push_back(mlir::Attribute()); + + llvm::SmallVector llvmOperands; + llvm::SmallVector cirOperands; + for (size_t i = 0; i < op.getOperands().size(); ++i) { + auto llvmOps = adaptor.getOperands()[i]; + auto cirOps = op.getOperands()[i]; + llvmOperands.insert(llvmOperands.end(), llvmOps.begin(), llvmOps.end()); + cirOperands.insert(cirOperands.end(), cirOps.begin(), cirOps.end()); + } + + // so far we infer the llvm dialect element type attr from + // CIR operand type. + for (std::size_t i = 0; i < op.getOperandAttrs().size(); ++i) { + if (!op.getOperandAttrs()[i]) { + opAttrs.push_back(mlir::Attribute()); + continue; + } + + std::vector attrs; + auto typ = cast(cirOperands[i].getType()); + auto typAttr = mlir::TypeAttr::get( + getTypeConverter()->convertType(typ.getPointee())); + + attrs.push_back(rewriter.getNamedAttr(llvmAttrName, typAttr)); + auto newDict = rewriter.getDictionaryAttr(attrs); + opAttrs.push_back(newDict); + } + + rewriter.replaceOpWithNewOp( + op, llResTy, llvmOperands, op.getAsmStringAttr(), + op.getConstraintsAttr(), op.getSideEffectsAttr(), + /*is_align_stack*/ mlir::UnitAttr(), + mlir::LLVM::AsmDialectAttr::get(getContext(), llDialect), + rewriter.getArrayAttr(opAttrs)); + + return mlir::success(); + } +}; + +class CIRPrefetchLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::PrefetchOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, adaptor.getAddr(), adaptor.getIsWrite(), adaptor.getLocality(), + /*DataCache*/ 1); + return mlir::success(); + } +}; + +class CIRSetBitfieldLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::SetBitfieldOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(op); + + auto info = op.getBitfieldInfo(); + auto size = info.getSize(); + auto offset = info.getOffset(); + auto storageType = info.getStorageType(); + auto context = storageType.getContext(); + + unsigned 
storageSize = 0; + + if (auto arTy = mlir::dyn_cast(storageType)) + storageSize = arTy.getSize() * 8; + else if (auto intTy = mlir::dyn_cast(storageType)) + storageSize = intTy.getWidth(); + else + llvm_unreachable( + "Either ArrayType or IntType expected for bitfields storage"); + + auto intType = mlir::IntegerType::get(context, storageSize); + auto srcVal = createIntCast(rewriter, adaptor.getSrc(), intType); + auto srcWidth = storageSize; + auto resultVal = srcVal; + + if (storageSize != size) { + assert(storageSize > size && "Invalid bitfield size."); + + mlir::Value val = rewriter.create( + op.getLoc(), intType, adaptor.getAddr(), /* alignment */ 0, + op.getIsVolatile()); + + srcVal = createAnd(rewriter, srcVal, + llvm::APInt::getLowBitsSet(srcWidth, size)); + resultVal = srcVal; + srcVal = createShL(rewriter, srcVal, offset); + + // Mask out the original value. + val = + createAnd(rewriter, val, + ~llvm::APInt::getBitsSet(srcWidth, offset, offset + size)); + + // Or together the unchanged values and the source value. + srcVal = rewriter.create(op.getLoc(), val, srcVal); + } + + rewriter.create(op.getLoc(), srcVal, adaptor.getAddr(), + /* alignment */ 0, op.getIsVolatile()); + + auto resultTy = getTypeConverter()->convertType(op.getType()); + + resultVal = createIntCast(rewriter, resultVal, + mlir::cast(resultTy)); + + if (info.getIsSigned()) { + assert(size <= storageSize); + unsigned highBits = storageSize - size; + + if (highBits) { + resultVal = createShL(rewriter, resultVal, highBits); + resultVal = createAShR(rewriter, resultVal, highBits); + } + } + + rewriter.replaceOp(op, resultVal); + return mlir::success(); + } +}; + +class CIRGetBitfieldLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::GetBitfieldOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(op); + + auto info = op.getBitfieldInfo(); + auto size = info.getSize(); + auto offset = info.getOffset(); + auto storageType = info.getStorageType(); + auto context = storageType.getContext(); + unsigned storageSize = 0; + + if (auto arTy = mlir::dyn_cast(storageType)) + storageSize = arTy.getSize() * 8; + else if (auto intTy = mlir::dyn_cast(storageType)) + storageSize = intTy.getWidth(); + else + llvm_unreachable( + "Either ArrayType or IntType expected for bitfields storage"); + + auto intType = mlir::IntegerType::get(context, storageSize); + + mlir::Value val = rewriter.create( + op.getLoc(), intType, adaptor.getAddr(), 0, op.getIsVolatile()); + val = rewriter.create(op.getLoc(), intType, val); + + if (info.getIsSigned()) { + assert(static_cast(offset + size) <= storageSize); + unsigned highBits = storageSize - offset - size; + val = createShL(rewriter, val, highBits); + val = createAShR(rewriter, val, offset + highBits); + } else { + val = createLShR(rewriter, val, offset); + + if (static_cast(offset) + size < storageSize) + val = createAnd(rewriter, val, + llvm::APInt::getLowBitsSet(storageSize, size)); + } + + auto resTy = getTypeConverter()->convertType(op.getType()); + auto newOp = + createIntCast(rewriter, val, mlir::cast(resTy), + info.getIsSigned()); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } +}; + +class CIRIsConstantOpLowering + : public mlir::OpConversionPattern { + + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + 
matchAndRewrite(mlir::cir::IsConstantOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // FIXME(cir): llvm.intr.is.constant returns i1 value but the LLVM Lowering + // expects that cir.bool type will be lowered as i8 type. + // So we have to insert zext here. + auto isConstantOP = rewriter.create( + op.getLoc(), adaptor.getVal()); + rewriter.replaceOpWithNewOp(op, rewriter.getI8Type(), + isConstantOP); + return mlir::success(); + } +}; + +class CIRCmpThreeWayOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern< + mlir::cir::CmpThreeWayOp>::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CmpThreeWayOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + if (!op.isIntegralComparison() || !op.isStrongOrdering()) { + op.emitError() << "unsupported three-way comparison type"; + return mlir::failure(); + } + + auto cmpInfo = op.getInfo(); + assert(cmpInfo.getLt() == -1 && cmpInfo.getEq() == 0 && + cmpInfo.getGt() == 1); + + auto operandTy = mlir::cast(op.getLhs().getType()); + auto resultTy = op.getType(); + auto llvmIntrinsicName = getLLVMIntrinsicName( + operandTy.isSigned(), operandTy.getWidth(), resultTy.getWidth()); + + rewriter.setInsertionPoint(op); + + auto llvmLhs = adaptor.getLhs(); + auto llvmRhs = adaptor.getRhs(); + auto llvmResultTy = getTypeConverter()->convertType(resultTy); + auto callIntrinsicOp = + createCallLLVMIntrinsicOp(rewriter, op.getLoc(), llvmIntrinsicName, + llvmResultTy, {llvmLhs, llvmRhs}); + + rewriter.replaceOp(op, callIntrinsicOp); + return mlir::success(); + } + +private: + static std::string getLLVMIntrinsicName(bool signedCmp, unsigned operandWidth, + unsigned resultWidth) { + // The intrinsic's name takes the form: + // `llvm..i.i` + + std::string result = "llvm."; + + if (signedCmp) + result.append("scmp."); + else + result.append("ucmp."); + + // Result type part. + result.push_back('i'); + result.append(std::to_string(resultWidth)); + result.push_back('.'); + + // Operand type part. 
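+    // As a concrete illustration (a sketch based on the string assembled
+    // here, not taken from the original patch): a signed three-way compare
+    // of 32-bit operands producing an 8-bit result is expected to be named
+    // "llvm.scmp.i8.i32", and the unsigned variant "llvm.ucmp.i8.i32".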
+ result.push_back('i'); + result.append(std::to_string(operandWidth)); + + return result; + } +}; + +class CIRClearCacheOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ClearCacheOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto begin = adaptor.getBegin(); + auto end = adaptor.getEnd(); + auto intrinNameAttr = + mlir::StringAttr::get(op.getContext(), "llvm.clear_cache"); + rewriter.replaceOpWithNewOp( + op, mlir::Type{}, intrinNameAttr, mlir::ValueRange{begin, end}); + + return mlir::success(); + } +}; + +class CIRUndefOpLowering + : public mlir::OpConversionPattern { + + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::UndefOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto typ = getTypeConverter()->convertType(op.getRes().getType()); + + rewriter.replaceOpWithNewOp(op, typ); + return mlir::success(); + } +}; + +class CIREhTypeIdOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::EhTypeIdOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Value addrOp = rewriter.create( + op.getLoc(), mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), + op.getTypeSymAttr()); + mlir::LLVM::CallIntrinsicOp newOp = createCallLLVMIntrinsicOp( + rewriter, op.getLoc(), "llvm.eh.typeid.for.p0", rewriter.getI32Type(), + mlir::ValueRange{addrOp}); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } +}; + +class CIRCatchParamOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CatchParamOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + if (op.isBegin()) { + // Get or create `declare ptr @__cxa_begin_catch(ptr)` + StringRef fnName = "__cxa_begin_catch"; + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + auto fnTy = mlir::LLVM::LLVMFunctionType::get(llvmPtrTy, {llvmPtrTy}, + /*isVarArg=*/false); + getOrCreateLLVMFuncOp(rewriter, op, fnName, fnTy); + rewriter.replaceOpWithNewOp( + op, mlir::TypeRange{llvmPtrTy}, fnName, + mlir::ValueRange{adaptor.getExceptionPtr()}); + return mlir::success(); + } else if (op.isEnd()) { + StringRef fnName = "__cxa_end_catch"; + auto fnTy = mlir::LLVM::LLVMFunctionType::get( + mlir::LLVM::LLVMVoidType::get(rewriter.getContext()), {}, + /*isVarArg=*/false); + getOrCreateLLVMFuncOp(rewriter, op, fnName, fnTy); + rewriter.create(op.getLoc(), mlir::TypeRange{}, + fnName, mlir::ValueRange{}); + rewriter.eraseOp(op); + return mlir::success(); + } + llvm_unreachable("only begin/end supposed to make to lowering stage"); + return mlir::failure(); + } +}; + +class CIRResumeOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ResumeOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // %lpad.val = insertvalue { ptr, i32 } poison, ptr %exception_ptr, 0 + // %lpad.val2 = insertvalue { ptr, i32 } %lpad.val, i32 %selector, 1 + // resume { ptr, i32 } %lpad.val2 + SmallVector slotIdx = {0}; + SmallVector selectorIdx = {1}; + auto llvmLandingPadStructTy = 
getLLVMLandingPadStructTy(rewriter); + mlir::Value poison = rewriter.create( + op.getLoc(), llvmLandingPadStructTy); + + mlir::Value slot = rewriter.create( + op.getLoc(), poison, adaptor.getExceptionPtr(), slotIdx); + mlir::Value selector = rewriter.create( + op.getLoc(), slot, adaptor.getTypeId(), selectorIdx); + + rewriter.replaceOpWithNewOp(op, selector); + return mlir::success(); + } +}; + +class CIRAllocExceptionOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::AllocExceptionOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Get or create `declare ptr @__cxa_allocate_exception(i64)` + StringRef fnName = "__cxa_allocate_exception"; + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + auto int64Ty = mlir::IntegerType::get(rewriter.getContext(), 64); + auto fnTy = mlir::LLVM::LLVMFunctionType::get(llvmPtrTy, {int64Ty}, + /*isVarArg=*/false); + getOrCreateLLVMFuncOp(rewriter, op, fnName, fnTy); + auto size = rewriter.create(op.getLoc(), + adaptor.getSizeAttr()); + rewriter.replaceOpWithNewOp( + op, mlir::TypeRange{llvmPtrTy}, fnName, mlir::ValueRange{size}); + return mlir::success(); + } +}; + +class CIRThrowOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ThrowOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Get or create `declare void @__cxa_throw(ptr, ptr, ptr)` + StringRef fnName = "__cxa_throw"; + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + auto voidTy = mlir::LLVM::LLVMVoidType::get(rewriter.getContext()); + auto fnTy = mlir::LLVM::LLVMFunctionType::get( + voidTy, {llvmPtrTy, llvmPtrTy, llvmPtrTy}, + /*isVarArg=*/false); + getOrCreateLLVMFuncOp(rewriter, op, fnName, fnTy); + mlir::Value typeInfo = rewriter.create( + op.getLoc(), mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), + adaptor.getTypeInfoAttr()); + + mlir::Value dtor; + if (op.getDtor()) { + dtor = rewriter.create( + op.getLoc(), mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), + adaptor.getDtorAttr()); + } else { + dtor = rewriter.create( + op.getLoc(), mlir::LLVM::LLVMPointerType::get(rewriter.getContext())); + } + rewriter.replaceOpWithNewOp( + op, mlir::TypeRange{}, fnName, + mlir::ValueRange{adaptor.getExceptionPtr(), typeInfo, dtor}); + return mlir::success(); + } +}; + +void populateCIRToLLVMConversionPatterns(mlir::RewritePatternSet &patterns, + mlir::TypeConverter &converter, + mlir::DataLayout &dataLayout) { + patterns.add(patterns.getContext()); + patterns.add(converter, dataLayout, patterns.getContext()); + patterns.add< + CIRCmpOpLowering, CIRSelectOpLowering, CIRBitClrsbOpLowering, + CIRBitClzOpLowering, CIRBitCtzOpLowering, CIRBitFfsOpLowering, + CIRBitParityOpLowering, CIRBitPopcountOpLowering, + CIRAtomicCmpXchgLowering, CIRAtomicXchgLowering, CIRAtomicFetchLowering, + CIRByteswapOpLowering, CIRRotateOpLowering, CIRBrCondOpLowering, + CIRPtrStrideOpLowering, CIRCallLowering, CIRTryCallLowering, + CIREhInflightOpLowering, CIRUnaryOpLowering, CIRBinOpLowering, + CIRBinOpOverflowOpLowering, CIRShiftOpLowering, CIRLoadLowering, + CIRConstantLowering, CIRStoreLowering, CIRFuncLowering, CIRCastOpLowering, + CIRGlobalOpLowering, CIRGetGlobalOpLowering, CIRComplexCreateOpLowering, + CIRComplexRealOpLowering, CIRComplexImagOpLowering, + 
CIRComplexRealPtrOpLowering, CIRComplexImagPtrOpLowering, + CIRVAStartLowering, CIRVAEndLowering, CIRVACopyLowering, CIRVAArgLowering, + CIRBrOpLowering, CIRGetMemberOpLowering, CIRGetRuntimeMemberOpLowering, + CIRSwitchFlatOpLowering, CIRPtrDiffOpLowering, CIRCopyOpLowering, + CIRMemCpyOpLowering, CIRFAbsOpLowering, CIRExpectOpLowering, + CIRVTableAddrPointOpLowering, CIRVectorCreateLowering, + CIRVectorCmpOpLowering, CIRVectorSplatLowering, CIRVectorTernaryLowering, + CIRVectorShuffleIntsLowering, CIRVectorShuffleVecLowering, + CIRStackSaveLowering, CIRUnreachableLowering, CIRTrapLowering, + CIRInlineAsmOpLowering, CIRSetBitfieldLowering, CIRGetBitfieldLowering, + CIRPrefetchLowering, CIRObjSizeOpLowering, CIRIsConstantOpLowering, + CIRCmpThreeWayOpLowering, CIRClearCacheOpLowering, CIRUndefOpLowering, + CIREhTypeIdOpLowering, CIRCatchParamOpLowering, CIRResumeOpLowering, + CIRAllocExceptionOpLowering, CIRThrowOpLowering, CIRIntrinsicCallLowering +#define GET_BUILTIN_LOWERING_LIST +#include "clang/CIR/Dialect/IR/CIRBuiltinsLowering.inc" +#undef GET_BUILTIN_LOWERING_LIST + >(converter, patterns.getContext()); +} + +namespace { + +std::unique_ptr +prepareLowerModule(mlir::ModuleOp module) { + mlir::PatternRewriter rewriter{module->getContext()}; + // If the triple is not present, e.g. CIR modules parsed from text, we + // cannot init LowerModule properly. + assert(!::cir::MissingFeatures::makeTripleAlwaysPresent()); + if (!module->hasAttr("cir.triple")) + return {}; + return mlir::cir::createLowerModule(module, rewriter); +} + +// FIXME: change the type of lowerModule to `LowerModule &` to have better +// lambda capturing experience. Also blocked by makeTripleAlwaysPresent. +void prepareTypeConverter(mlir::LLVMTypeConverter &converter, + mlir::DataLayout &dataLayout, + mlir::cir::LowerModule *lowerModule) { + converter.addConversion([&, lowerModule]( + mlir::cir::PointerType type) -> mlir::Type { + // Drop pointee type since LLVM dialect only allows opaque pointers. + + auto addrSpace = + mlir::cast_if_present(type.getAddrSpace()); + // Null addrspace attribute indicates the default addrspace. + if (!addrSpace) + return mlir::LLVM::LLVMPointerType::get(type.getContext()); + + assert(lowerModule && "CIR AS map is not available"); + // Pass through target addrspace and map CIR addrspace to LLVM addrspace by + // querying the target info. + unsigned targetAS = + addrSpace.isTarget() + ? addrSpace.getTargetValue() + : lowerModule->getTargetLoweringInfo() + .getTargetAddrSpaceFromCIRAddrSpace(addrSpace); + + return mlir::LLVM::LLVMPointerType::get(type.getContext(), targetAS); + }); + converter.addConversion([&](mlir::cir::DataMemberType type) -> mlir::Type { + return mlir::IntegerType::get(type.getContext(), + dataLayout.getTypeSizeInBits(type)); + }); + converter.addConversion([&](mlir::cir::ArrayType type) -> mlir::Type { + auto ty = converter.convertType(type.getEltType()); + return mlir::LLVM::LLVMArrayType::get(ty, type.getSize()); + }); + converter.addConversion([&](mlir::cir::VectorType type) -> mlir::Type { + auto ty = converter.convertType(type.getEltType()); + return mlir::LLVM::getFixedVectorType(ty, type.getSize()); + }); + converter.addConversion([&](mlir::cir::BoolType type) -> mlir::Type { + return mlir::IntegerType::get(type.getContext(), 8, + mlir::IntegerType::Signless); + }); + converter.addConversion([&](mlir::cir::IntType type) -> mlir::Type { + // LLVM doesn't work with signed types, so we drop the CIR signs here. 
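+    // For instance (sketch; the CIR spellings are assumed): both the signed
+    // !cir.int<s, 32> (alias !s32i) and the unsigned !cir.int<u, 32> (alias
+    // !u32i) are expected to lower to the single signless builtin type i32.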
+ return mlir::IntegerType::get(type.getContext(), type.getWidth()); + }); + converter.addConversion([&](mlir::cir::SingleType type) -> mlir::Type { + return mlir::FloatType::getF32(type.getContext()); + }); + converter.addConversion([&](mlir::cir::DoubleType type) -> mlir::Type { + return mlir::FloatType::getF64(type.getContext()); + }); + converter.addConversion([&](mlir::cir::FP80Type type) -> mlir::Type { + return mlir::FloatType::getF80(type.getContext()); + }); + converter.addConversion([&](mlir::cir::LongDoubleType type) -> mlir::Type { + return converter.convertType(type.getUnderlying()); + }); + converter.addConversion([&](mlir::cir::FP16Type type) -> mlir::Type { + return mlir::FloatType::getF16(type.getContext()); + }); + converter.addConversion([&](mlir::cir::BF16Type type) -> mlir::Type { + return mlir::FloatType::getBF16(type.getContext()); + }); + converter.addConversion([&](mlir::cir::ComplexType type) -> mlir::Type { + // A complex type is lowered to an LLVM struct that contains the real and + // imaginary part as data fields. + mlir::Type elementTy = converter.convertType(type.getElementTy()); + mlir::Type structFields[2] = {elementTy, elementTy}; + return mlir::LLVM::LLVMStructType::getLiteral(type.getContext(), + structFields); + }); + converter.addConversion([&](mlir::cir::FuncType type) -> mlir::Type { + auto result = converter.convertType(type.getReturnType()); + llvm::SmallVector arguments; + if (converter.convertTypes(type.getInputs(), arguments).failed()) + llvm_unreachable("Failed to convert function type parameters"); + auto varArg = type.isVarArg(); + return mlir::LLVM::LLVMFunctionType::get(result, arguments, varArg); + }); + converter.addConversion([&](mlir::cir::StructType type) -> mlir::Type { + // FIXME(cir): create separate unions, struct, and classes types. + // Convert struct members. + llvm::SmallVector llvmMembers; + switch (type.getKind()) { + case mlir::cir::StructType::Class: + // TODO(cir): This should be properly validated. + case mlir::cir::StructType::Struct: + for (auto ty : type.getMembers()) + llvmMembers.push_back(converter.convertType(ty)); + break; + // Unions are lowered as only the largest member. + case mlir::cir::StructType::Union: { + auto largestMember = type.getLargestMember(dataLayout); + if (largestMember) + llvmMembers.push_back(converter.convertType(largestMember)); + break; + } + } + + // Struct has a name: lower as an identified struct. + mlir::LLVM::LLVMStructType llvmStruct; + if (type.getName()) { + llvmStruct = mlir::LLVM::LLVMStructType::getIdentified( + type.getContext(), type.getPrefixedName()); + if (llvmStruct.setBody(llvmMembers, /*isPacked=*/type.getPacked()) + .failed()) + llvm_unreachable("Failed to set body of struct"); + } else { // Struct has no name: lower as literal struct. 
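+      // A literal struct is identified purely by its member list, so (as a
+      // sketch) an anonymous two-field record would come out roughly as
+      // !llvm.struct<(i32, i8)> rather than a named !llvm.struct<"...", (...)>.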
+ llvmStruct = mlir::LLVM::LLVMStructType::getLiteral( + type.getContext(), llvmMembers, /*isPacked=*/type.getPacked()); + } + + return llvmStruct; + }); + converter.addConversion([&](mlir::cir::VoidType type) -> mlir::Type { + return mlir::LLVM::LLVMVoidType::get(type.getContext()); + }); +} +} // namespace + +static void buildCtorDtorList( + mlir::ModuleOp module, StringRef globalXtorName, StringRef llvmXtorName, + llvm::function_ref(mlir::Attribute)> createXtor) { + llvm::SmallVector, 2> globalXtors; + for (auto namedAttr : module->getAttrs()) { + if (namedAttr.getName() == globalXtorName) { + for (auto attr : mlir::cast(namedAttr.getValue())) + globalXtors.emplace_back(createXtor(attr)); + break; + } + } + + if (globalXtors.empty()) + return; + + mlir::OpBuilder builder(module.getContext()); + builder.setInsertionPointToEnd(&module.getBodyRegion().back()); + + // Create a global array llvm.global_ctors with element type of + // struct { i32, ptr, ptr } + auto CtorPFTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); + llvm::SmallVector CtorStructFields; + CtorStructFields.push_back(builder.getI32Type()); + CtorStructFields.push_back(CtorPFTy); + CtorStructFields.push_back(CtorPFTy); + + auto CtorStructTy = mlir::LLVM::LLVMStructType::getLiteral( + builder.getContext(), CtorStructFields); + auto CtorStructArrayTy = + mlir::LLVM::LLVMArrayType::get(CtorStructTy, globalXtors.size()); + + auto loc = module.getLoc(); + auto newGlobalOp = builder.create( + loc, CtorStructArrayTy, true, mlir::LLVM::Linkage::Appending, + llvmXtorName, mlir::Attribute()); + + newGlobalOp.getRegion().push_back(new mlir::Block()); + builder.setInsertionPointToEnd(newGlobalOp.getInitializerBlock()); + + mlir::Value result = + builder.create(loc, CtorStructArrayTy); + + for (uint64_t I = 0; I < globalXtors.size(); I++) { + auto fn = globalXtors[I]; + mlir::Value structInit = + builder.create(loc, CtorStructTy); + mlir::Value initPriority = builder.create( + loc, CtorStructFields[0], fn.second); + mlir::Value initFuncAddr = builder.create( + loc, CtorStructFields[1], fn.first); + mlir::Value initAssociate = + builder.create(loc, CtorStructFields[2]); + structInit = builder.create(loc, structInit, + initPriority, 0); + structInit = builder.create(loc, structInit, + initFuncAddr, 1); + // TODO: handle associated data for initializers. + structInit = builder.create(loc, structInit, + initAssociate, 2); + result = + builder.create(loc, result, structInit, I); + } + + builder.create(loc, result); +} + +// The unreachable code is not lowered by applyPartialConversion function +// since it traverses blocks in the dominance order. At the same time we +// do need to lower such code - otherwise verification errors occur. +// For instance, the next CIR code: +// +// cir.func @foo(%arg0: !s32i) -> !s32i { +// %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool +// cir.if %4 { +// %5 = cir.const #cir.int<1> : !s32i +// cir.return %5 : !s32i +// } else { +// %5 = cir.const #cir.int<0> : !s32i +// cir.return %5 : !s32i +// } +// cir.return %arg0 : !s32i +// } +// +// contains an unreachable return operation (the last one). After the flattening +// pass it will be placed into the unreachable block. And the possible error +// after the lowering pass is: error: 'cir.return' op expects parent op to be +// one of 'cir.func, cir.scope, cir.if ... The reason that this operation was +// not lowered and the new parent is llvm.func. 
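+// In other words: applyPartialConversion never reaches the block, so the
+// cir.return survives unchanged while its enclosing cir.func has already been
+// rewritten into llvm.func, and the verifier rejects that mix. Collecting the
+// operations of such blocks and handing them to the conversion explicitly
+// avoids the error.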
+// +// In the future we may want to get rid of this function and use DCE pass or +// something similar. But now we need to guarantee the absence of the dialect +// verification errors. +void collect_unreachable(mlir::Operation *parent, + llvm::SmallVector &ops) { + + llvm::SmallVector unreachable_blocks; + parent->walk([&](mlir::Block *blk) { // check + if (blk->hasNoPredecessors() && !blk->isEntryBlock()) + unreachable_blocks.push_back(blk); + }); + + std::set visited; + for (auto *root : unreachable_blocks) { + // We create a work list for each unreachable block. + // Thus we traverse operations in some order. + std::deque workList; + workList.push_back(root); + + while (!workList.empty()) { + auto *blk = workList.back(); + workList.pop_back(); + if (visited.count(blk)) + continue; + visited.emplace(blk); + + for (auto &op : *blk) + ops.push_back(&op); + + for (auto it = blk->succ_begin(); it != blk->succ_end(); ++it) + workList.push_back(*it); + } + } +} + +// Create a string global for annotation related string. +mlir::LLVM::GlobalOp +getAnnotationStringGlobal(mlir::StringAttr strAttr, mlir::ModuleOp &module, + llvm::StringMap &globalsMap, + mlir::OpBuilder &globalVarBuilder, + mlir::Location &loc, bool isArg = false) { + llvm::StringRef str = strAttr.getValue(); + if (!globalsMap.contains(str)) { + auto llvmStrTy = mlir::LLVM::LLVMArrayType::get( + mlir::IntegerType::get(module.getContext(), 8), str.size() + 1); + auto strGlobalOp = globalVarBuilder.create( + loc, llvmStrTy, + /*isConstant=*/true, mlir::LLVM::Linkage::Private, + ".str" + + (globalsMap.empty() ? "" + : "." + std::to_string(globalsMap.size())) + + ".annotation" + (isArg ? ".arg" : ""), + mlir::StringAttr::get(module.getContext(), std::string(str) + '\0'), + /*alignment=*/isArg ? 
1 : 0); + if (!isArg) + strGlobalOp.setSection(ConvertCIRToLLVMPass::annotationSection); + strGlobalOp.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Global); + strGlobalOp.setDsoLocal(true); + globalsMap[str] = strGlobalOp; + } + return globalsMap[str]; +} + +mlir::Value lowerAnnotationValue( + mlir::ArrayAttr annotValue, mlir::ModuleOp &module, + mlir::OpBuilder &varInitBuilder, mlir::OpBuilder &globalVarBuilder, + llvm::StringMap &stringGlobalsMap, + llvm::StringMap &argStringGlobalsMap, + llvm::MapVector &argsVarMap, + llvm::SmallVector &annoStructFields, + mlir::LLVM::LLVMStructType &annoStructTy, + mlir::LLVM::LLVMPointerType &annoPtrTy, mlir::Location &loc) { + mlir::Value valueEntry = + varInitBuilder.create(loc, annoStructTy); + auto globalValueName = mlir::cast(annotValue[0]); + mlir::Operation *globalValue = + mlir::SymbolTable::lookupSymbolIn(module, globalValueName); + // The first field is ptr to the global value + auto globalValueFld = varInitBuilder.create( + loc, annoPtrTy, globalValueName); + + valueEntry = varInitBuilder.create( + loc, valueEntry, globalValueFld, 0); + mlir::cir::AnnotationAttr annotation = + mlir::cast(annotValue[1]); + + // The second field is ptr to the annotation name + mlir::StringAttr annotationName = annotation.getName(); + auto annotationNameFld = varInitBuilder.create( + loc, annoPtrTy, + getAnnotationStringGlobal(annotationName, module, stringGlobalsMap, + globalVarBuilder, loc) + .getSymName()); + + valueEntry = varInitBuilder.create( + loc, valueEntry, annotationNameFld, 1); + + // The third field is ptr to the translation unit name, + // and the fourth field is the line number + auto annotLoc = globalValue->getLoc(); + if (mlir::isa(annotLoc)) { + auto FusedLoc = mlir::cast(annotLoc); + annotLoc = FusedLoc.getLocations()[0]; + } + auto annotFileLoc = mlir::cast(annotLoc); + assert(annotFileLoc && "annotation value has to be FileLineColLoc"); + // To be consistent with clang code gen, we add trailing null char + auto fileName = mlir::StringAttr::get( + module.getContext(), std::string(annotFileLoc.getFilename().getValue())); + auto fileNameFld = varInitBuilder.create( + loc, annoPtrTy, + getAnnotationStringGlobal(fileName, module, stringGlobalsMap, + globalVarBuilder, loc) + .getSymName()); + valueEntry = varInitBuilder.create(loc, valueEntry, + fileNameFld, 2); + unsigned int lineNo = annotFileLoc.getLine(); + auto lineNoFld = varInitBuilder.create( + loc, annoStructFields[3], lineNo); + valueEntry = varInitBuilder.create(loc, valueEntry, + lineNoFld, 3); + // The fifth field is ptr to the annotation args var, it could be null + if (annotation.isNoArgs()) { + auto nullPtrFld = varInitBuilder.create(loc, annoPtrTy); + valueEntry = varInitBuilder.create( + loc, valueEntry, nullPtrFld, 4); + } else { + mlir::ArrayAttr argsAttr = annotation.getArgs(); + // First time we see this argsAttr, create a global for it + // and build its initializer + if (!argsVarMap.contains(argsAttr)) { + llvm::SmallVector argStrutFldTypes; + llvm::SmallVector argStrutFields; + for (mlir::Attribute arg : annotation.getArgs()) { + if (auto strArgAttr = mlir::dyn_cast(arg)) { + // Call getAnnotationStringGlobal here to make sure + // have a global for this string before + // creation of the args var. 
+ getAnnotationStringGlobal(strArgAttr, module, argStringGlobalsMap, + globalVarBuilder, loc, true); + // This will become a ptr to the global string + argStrutFldTypes.push_back(annoPtrTy); + } else if (auto intArgAttr = mlir::dyn_cast(arg)) { + argStrutFldTypes.push_back(intArgAttr.getType()); + } else { + llvm_unreachable("Unsupported annotation arg type"); + } + } + + mlir::LLVM::LLVMStructType argsStructTy = + mlir::LLVM::LLVMStructType::getLiteral(globalVarBuilder.getContext(), + argStrutFldTypes); + auto argsGlobalOp = globalVarBuilder.create( + loc, argsStructTy, true, mlir::LLVM::Linkage::Private, + ".args" + + (argsVarMap.empty() ? "" + : "." + std::to_string(argsVarMap.size())) + + ".annotation", + mlir::Attribute()); + argsGlobalOp.setSection(ConvertCIRToLLVMPass::annotationSection); + argsGlobalOp.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Global); + argsGlobalOp.setDsoLocal(true); + + // Create the initializer for this args global + argsGlobalOp.getRegion().push_back(new mlir::Block()); + mlir::OpBuilder argsInitBuilder(module.getContext()); + argsInitBuilder.setInsertionPointToEnd( + argsGlobalOp.getInitializerBlock()); + + mlir::Value argsStructInit = + argsInitBuilder.create(loc, argsStructTy); + int idx = 0; + for (mlir::Attribute arg : annotation.getArgs()) { + if (auto strArgAttr = mlir::dyn_cast(arg)) { + // This would be simply return with existing map entry value + // from argStringGlobalsMap as string global is already + // created in the previous loop. + mlir::LLVM::GlobalOp argStrVar = + getAnnotationStringGlobal(strArgAttr, module, argStringGlobalsMap, + globalVarBuilder, loc, true); + auto argStrVarAddr = argsInitBuilder.create( + loc, annoPtrTy, argStrVar.getSymName()); + argsStructInit = argsInitBuilder.create( + loc, argsStructInit, argStrVarAddr, idx++); + } else if (auto intArgAttr = mlir::dyn_cast(arg)) { + auto intArgFld = argsInitBuilder.create( + loc, intArgAttr.getType(), intArgAttr.getValue()); + argsStructInit = argsInitBuilder.create( + loc, argsStructInit, intArgFld, idx++); + } else { + llvm_unreachable("Unsupported annotation arg type"); + } + } + argsInitBuilder.create(loc, argsStructInit); + argsVarMap[argsAttr] = argsGlobalOp; + } + auto argsVarView = varInitBuilder.create( + loc, annoPtrTy, argsVarMap[argsAttr].getSymName()); + valueEntry = varInitBuilder.create( + loc, valueEntry, argsVarView, 4); + } + return valueEntry; +} + +void ConvertCIRToLLVMPass::buildGlobalAnnotationsVar() { + mlir::ModuleOp module = getOperation(); + mlir::Attribute attr = module->getAttr("cir.global_annotations"); + if (!attr) + return; + if (auto globalAnnotValues = + mlir::dyn_cast(attr)) { + auto annotationValuesArray = + mlir::dyn_cast(globalAnnotValues.getAnnotations()); + if (!annotationValuesArray || annotationValuesArray.empty()) + return; + mlir::OpBuilder globalVarBuilder(module.getContext()); + globalVarBuilder.setInsertionPointToEnd(&module.getBodyRegion().front()); + + // Create a global array for annotation values with element type of + // struct { ptr, ptr, ptr, i32, ptr } + mlir::LLVM::LLVMPointerType annoPtrTy = + mlir::LLVM::LLVMPointerType::get(globalVarBuilder.getContext()); + llvm::SmallVector annoStructFields; + annoStructFields.push_back(annoPtrTy); + annoStructFields.push_back(annoPtrTy); + annoStructFields.push_back(annoPtrTy); + annoStructFields.push_back(globalVarBuilder.getI32Type()); + annoStructFields.push_back(annoPtrTy); + + mlir::LLVM::LLVMStructType annoStructTy = + 
mlir::LLVM::LLVMStructType::getLiteral(globalVarBuilder.getContext(), + annoStructFields); + mlir::LLVM::LLVMArrayType annoStructArrayTy = + mlir::LLVM::LLVMArrayType::get(annoStructTy, + annotationValuesArray.size()); + mlir::Location loc = module.getLoc(); + auto annotationGlobalOp = globalVarBuilder.create( + loc, annoStructArrayTy, false, mlir::LLVM::Linkage::Appending, + "llvm.global.annotations", mlir::Attribute()); + annotationGlobalOp.setSection("llvm.metadata"); + annotationGlobalOp.getRegion().push_back(new mlir::Block()); + mlir::OpBuilder varInitBuilder(module.getContext()); + varInitBuilder.setInsertionPointToEnd( + annotationGlobalOp.getInitializerBlock()); + // Globals created for annotation strings and args to be + // placed before the var llvm.global.annotations. + // This is consistent with clang code gen. + globalVarBuilder.setInsertionPoint(annotationGlobalOp); + + mlir::Value result = + varInitBuilder.create(loc, annoStructArrayTy); + // Track globals created for annotation related strings + llvm::StringMap stringGlobalsMap; + // Track globals created for annotation arg related strings. + // They are different from annotation strings, as strings used in args + // are not in annotationSection, and also has aligment 1. + llvm::StringMap argStringGlobalsMap; + // Track globals created for annotation args. + llvm::MapVector argsVarMap; + + int idx = 0; + for (mlir::Attribute entry : annotationValuesArray) { + auto annotValue = cast(entry); + mlir::Value init = lowerAnnotationValue( + annotValue, module, varInitBuilder, globalVarBuilder, + stringGlobalsMap, argStringGlobalsMap, argsVarMap, annoStructFields, + annoStructTy, annoPtrTy, loc); + result = varInitBuilder.create(loc, result, + init, idx++); + } + varInitBuilder.create(loc, result); + } +} + +void ConvertCIRToLLVMPass::runOnOperation() { + auto module = getOperation(); + mlir::DataLayout dataLayout(module); + mlir::LLVMTypeConverter converter(&getContext()); + std::unique_ptr lowerModule = + prepareLowerModule(module); + prepareTypeConverter(converter, dataLayout, lowerModule.get()); + + mlir::RewritePatternSet patterns(&getContext()); + + populateCIRToLLVMConversionPatterns(patterns, converter, dataLayout); + mlir::populateFuncToLLVMConversionPatterns(converter, patterns); + + mlir::ConversionTarget target(getContext()); + using namespace mlir::cir; + // clang-format off + target.addLegalOp(); + // clang-format on + target.addLegalDialect(); + target.addIllegalDialect(); + + // Allow operations that will be lowered directly to LLVM IR. + target.addLegalOp(); + + getOperation()->removeAttr("cir.sob"); + getOperation()->removeAttr("cir.lang"); + + llvm::SmallVector ops; + ops.push_back(module); + collect_unreachable(module, ops); + + if (failed(applyPartialConversion(ops, target, std::move(patterns)))) + signalPassFailure(); + + // Emit the llvm.global_ctors array. + buildCtorDtorList( + module, "cir.global_ctors", "llvm.global_ctors", + [](mlir::Attribute attr) { + assert(mlir::isa(attr) && + "must be a GlobalCtorAttr"); + auto ctorAttr = mlir::cast(attr); + return std::make_pair(ctorAttr.getName(), ctorAttr.getPriority()); + }); + // Emit the llvm.global_dtors array. 
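+  // For reference, a minimal sketch of what these appending globals look like
+  // in LLVM IR, assuming a single hypothetical function @foo registered at the
+  // default priority 65535:
+  //   @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }]
+  //                        [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]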
+ buildCtorDtorList( + module, "cir.global_dtors", "llvm.global_dtors", + [](mlir::Attribute attr) { + assert(mlir::isa(attr) && + "must be a GlobalDtorAttr"); + auto dtorAttr = mlir::cast(attr); + return std::make_pair(dtorAttr.getName(), dtorAttr.getPriority()); + }); + buildGlobalAnnotationsVar(); +} + +std::unique_ptr createConvertCIRToLLVMPass() { + return std::make_unique(); +} + +void populateCIRToLLVMPasses(mlir::OpPassManager &pm) { + populateCIRPreLoweringPasses(pm); + pm.addPass(createConvertCIRToLLVMPass()); +} + +extern void registerCIRDialectTranslation(mlir::MLIRContext &context); + +std::unique_ptr +lowerDirectlyFromCIRToLLVMIR(mlir::ModuleOp theModule, LLVMContext &llvmCtx, + bool disableVerifier) { + mlir::MLIRContext *mlirCtx = theModule.getContext(); + mlir::PassManager pm(mlirCtx); + populateCIRToLLVMPasses(pm); + + // This is necessary to have line tables emitted and basic + // debugger working. In the future we will add proper debug information + // emission directly from our frontend. + pm.addPass(mlir::LLVM::createDIScopeForLLVMFuncOpPass()); + + // FIXME(cir): this shouldn't be necessary. It's meant to be a temporary + // workaround until we understand why some unrealized casts are being + // emmited and how to properly avoid them. + pm.addPass(mlir::createReconcileUnrealizedCastsPass()); + + pm.enableVerifier(!disableVerifier); + (void)mlir::applyPassManagerCLOptions(pm); + + auto result = !mlir::failed(pm.run(theModule)); + if (!result) + report_fatal_error( + "The pass manager failed to lower CIR to LLVMIR dialect!"); + + // Now that we ran all the lowering passes, verify the final output. + if (theModule.verify().failed()) + report_fatal_error("Verification of the final LLVMIR dialect failed!"); + + mlir::registerBuiltinDialectTranslation(*mlirCtx); + mlir::registerLLVMDialectTranslation(*mlirCtx); + mlir::registerOpenMPDialectTranslation(*mlirCtx); + registerCIRDialectTranslation(*mlirCtx); + + auto ModuleName = theModule.getName(); + auto llvmModule = mlir::translateModuleToLLVMIR( + theModule, llvmCtx, ModuleName ? *ModuleName : "CIRToLLVMModule"); + + if (!llvmModule) + report_fatal_error("Lowering from LLVMIR dialect to llvm IR failed!"); + + return llvmModule; +} +} // namespace direct +} // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp new file mode 100644 index 000000000000..4e8e2e9558cc --- /dev/null +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp @@ -0,0 +1,272 @@ +//====- LoweToLLVMIR.cpp - Lowering CIR attributes to LLVMIR ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering of CIR attributes and operations directly to +// LLVMIR. 
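+// Typical examples (as implemented below) are mapping the cir.extra function
+// attributes onto LLVM function attributes and emitting OpenCL kernel
+// metadata as named MDNodes on the corresponding llvm::Function.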
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" +#include "mlir/Target/LLVMIR/ModuleTranslation.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/GlobalVariable.h" + +using namespace llvm; + +namespace cir { +namespace direct { + +/// Implementation of the dialect interface that converts CIR attributes to LLVM +/// IR metadata. +class CIRDialectLLVMIRTranslationInterface + : public mlir::LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Any named attribute in the CIR dialect, i.e, with name started with + /// "cir.", will be handled here. + virtual mlir::LogicalResult amendOperation( + mlir::Operation *op, llvm::ArrayRef instructions, + mlir::NamedAttribute attribute, + mlir::LLVM::ModuleTranslation &moduleTranslation) const override { + if (auto func = dyn_cast(op)) { + amendFunction(func, instructions, attribute, moduleTranslation); + } else if (auto mod = dyn_cast(op)) { + amendModule(mod, attribute, moduleTranslation); + } + return mlir::success(); + } + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. + mlir::LogicalResult convertOperation( + mlir::Operation *op, llvm::IRBuilderBase &builder, + mlir::LLVM::ModuleTranslation &moduleTranslation) const final { + + if (auto cirOp = llvm::dyn_cast(op)) + moduleTranslation.mapValue(cirOp.getResult()) = + llvm::Constant::getNullValue( + moduleTranslation.convertType(cirOp.getType())); + + return mlir::success(); + } + +private: + // Translate CIR's module attributes to LLVM's module metadata + void amendModule(mlir::ModuleOp module, mlir::NamedAttribute attribute, + mlir::LLVM::ModuleTranslation &moduleTranslation) const { + llvm::Module *llvmModule = moduleTranslation.getLLVMModule(); + llvm::LLVMContext &llvmContext = llvmModule->getContext(); + + if (auto openclVersionAttr = mlir::dyn_cast( + attribute.getValue())) { + auto *int32Ty = llvm::IntegerType::get(llvmContext, 32); + llvm::Metadata *oclVerElts[] = { + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(int32Ty, openclVersionAttr.getMajorVersion())), + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(int32Ty, openclVersionAttr.getMinorVersion()))}; + llvm::NamedMDNode *oclVerMD = + llvmModule->getOrInsertNamedMetadata("opencl.ocl.version"); + oclVerMD->addOperand(llvm::MDNode::get(llvmContext, oclVerElts)); + } + + // Drop ammended CIR attribute from LLVM op. + module->removeAttr(attribute.getName()); + } + + // Translate CIR's extra function attributes to LLVM's function attributes. 
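+  // For example (a sketch; the exact CIR attribute spellings are assumed):
+  // an inline(never) entry becomes the LLVM 'noinline' attribute, optnone
+  // becomes 'optnone', nothrow becomes 'nounwind', and convergent becomes
+  // 'convergent'.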
+ void amendFunction(mlir::LLVM::LLVMFuncOp func, + llvm::ArrayRef instructions, + mlir::NamedAttribute attribute, + mlir::LLVM::ModuleTranslation &moduleTranslation) const { + llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName()); + if (auto extraAttr = mlir::dyn_cast( + attribute.getValue())) { + for (auto attr : extraAttr.getElements()) { + if (auto inlineAttr = + mlir::dyn_cast(attr.getValue())) { + if (inlineAttr.isNoInline()) + llvmFunc->addFnAttr(llvm::Attribute::NoInline); + else if (inlineAttr.isAlwaysInline()) + llvmFunc->addFnAttr(llvm::Attribute::AlwaysInline); + else if (inlineAttr.isInlineHint()) + llvmFunc->addFnAttr(llvm::Attribute::InlineHint); + else + llvm_unreachable("Unknown inline kind"); + } else if (mlir::dyn_cast(attr.getValue())) { + llvmFunc->addFnAttr(llvm::Attribute::OptimizeNone); + } else if (mlir::dyn_cast(attr.getValue())) { + llvmFunc->addFnAttr(llvm::Attribute::NoUnwind); + } else if (mlir::dyn_cast(attr.getValue())) { + llvmFunc->addFnAttr(llvm::Attribute::Convergent); + } else if (auto clKernelMetadata = + mlir::dyn_cast( + attr.getValue())) { + emitOpenCLKernelMetadata(clKernelMetadata, llvmFunc, + moduleTranslation); + } else if (auto clArgMetadata = + mlir::dyn_cast( + attr.getValue())) { + emitOpenCLKernelArgMetadata(clArgMetadata, func.getNumArguments(), + llvmFunc, moduleTranslation); + } + } + } + + // Drop ammended CIR attribute from LLVM op. + func->removeAttr(attribute.getName()); + } + + void emitOpenCLKernelMetadata( + mlir::cir::OpenCLKernelMetadataAttr clKernelMetadata, + llvm::Function *llvmFunc, + mlir::LLVM::ModuleTranslation &moduleTranslation) const { + auto &vmCtx = moduleTranslation.getLLVMContext(); + + auto lowerArrayAttr = [&](mlir::ArrayAttr arrayAttr) { + llvm::SmallVector attrMDArgs; + for (mlir::Attribute attr : arrayAttr) { + int64_t value = mlir::cast(attr).getInt(); + attrMDArgs.push_back( + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + llvm::IntegerType::get(vmCtx, 32), llvm::APInt(32, value)))); + } + return llvm::MDNode::get(vmCtx, attrMDArgs); + }; + + if (auto workGroupSizeHint = clKernelMetadata.getWorkGroupSizeHint()) { + llvmFunc->setMetadata("work_group_size_hint", + lowerArrayAttr(workGroupSizeHint)); + } + + if (auto reqdWorkGroupSize = clKernelMetadata.getReqdWorkGroupSize()) { + llvmFunc->setMetadata("reqd_work_group_size", + lowerArrayAttr(reqdWorkGroupSize)); + } + + if (auto vecTypeHint = clKernelMetadata.getVecTypeHint()) { + auto hintQTy = vecTypeHint.getValue(); + bool isSignedInteger = *clKernelMetadata.getVecTypeHintSignedness(); + llvm::Metadata *attrMDArgs[] = { + llvm::ConstantAsMetadata::get( + llvm::UndefValue::get(moduleTranslation.convertType(hintQTy))), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + llvm::IntegerType::get(vmCtx, 32), + llvm::APInt(32, (uint64_t)(isSignedInteger ? 
1 : 0))))}; + llvmFunc->setMetadata("vec_type_hint", + llvm::MDNode::get(vmCtx, attrMDArgs)); + } + + if (auto intelReqdSubgroupSize = + clKernelMetadata.getIntelReqdSubGroupSize()) { + int64_t reqdSubgroupSize = intelReqdSubgroupSize.getInt(); + llvm::Metadata *attrMDArgs[] = { + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::IntegerType::get(vmCtx, 32), + llvm::APInt(32, reqdSubgroupSize))), + }; + llvmFunc->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(vmCtx, attrMDArgs)); + } + } + + void emitOpenCLKernelArgMetadata( + mlir::cir::OpenCLKernelArgMetadataAttr clArgMetadata, unsigned numArgs, + llvm::Function *llvmFunc, + mlir::LLVM::ModuleTranslation &moduleTranslation) const { + auto &vmCtx = moduleTranslation.getLLVMContext(); + + // MDNode for the kernel argument address space qualifiers. + SmallVector addressQuals; + + // MDNode for the kernel argument access qualifiers (images only). + SmallVector accessQuals; + + // MDNode for the kernel argument type names. + SmallVector argTypeNames; + + // MDNode for the kernel argument base type names. + SmallVector argBaseTypeNames; + + // MDNode for the kernel argument type qualifiers. + SmallVector argTypeQuals; + + // MDNode for the kernel argument names. + SmallVector argNames; + + auto lowerStringAttr = [&](mlir::Attribute strAttr) { + return llvm::MDString::get( + vmCtx, mlir::cast(strAttr).getValue()); + }; + + bool shouldEmitArgName = !!clArgMetadata.getName(); + + auto addressSpaceValues = + clArgMetadata.getAddrSpace().getAsValueRange(); + + for (auto &&[i, addrSpace] : llvm::enumerate(addressSpaceValues)) { + // Address space qualifier. + addressQuals.push_back( + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + llvm::IntegerType::get(vmCtx, 32), addrSpace))); + + // Access qualifier. + accessQuals.push_back(lowerStringAttr(clArgMetadata.getAccessQual()[i])); + + // Type name. + argTypeNames.push_back(lowerStringAttr(clArgMetadata.getType()[i])); + + // Base type name. + argBaseTypeNames.push_back( + lowerStringAttr(clArgMetadata.getBaseType()[i])); + + // Type qualifier. + argTypeQuals.push_back(lowerStringAttr(clArgMetadata.getTypeQual()[i])); + + // Argument name. 
+ if (shouldEmitArgName) + argNames.push_back(lowerStringAttr(clArgMetadata.getName()[i])); + } + + llvmFunc->setMetadata("kernel_arg_addr_space", + llvm::MDNode::get(vmCtx, addressQuals)); + llvmFunc->setMetadata("kernel_arg_access_qual", + llvm::MDNode::get(vmCtx, accessQuals)); + llvmFunc->setMetadata("kernel_arg_type", + llvm::MDNode::get(vmCtx, argTypeNames)); + llvmFunc->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(vmCtx, argBaseTypeNames)); + llvmFunc->setMetadata("kernel_arg_type_qual", + llvm::MDNode::get(vmCtx, argTypeQuals)); + if (shouldEmitArgName) + llvmFunc->setMetadata("kernel_arg_name", + llvm::MDNode::get(vmCtx, argNames)); + } +}; + +void registerCIRDialectTranslation(mlir::DialectRegistry ®istry) { + registry.insert(); + registry.addExtension( + +[](mlir::MLIRContext *ctx, mlir::cir::CIRDialect *dialect) { + dialect->addInterfaces(); + }); +} + +void registerCIRDialectTranslation(mlir::MLIRContext &context) { + mlir::DialectRegistry registry; + registerCIRDialectTranslation(registry); + context.appendDialectRegistry(registry); +} +} // namespace direct +} // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LoweringHelpers.h b/clang/lib/CIR/Lowering/DirectToLLVM/LoweringHelpers.h new file mode 100644 index 000000000000..46de5dfc7634 --- /dev/null +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LoweringHelpers.h @@ -0,0 +1,74 @@ +#ifndef LLVM_CLANG_LIB_LOWERINGHELPERS_H +#define LLVM_CLANG_LIB_LOWERINGHELPERS_H + +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" + +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" + +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" + +using namespace llvm; + +mlir::Value createIntCast(mlir::OpBuilder &bld, mlir::Value src, + mlir::IntegerType dstTy, bool isSigned = false) { + auto srcTy = src.getType(); + assert(isa(srcTy)); + + auto srcWidth = mlir::cast(srcTy).getWidth(); + auto dstWidth = mlir::cast(dstTy).getWidth(); + auto loc = src.getLoc(); + + if (dstWidth > srcWidth && isSigned) + return bld.create(loc, dstTy, src); + else if (dstWidth > srcWidth) + return bld.create(loc, dstTy, src); + else if (dstWidth < srcWidth) + return bld.create(loc, dstTy, src); + else + return bld.create(loc, dstTy, src); +} + +mlir::Value getConstAPInt(mlir::OpBuilder &bld, mlir::Location loc, + mlir::Type typ, const llvm::APInt &val) { + return bld.create(loc, typ, val); +} + +mlir::Value getConst(mlir::OpBuilder &bld, mlir::Location loc, mlir::Type typ, + unsigned val) { + return bld.create(loc, typ, val); +} + +mlir::Value createShL(mlir::OpBuilder &bld, mlir::Value lhs, unsigned rhs) { + if (!rhs) + return lhs; + auto rhsVal = getConst(bld, lhs.getLoc(), lhs.getType(), rhs); + return bld.create(lhs.getLoc(), lhs, rhsVal); +} + +mlir::Value createLShR(mlir::OpBuilder &bld, mlir::Value lhs, unsigned rhs) { + if (!rhs) + return lhs; + auto rhsVal = getConst(bld, lhs.getLoc(), lhs.getType(), rhs); + return bld.create(lhs.getLoc(), lhs, rhsVal); +} + +mlir::Value createAShR(mlir::OpBuilder &bld, mlir::Value lhs, unsigned rhs) { + if (!rhs) + return lhs; + auto rhsVal = getConst(bld, lhs.getLoc(), lhs.getType(), rhs); + return bld.create(lhs.getLoc(), lhs, rhsVal); +} + +mlir::Value createAnd(mlir::OpBuilder &bld, mlir::Value lhs, + const llvm::APInt &rhs) { + auto rhsVal = getConstAPInt(bld, lhs.getLoc(), lhs.getType(), 
rhs); + return bld.create(lhs.getLoc(), lhs, rhsVal); +} + +#endif // LLVM_CLANG_LIB_LOWERINGHELPERS_H diff --git a/clang/lib/CIR/Lowering/LoweringHelpers.cpp b/clang/lib/CIR/Lowering/LoweringHelpers.cpp new file mode 100644 index 000000000000..06c92ae12472 --- /dev/null +++ b/clang/lib/CIR/Lowering/LoweringHelpers.cpp @@ -0,0 +1,160 @@ +//====- LoweringHelpers.cpp - Lowering helper functions -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains helper functions for lowering from CIR to LLVM or MLIR. +// +//===----------------------------------------------------------------------===// +#include "clang/CIR/LoweringHelpers.h" + +mlir::DenseElementsAttr +convertStringAttrToDenseElementsAttr(mlir::cir::ConstArrayAttr attr, + mlir::Type type) { + auto values = llvm::SmallVector{}; + auto stringAttr = mlir::dyn_cast(attr.getElts()); + assert(stringAttr && "expected string attribute here"); + for (auto element : stringAttr) + values.push_back({8, (uint64_t)element}); + auto arrayTy = mlir::dyn_cast(attr.getType()); + assert(arrayTy && "String attribute must have an array type"); + if (arrayTy.getSize() != stringAttr.size()) + llvm_unreachable("array type of the length not equal to that of the string " + "attribute is not supported yet"); + return mlir::DenseElementsAttr::get( + mlir::RankedTensorType::get({(int64_t)values.size()}, type), + llvm::ArrayRef(values)); +} + +template <> mlir::APInt getZeroInitFromType(mlir::Type Ty) { + assert(mlir::isa(Ty) && "expected int type"); + auto IntTy = mlir::cast(Ty); + return mlir::APInt::getZero(IntTy.getWidth()); +} + +template <> mlir::APFloat getZeroInitFromType(mlir::Type Ty) { + assert((mlir::isa(Ty)) && + "only float and double supported"); + if (Ty.isF32() || mlir::isa(Ty)) + return mlir::APFloat(0.f); + if (Ty.isF64() || mlir::isa(Ty)) + return mlir::APFloat(0.0); + llvm_unreachable("NYI"); +} + +// return the nested type and quantity of elements for cir.array type. +// e.g: for !cir.array x 1> +// it returns !s32i as return value and stores 3 to elemQuantity. 
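+// (Assuming the example above denotes a nested array along the lines of
+// !cir.array<!cir.array<!s32i x 3> x 1>, the innermost element type is !s32i
+// and the total element count is 3 * 1 = 3.)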
+mlir::Type getNestedTypeAndElemQuantity(mlir::Type Ty, unsigned &elemQuantity) { + assert(mlir::isa(Ty) && "expected ArrayType"); + + elemQuantity = 1; + mlir::Type nestTy = Ty; + while (auto ArrTy = mlir::dyn_cast(nestTy)) { + nestTy = ArrTy.getEltType(); + elemQuantity *= ArrTy.getSize(); + } + + return nestTy; +} + +template +void fillTrailingZeros(mlir::cir::ConstArrayAttr attr, + llvm::SmallVectorImpl &values) { + auto numTrailingZeros = attr.getTrailingZerosNum(); + if (numTrailingZeros) { + auto localArrayTy = mlir::dyn_cast(attr.getType()); + assert(localArrayTy && "expected !cir.array"); + + auto nestTy = localArrayTy.getEltType(); + if (!mlir::isa(nestTy)) + values.insert(values.end(), numTrailingZeros, + getZeroInitFromType(nestTy)); + } +} + +template +void convertToDenseElementsAttrImpl(mlir::cir::ConstArrayAttr attr, + llvm::SmallVectorImpl &values) { + if (auto stringAttr = mlir::dyn_cast(attr.getElts())) { + if (auto arrayType = mlir::dyn_cast(attr.getType())) { + for (auto element : stringAttr) { + auto intAttr = mlir::cir::IntAttr::get(arrayType.getEltType(), element); + values.push_back(mlir::dyn_cast(intAttr).getValue()); + } + return; + } + } + + auto arrayAttr = mlir::cast(attr.getElts()); + for (auto eltAttr : arrayAttr) { + if (auto valueAttr = mlir::dyn_cast(eltAttr)) { + values.push_back(valueAttr.getValue()); + } else if (auto subArrayAttr = + mlir::dyn_cast(eltAttr)) { + convertToDenseElementsAttrImpl(subArrayAttr, values); + if (mlir::dyn_cast(subArrayAttr.getElts())) + fillTrailingZeros(subArrayAttr, values); + } else if (auto zeroAttr = mlir::dyn_cast(eltAttr)) { + unsigned numStoredZeros = 0; + auto nestTy = + getNestedTypeAndElemQuantity(zeroAttr.getType(), numStoredZeros); + values.insert(values.end(), numStoredZeros, + getZeroInitFromType(nestTy)); + } else { + llvm_unreachable("unknown element in ConstArrayAttr"); + } + } + + // Only fill in trailing zeros at the local cir.array level where the element + // type isn't another array (for the mult-dim case). + fillTrailingZeros(attr, values); +} + +template +mlir::DenseElementsAttr +convertToDenseElementsAttr(mlir::cir::ConstArrayAttr attr, + const llvm::SmallVectorImpl &dims, + mlir::Type type) { + auto values = llvm::SmallVector{}; + convertToDenseElementsAttrImpl(attr, values); + return mlir::DenseElementsAttr::get(mlir::RankedTensorType::get(dims, type), + llvm::ArrayRef(values)); +} + +std::optional +lowerConstArrayAttr(mlir::cir::ConstArrayAttr constArr, + const mlir::TypeConverter *converter) { + + // Ensure ConstArrayAttr has a type. + auto typedConstArr = mlir::dyn_cast(constArr); + assert(typedConstArr && "cir::ConstArrayAttr is not a mlir::TypedAttr"); + + // Ensure ConstArrayAttr type is a ArrayType. + auto cirArrayType = + mlir::dyn_cast(typedConstArr.getType()); + assert(cirArrayType && "cir::ConstArrayAttr is not a cir::ArrayType"); + + // Is a ConstArrayAttr with an cir::ArrayType: fetch element type. 
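+  // Walk the (possibly nested) array type, recording each dimension's size;
+  // `type` ends up as the innermost element type used for the tensor shape.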
+ mlir::Type type = cirArrayType; + auto dims = llvm::SmallVector{}; + while (auto arrayType = mlir::dyn_cast(type)) { + dims.push_back(arrayType.getSize()); + type = arrayType.getEltType(); + } + + if (mlir::isa(constArr.getElts())) + return convertStringAttrToDenseElementsAttr(constArr, + converter->convertType(type)); + if (mlir::isa(type)) + return convertToDenseElementsAttr( + constArr, dims, converter->convertType(type)); + if (mlir::isa(type)) + return convertToDenseElementsAttr( + constArr, dims, converter->convertType(type)); + + return std::nullopt; +} diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/CMakeLists.txt b/clang/lib/CIR/Lowering/ThroughMLIR/CMakeLists.txt new file mode 100644 index 000000000000..7aa436b04e00 --- /dev/null +++ b/clang/lib/CIR/Lowering/ThroughMLIR/CMakeLists.txt @@ -0,0 +1,44 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + ) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_clang_library(clangCIRLoweringThroughMLIR + LowerCIRLoopToSCF.cpp + LowerCIRToMLIR.cpp + LowerMLIRToLLVM.cpp + + DEPENDS + MLIRCIROpsIncGen + MLIRCIREnumsGen + MLIRCIRASTAttrInterfacesIncGen + MLIRCIROpInterfacesIncGen + MLIRCIRLoopOpInterfaceIncGen + MLIRBuiltinLocationAttributesIncGen + MLIRBuiltinTypeInterfacesIncGen + MLIRFunctionInterfacesIncGen + + LINK_LIBS + clangAST + clangBasic + clangCodeGen + clangLex + clangFrontend + clangCIR + clangCIRLoweringHelpers + ${dialect_libs} + MLIRCIR + MLIRAnalysis + MLIRBuiltinToLLVMIRTranslation + MLIRLLVMToLLVMIRTranslation + MLIRIR + MLIRParser + MLIRSideEffectInterfaces + MLIRTransforms + MLIRSupport + MLIRMemRefDialect + MLIROpenMPDialect + MLIROpenMPToLLVMIRTranslation + ) diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp new file mode 100644 index 000000000000..1b6eba94c5ea --- /dev/null +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRLoopToSCF.cpp @@ -0,0 +1,359 @@ +//====- LowerCIRLoopToSCF.cpp - Lowering from CIR Loop to SCF -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering of CIR loop operations to SCF. 
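+// Counted cir.for loops in canonical form are rewritten into scf.for with an
+// explicit lower bound, upper bound and step, while cir.while and cir.do
+// become scf.while (see the pattern classes below).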
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/SCF/Transforms/Passes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/LowerToMLIR.h" +#include "clang/CIR/Passes.h" +#include "llvm/ADT/TypeSwitch.h" + +using namespace cir; +using namespace llvm; + +namespace cir { + +class SCFLoop { +public: + SCFLoop(mlir::cir::ForOp op, mlir::ConversionPatternRewriter *rewriter) + : forOp(op), rewriter(rewriter) {} + + int64_t getStep() { return step; } + mlir::Value getLowerBound() { return lowerBound; } + mlir::Value getUpperBound() { return upperBound; } + + int64_t findStepAndIV(mlir::Value &addr); + mlir::cir::CmpOp findCmpOp(); + mlir::Value findIVInitValue(); + void analysis(); + + mlir::Value plusConstant(mlir::Value V, mlir::Location loc, int addend); + void transferToSCFForOp(); + +private: + mlir::cir::ForOp forOp; + mlir::cir::CmpOp cmpOp; + mlir::Value IVAddr, lowerBound = nullptr, upperBound = nullptr; + mlir::ConversionPatternRewriter *rewriter; + int64_t step = 0; +}; + +class SCFWhileLoop { +public: + SCFWhileLoop(mlir::cir::WhileOp op, mlir::cir::WhileOp::Adaptor adaptor, + mlir::ConversionPatternRewriter *rewriter) + : whileOp(op), adaptor(adaptor), rewriter(rewriter) {} + void transferToSCFWhileOp(); + +private: + mlir::cir::WhileOp whileOp; + mlir::cir::WhileOp::Adaptor adaptor; + mlir::ConversionPatternRewriter *rewriter; +}; + +class SCFDoLoop { +public: + SCFDoLoop(mlir::cir::DoWhileOp op, mlir::cir::DoWhileOp::Adaptor adaptor, + mlir::ConversionPatternRewriter *rewriter) + : DoOp(op), adaptor(adaptor), rewriter(rewriter) {} + void transferToSCFWhileOp(); + +private: + mlir::cir::DoWhileOp DoOp; + mlir::cir::DoWhileOp::Adaptor adaptor; + mlir::ConversionPatternRewriter *rewriter; +}; + +static int64_t getConstant(mlir::cir::ConstantOp op) { + auto attr = op->getAttrs().front().getValue(); + const auto IntAttr = mlir::dyn_cast(attr); + return IntAttr.getValue().getSExtValue(); +} + +int64_t SCFLoop::findStepAndIV(mlir::Value &addr) { + auto *stepBlock = + (forOp.maybeGetStep() ? &forOp.maybeGetStep()->front() : nullptr); + assert(stepBlock && "Can not find step block"); + + int64_t step = 0; + mlir::Value IV = nullptr; + // Try to match "IV load addr; ++IV; store IV, addr" to find step. 
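+  // A hypothetical step region for `for (int i = 0; i < n; ++i)` would look
+  // roughly like this (CIR syntax abbreviated):
+  //   %iv  = cir.load %i.addr
+  //   %inc = cir.unary(inc, %iv)
+  //   cir.store %inc, %i.addr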
+ for (mlir::Operation &op : *stepBlock) + if (auto loadOp = dyn_cast(op)) { + addr = loadOp.getAddr(); + IV = loadOp.getResult(); + } else if (auto cop = dyn_cast(op)) { + if (step) + llvm_unreachable( + "Not support multiple constant in step calculation yet"); + step = getConstant(cop); + } else if (auto bop = dyn_cast(op)) { + if (bop.getLhs() != IV) + llvm_unreachable("Find BinOp not operate on IV"); + if (bop.getKind() != mlir::cir::BinOpKind::Add) + llvm_unreachable( + "Not support BinOp other than Add in step calculation yet"); + } else if (auto uop = dyn_cast(op)) { + if (uop.getInput() != IV) + llvm_unreachable("Find UnaryOp not operate on IV"); + if (uop.getKind() == mlir::cir::UnaryOpKind::Inc) + step = 1; + else if (uop.getKind() == mlir::cir::UnaryOpKind::Dec) + llvm_unreachable("Not support decrement step yet"); + } else if (auto storeOp = dyn_cast(op)) { + assert(storeOp.getAddr() == addr && "Can't find IV when lowering ForOp"); + } + assert(step && "Can't find step when lowering ForOp"); + + return step; +} + +static bool isIVLoad(mlir::Operation *op, mlir::Value IVAddr) { + if (!op) + return false; + if (isa(op)) { + if (!op->getOperand(0)) + return false; + if (op->getOperand(0) == IVAddr) + return true; + } + return false; +} + +mlir::cir::CmpOp SCFLoop::findCmpOp() { + cmpOp = nullptr; + for (auto *user : IVAddr.getUsers()) { + if (user->getParentRegion() != &forOp.getCond()) + continue; + if (auto loadOp = dyn_cast(*user)) { + if (!loadOp->hasOneUse()) + continue; + if (auto op = dyn_cast(*loadOp->user_begin())) { + cmpOp = op; + break; + } + } + } + if (!cmpOp) + llvm_unreachable("Can't find loop CmpOp"); + + auto type = cmpOp.getLhs().getType(); + if (!mlir::isa(type)) + llvm_unreachable("Non-integer type IV is not supported"); + + auto lhsDefOp = cmpOp.getLhs().getDefiningOp(); + if (!lhsDefOp) + llvm_unreachable("Can't find IV load"); + if (!isIVLoad(lhsDefOp, IVAddr)) + llvm_unreachable("cmpOp LHS is not IV"); + + if (cmpOp.getKind() != mlir::cir::CmpOpKind::le && + cmpOp.getKind() != mlir::cir::CmpOpKind::lt) + llvm_unreachable("Not support lowering other than le or lt comparison"); + + return cmpOp; +} + +mlir::Value SCFLoop::plusConstant(mlir::Value V, mlir::Location loc, + int addend) { + auto type = V.getType(); + auto c1 = rewriter->create( + loc, type, mlir::IntegerAttr::get(type, addend)); + return rewriter->create(loc, V, c1); +} + +// Return IV initial value by searching the store before the loop. +// The operations before the loop have been transferred to MLIR. +// So we need to go through getRemappedValue to find the value. +mlir::Value SCFLoop::findIVInitValue() { + auto remapAddr = rewriter->getRemappedValue(IVAddr); + if (!remapAddr) + return nullptr; + if (!remapAddr.hasOneUse()) + return nullptr; + auto memrefStore = dyn_cast(*remapAddr.user_begin()); + if (!memrefStore) + return nullptr; + return memrefStore->getOperand(0); +} + +void SCFLoop::analysis() { + step = findStepAndIV(IVAddr); + cmpOp = findCmpOp(); + auto IVInit = findIVInitValue(); + // The loop end value should be hoisted out of loop by -cir-mlir-scf-prepare. + // So we could get the value by getRemappedValue. + auto IVEndBound = rewriter->getRemappedValue(cmpOp.getRhs()); + // If the loop end bound is not loop invariant and can't be hoisted. + // The following assertion will be triggerred. 
+ assert(IVEndBound && "can't find IV end boundary"); + + if (step > 0) { + lowerBound = IVInit; + if (cmpOp.getKind() == mlir::cir::CmpOpKind::lt) + upperBound = IVEndBound; + else if (cmpOp.getKind() == mlir::cir::CmpOpKind::le) + upperBound = plusConstant(IVEndBound, cmpOp.getLoc(), 1); + } + assert(lowerBound && "can't find loop lower bound"); + assert(upperBound && "can't find loop upper bound"); +} + +void SCFLoop::transferToSCFForOp() { + auto ub = getUpperBound(); + auto lb = getLowerBound(); + auto loc = forOp.getLoc(); + auto type = lb.getType(); + auto step = rewriter->create( + loc, type, mlir::IntegerAttr::get(type, getStep())); + auto scfForOp = rewriter->create(loc, lb, ub, step); + SmallVector bbArg; + rewriter->eraseOp(&scfForOp.getBody()->back()); + rewriter->inlineBlockBefore(&forOp.getBody().front(), scfForOp.getBody(), + scfForOp.getBody()->end(), bbArg); + scfForOp->walk([&](mlir::Operation *op) { + if (isa(op) || isa(op) || + isa(op)) + llvm_unreachable( + "Not support lowering loop with break, continue or if yet"); + // Replace the IV usage to scf loop induction variable. + if (isIVLoad(op, IVAddr)) { + // Replace CIR IV load with arith.addi scf.IV, 0. + // The replacement makes the SCF IV can be automatically propogated + // by OpAdaptor for individual IV user lowering. + // The redundant arith.addi can be removed by later MLIR passes. + rewriter->setInsertionPoint(op); + auto newIV = plusConstant(scfForOp.getInductionVar(), loc, 0); + rewriter->replaceOp(op, newIV.getDefiningOp()); + } + return mlir::WalkResult::advance(); + }); +} + +void SCFWhileLoop::transferToSCFWhileOp() { + auto scfWhileOp = rewriter->create( + whileOp->getLoc(), whileOp->getResultTypes(), adaptor.getOperands()); + rewriter->createBlock(&scfWhileOp.getBefore()); + rewriter->createBlock(&scfWhileOp.getAfter()); + rewriter->inlineBlockBefore(&whileOp.getCond().front(), + scfWhileOp.getBeforeBody(), + scfWhileOp.getBeforeBody()->end()); + rewriter->inlineBlockBefore(&whileOp.getBody().front(), + scfWhileOp.getAfterBody(), + scfWhileOp.getAfterBody()->end()); +} + +void SCFDoLoop::transferToSCFWhileOp() { + + auto beforeBuilder = [&](mlir::OpBuilder &builder, mlir::Location loc, + mlir::ValueRange args) { + auto *newBlock = builder.getBlock(); + rewriter->mergeBlocks(&DoOp.getBody().front(), newBlock); + auto *yieldOp = newBlock->getTerminator(); + rewriter->mergeBlocks(&DoOp.getCond().front(), newBlock, + yieldOp->getResults()); + rewriter->eraseOp(yieldOp); + }; + auto afterBuilder = [&](mlir::OpBuilder &builder, mlir::Location loc, + mlir::ValueRange args) { + rewriter->create(loc, args); + }; + + rewriter->create(DoOp.getLoc(), DoOp->getResultTypes(), + adaptor.getOperands(), beforeBuilder, + afterBuilder); +} + +class CIRForOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ForOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + SCFLoop loop(op, &rewriter); + loop.analysis(); + loop.transferToSCFForOp(); + rewriter.eraseOp(op); + return mlir::success(); + } +}; + +class CIRWhileOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::WhileOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + SCFWhileLoop loop(op, adaptor, &rewriter); + loop.transferToSCFWhileOp(); + rewriter.eraseOp(op); + return 
mlir::success(); + } +}; + +class CIRDoOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::DoWhileOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + SCFDoLoop loop(op, adaptor, &rewriter); + loop.transferToSCFWhileOp(); + rewriter.eraseOp(op); + return mlir::success(); + } +}; + +class CIRConditionOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + mlir::LogicalResult + matchAndRewrite(mlir::cir::ConditionOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto *parentOp = op->getParentOp(); + return llvm::TypeSwitch(parentOp) + .Case([&](auto) { + auto condition = adaptor.getCondition(); + auto i1Condition = rewriter.create( + op.getLoc(), rewriter.getI1Type(), condition); + rewriter.replaceOpWithNewOp( + op, i1Condition, parentOp->getOperands()); + return mlir::success(); + }) + .Default([](auto) { return mlir::failure(); }); + } +}; + +void populateCIRLoopToSCFConversionPatterns(mlir::RewritePatternSet &patterns, + mlir::TypeConverter &converter) { + patterns.add(converter, patterns.getContext()); +} + +} // namespace cir diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp new file mode 100644 index 000000000000..92f4108a5d40 --- /dev/null +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerCIRToMLIR.cpp @@ -0,0 +1,1470 @@ +//====- LowerCIRToMLIR.cpp - Lowering from CIR to MLIR --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering of CIR operations to MLIR. 
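+// "MLIR" here means the upstream core dialects: memory accesses are mapped
+// onto memref, arithmetic onto arith/math, control flow onto scf/cf,
+// functions onto func, and vector operations onto the vector dialect (see
+// the conversion patterns below).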
+// +//===----------------------------------------------------------------------===// + +#include "LowerToMLIRHelpers.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/SCF/Transforms/Passes.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Region.h" +#include "mlir/IR/TypeRange.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/LoweringHelpers.h" +#include "clang/CIR/LowerToMLIR.h" +#include "clang/CIR/Passes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" + +using namespace cir; +using namespace llvm; + +namespace cir { + +class CIRReturnLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ReturnOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, + adaptor.getOperands()); + return mlir::LogicalResult::success(); + } +}; + +struct ConvertCIRToMLIRPass + : public mlir::PassWrapper> { + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; + + virtual StringRef getArgument() const override { return "cir-to-mlir"; } +}; + +class CIRCallOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CallOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + SmallVector types; + if (mlir::failed( + getTypeConverter()->convertTypes(op.getResultTypes(), types))) + return mlir::failure(); + rewriter.replaceOpWithNewOp( + op, op.getCalleeAttr(), types, adaptor.getOperands()); + return mlir::LogicalResult::success(); + } +}; + +class CIRAllocaOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::AllocaOp op, OpAdaptor adaptor, + 
mlir::ConversionPatternRewriter &rewriter) const override { + auto type = adaptor.getAllocaType(); + auto mlirType = getTypeConverter()->convertType(type); + + // FIXME: Some types can not be converted yet (e.g. struct) + if (!mlirType) + return mlir::LogicalResult::failure(); + + auto memreftype = mlir::MemRefType::get({}, mlirType); + rewriter.replaceOpWithNewOp(op, memreftype, + op.getAlignmentAttr()); + return mlir::LogicalResult::success(); + } +}; + +// Find base and indices from memref.reinterpret_cast +// and put it into eraseList. +static bool findBaseAndIndices(mlir::Value addr, mlir::Value &base, + SmallVector &indices, + SmallVector &eraseList, + mlir::ConversionPatternRewriter &rewriter) { + while (mlir::Operation *addrOp = addr.getDefiningOp()) { + if (!isa(addrOp)) + break; + indices.push_back(addrOp->getOperand(1)); + addr = addrOp->getOperand(0); + eraseList.push_back(addrOp); + } + base = addr; + if (indices.size() == 0) + return false; + std::reverse(indices.begin(), indices.end()); + return true; +} + +// For memref.reinterpret_cast has multiple users, erasing the operation +// after the last load or store been generated. +static void eraseIfSafe(mlir::Value oldAddr, mlir::Value newAddr, + SmallVector &eraseList, + mlir::ConversionPatternRewriter &rewriter) { + unsigned oldUsedNum = + std::distance(oldAddr.getUses().begin(), oldAddr.getUses().end()); + unsigned newUsedNum = 0; + for (auto *user : newAddr.getUsers()) { + if (isa(*user) || isa(*user)) + ++newUsedNum; + } + if (oldUsedNum == newUsedNum) { + for (auto op : eraseList) + rewriter.eraseOp(op); + } +} + +class CIRLoadOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::LoadOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Value base; + SmallVector indices; + SmallVector eraseList; + if (findBaseAndIndices(adaptor.getAddr(), base, indices, eraseList, + rewriter)) { + rewriter.replaceOpWithNewOp(op, base, indices); + eraseIfSafe(op.getAddr(), base, eraseList, rewriter); + } else + rewriter.replaceOpWithNewOp(op, adaptor.getAddr()); + return mlir::LogicalResult::success(); + } +}; + +class CIRStoreOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::StoreOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Value base; + SmallVector indices; + SmallVector eraseList; + if (findBaseAndIndices(adaptor.getAddr(), base, indices, eraseList, + rewriter)) { + rewriter.replaceOpWithNewOp(op, adaptor.getValue(), + base, indices); + eraseIfSafe(op.getAddr(), base, eraseList, rewriter); + } else + rewriter.replaceOpWithNewOp(op, adaptor.getValue(), + adaptor.getAddr()); + return mlir::LogicalResult::success(); + } +}; + +class CIRCosOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CosOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRSqrtOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::SqrtOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter 
&rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRFAbsOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::FAbsOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRFloorOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::FloorOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRCeilOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CeilOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRLog10OpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::Log10Op op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRLogOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::LogOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRLog2OpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::Log2Op op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRRoundOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::RoundOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRExpOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ExpOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRShiftOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + mlir::LogicalResult + matchAndRewrite(mlir::cir::ShiftOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto cirAmtTy = + mlir::dyn_cast(op.getAmount().getType()); + auto cirValTy = mlir::dyn_cast(op.getValue().getType()); + auto 
mlirTy = getTypeConverter()->convertType(op.getType()); + mlir::Value amt = adaptor.getAmount(); + mlir::Value val = adaptor.getValue(); + + assert(cirValTy && cirAmtTy && "non-integer shift is NYI"); + assert(cirValTy == op.getType() && "inconsistent operands' types NYI"); + + // Ensure shift amount is the same type as the value. Some undefined + // behavior might occur in the casts below as per [C99 6.5.7.3]. + amt = createIntCast(rewriter, amt, mlirTy, cirAmtTy.isSigned()); + + // Lower to the proper arith shift operation. + if (op.getIsShiftleft()) + rewriter.replaceOpWithNewOp(op, mlirTy, val, amt); + else { + if (cirValTy.isUnsigned()) + rewriter.replaceOpWithNewOp(op, mlirTy, val, amt); + else + rewriter.replaceOpWithNewOp(op, mlirTy, val, amt); + } + + return mlir::success(); + } +}; + +class CIRExp2OpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::Exp2Op op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +class CIRSinOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::SinOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getSrc()); + return mlir::LogicalResult::success(); + } +}; + +template +class CIRBitOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(CIROp op, + typename mlir::OpConversionPattern::OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resultIntTy = mlir::cast( + this->getTypeConverter()->convertType(op.getType())); + auto res = rewriter.create(op->getLoc(), adaptor.getInput()); + auto newOp = createIntCast(rewriter, res->getResult(0), resultIntTy, + /*isSigned=*/false); + rewriter.replaceOp(op, newOp); + return mlir::LogicalResult::success(); + } +}; + +using CIRBitClzOpLowering = + CIRBitOpLowering; +using CIRBitCtzOpLowering = + CIRBitOpLowering; +using CIRBitPopcountOpLowering = + CIRBitOpLowering; + +class CIRBitClrsbOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitClrsbOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto inputTy = adaptor.getInput().getType(); + auto zero = getConst(rewriter, op.getLoc(), inputTy, 0); + auto isNeg = rewriter.create( + op.getLoc(), + mlir::arith::CmpIPredicateAttr::get(rewriter.getContext(), + mlir::arith::CmpIPredicate::slt), + adaptor.getInput(), zero); + + auto negOne = getConst(rewriter, op.getLoc(), inputTy, -1); + auto flipped = rewriter.create( + op.getLoc(), adaptor.getInput(), negOne); + + auto select = rewriter.create( + op.getLoc(), isNeg, flipped, adaptor.getInput()); + + auto resTy = mlir::cast( + getTypeConverter()->convertType(op.getType())); + auto clz = + rewriter.create(op->getLoc(), select); + auto newClz = createIntCast(rewriter, clz, resTy); + + auto one = getConst(rewriter, op.getLoc(), resTy, 1); + auto res = rewriter.create(op.getLoc(), newClz, one); + rewriter.replaceOp(op, res); + + return mlir::LogicalResult::success(); + } +}; + +class CIRBitFfsOpLowering + : public 
mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitFfsOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto inputTy = adaptor.getInput().getType(); + auto ctz = rewriter.create( + op.getLoc(), adaptor.getInput()); + auto newCtz = createIntCast(rewriter, ctz, resTy); + + auto one = getConst(rewriter, op.getLoc(), resTy, 1); + auto ctzAddOne = + rewriter.create(op.getLoc(), newCtz, one); + + auto zeroInputTy = getConst(rewriter, op.getLoc(), inputTy, 0); + auto isZero = rewriter.create( + op.getLoc(), + mlir::arith::CmpIPredicateAttr::get(rewriter.getContext(), + mlir::arith::CmpIPredicate::eq), + adaptor.getInput(), zeroInputTy); + + auto zeroResTy = getConst(rewriter, op.getLoc(), resTy, 0); + auto res = rewriter.create(op.getLoc(), isZero, + zeroResTy, ctzAddOne); + rewriter.replaceOp(op, res); + + return mlir::LogicalResult::success(); + } +}; + +class CIRBitParityOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BitParityOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto resTy = getTypeConverter()->convertType(op.getType()); + auto count = + rewriter.create(op.getLoc(), adaptor.getInput()); + auto countMod2 = rewriter.create( + op.getLoc(), count, + getConst(rewriter, op.getLoc(), count.getType(), 1)); + auto res = createIntCast(rewriter, countMod2, resTy); + rewriter.replaceOp(op, res); + return mlir::LogicalResult::success(); + } +}; + +class CIRConstantOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + +private: + // This code is in a separate function rather than part of matchAndRewrite + // because it is recursive. There is currently only one level of recursion; + // when lowing a vector attribute the attributes for the elements also need + // to be lowered. 
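+  // For example, a constant vector attribute whose elements are the CIR
+  // integer attributes 1, 2, 3, 4 is lowered, element by element through the
+  // recursive call, into a dense<[1, 2, 3, 4]> DenseElementsAttr on the
+  // converted vector type.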
+ mlir::TypedAttr + lowerCirAttrToMlirAttr(mlir::Attribute cirAttr, + mlir::ConversionPatternRewriter &rewriter) const { + assert(mlir::isa(cirAttr) && + "Can't lower a non-typed attribute"); + auto mlirType = getTypeConverter()->convertType( + mlir::cast(cirAttr).getType()); + if (auto vecAttr = mlir::dyn_cast(cirAttr)) { + assert(mlir::isa(mlirType) && + "MLIR type for CIR vector attribute is not mlir::VectorType"); + assert(mlir::isa(mlirType) && + "mlir::VectorType is not a mlir::ShapedType ??"); + SmallVector mlirValues; + for (auto elementAttr : vecAttr.getElts()) { + mlirValues.push_back( + this->lowerCirAttrToMlirAttr(elementAttr, rewriter)); + } + return mlir::DenseElementsAttr::get( + mlir::cast(mlirType), mlirValues); + } else if (auto boolAttr = mlir::dyn_cast(cirAttr)) { + return rewriter.getIntegerAttr(mlirType, boolAttr.getValue()); + } else if (auto floatAttr = mlir::dyn_cast(cirAttr)) { + return rewriter.getFloatAttr(mlirType, floatAttr.getValue()); + } else if (auto intAttr = mlir::dyn_cast(cirAttr)) { + return rewriter.getIntegerAttr(mlirType, intAttr.getValue()); + } else { + llvm_unreachable("NYI: unsupported attribute kind lowering to MLIR"); + return {}; + } + } + +public: + mlir::LogicalResult + matchAndRewrite(mlir::cir::ConstantOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, getTypeConverter()->convertType(op.getType()), + this->lowerCirAttrToMlirAttr(op.getValue(), rewriter)); + return mlir::LogicalResult::success(); + } +}; + +class CIRFuncOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::FuncOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + auto fnType = op.getFunctionType(); + mlir::TypeConverter::SignatureConversion signatureConversion( + fnType.getNumInputs()); + + for (const auto &argType : enumerate(fnType.getInputs())) { + auto convertedType = typeConverter->convertType(argType.value()); + if (!convertedType) + return mlir::failure(); + signatureConversion.addInputs(argType.index(), convertedType); + } + + mlir::Type resultType = + getTypeConverter()->convertType(fnType.getReturnType()); + auto fn = rewriter.create( + op.getLoc(), op.getName(), + rewriter.getFunctionType(signatureConversion.getConvertedTypes(), + resultType ? 
mlir::TypeRange(resultType) + : mlir::TypeRange())); + + if (failed(rewriter.convertRegionTypes(&op.getBody(), *typeConverter, + &signatureConversion))) + return mlir::failure(); + rewriter.inlineRegionBefore(op.getBody(), fn.getBody(), fn.end()); + + rewriter.eraseOp(op); + return mlir::LogicalResult::success(); + } +}; + +class CIRUnaryOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::UnaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto input = adaptor.getInput(); + auto type = getTypeConverter()->convertType(op.getType()); + + switch (op.getKind()) { + case mlir::cir::UnaryOpKind::Inc: { + auto One = rewriter.create( + op.getLoc(), type, mlir::IntegerAttr::get(type, 1)); + rewriter.replaceOpWithNewOp(op, type, input, One); + break; + } + case mlir::cir::UnaryOpKind::Dec: { + auto One = rewriter.create( + op.getLoc(), type, mlir::IntegerAttr::get(type, 1)); + rewriter.replaceOpWithNewOp(op, type, input, One); + break; + } + case mlir::cir::UnaryOpKind::Plus: { + rewriter.replaceOp(op, op.getInput()); + break; + } + case mlir::cir::UnaryOpKind::Minus: { + auto Zero = rewriter.create( + op.getLoc(), type, mlir::IntegerAttr::get(type, 0)); + rewriter.replaceOpWithNewOp(op, type, Zero, input); + break; + } + case mlir::cir::UnaryOpKind::Not: { + auto MinusOne = rewriter.create( + op.getLoc(), type, mlir::IntegerAttr::get(type, -1)); + rewriter.replaceOpWithNewOp(op, type, MinusOne, + input); + break; + } + } + + return mlir::LogicalResult::success(); + } +}; + +class CIRBinOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BinOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + assert((adaptor.getLhs().getType() == adaptor.getRhs().getType()) && + "inconsistent operands' types not supported yet"); + mlir::Type mlirType = getTypeConverter()->convertType(op.getType()); + assert((mlir::isa(mlirType) || + mlir::isa(mlirType) || + mlir::isa(mlirType)) && + "operand type not supported yet"); + + auto type = op.getLhs().getType(); + if (auto VecType = mlir::dyn_cast(type)) { + type = VecType.getEltType(); + } + + switch (op.getKind()) { + case mlir::cir::BinOpKind::Add: + if (mlir::isa(type)) + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::Sub: + if (mlir::isa(type)) + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::Mul: + if (mlir::isa(type)) + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::Div: + if (auto ty = mlir::dyn_cast(type)) { + if (ty.isUnsigned()) + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + } else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::Rem: + if (auto ty = mlir::dyn_cast(type)) { + if 
(ty.isUnsigned()) + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + } else + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::And: + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::Or: + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + case mlir::cir::BinOpKind::Xor: + rewriter.replaceOpWithNewOp( + op, mlirType, adaptor.getLhs(), adaptor.getRhs()); + break; + } + + return mlir::LogicalResult::success(); + } +}; + +class CIRCmpOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CmpOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto type = op.getLhs().getType(); + + mlir::Value mlirResult; + + if (auto ty = mlir::dyn_cast(type)) { + auto kind = convertCmpKindToCmpIPredicate(op.getKind(), ty.isSigned()); + mlirResult = rewriter.create( + op.getLoc(), kind, adaptor.getLhs(), adaptor.getRhs()); + } else if (auto ty = mlir::dyn_cast(type)) { + auto kind = convertCmpKindToCmpFPredicate(op.getKind()); + mlirResult = rewriter.create( + op.getLoc(), kind, adaptor.getLhs(), adaptor.getRhs()); + } else if (auto ty = mlir::dyn_cast(type)) { + llvm_unreachable("pointer comparison not supported yet"); + } else { + return op.emitError() << "unsupported type for CmpOp: " << type; + } + + // MLIR comparison ops return i1, but cir::CmpOp returns the same type as + // the LHS value. Since this return value can be used later, we need to + // restore the type with the extension below. + auto mlirResultTy = getTypeConverter()->convertType(op.getType()); + rewriter.replaceOpWithNewOp(op, mlirResultTy, + mlirResult); + + return mlir::LogicalResult::success(); + } +}; + +class CIRBrOpLowering : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BrOp op, + mlir::PatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, op.getDest()); + return mlir::LogicalResult::success(); + } +}; + +class CIRScopeOpLowering + : public mlir::OpConversionPattern { + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::ScopeOp scopeOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // Empty scope: just remove it. 
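+    // An empty scope has no body to lower, so it is simply erased. For
+    // non-empty scopes, each yield terminator is replaced by the new region
+    // op's return terminator below and the whole body region is inlined into
+    // the replacement op.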
+ if (scopeOp.getRegion().empty()) { + rewriter.eraseOp(scopeOp); + return mlir::success(); + } + + for (auto &block : scopeOp.getRegion()) { + rewriter.setInsertionPointToEnd(&block); + auto *terminator = block.getTerminator(); + rewriter.replaceOpWithNewOp( + terminator, terminator->getOperands()); + } + + SmallVector mlirResultTypes; + if (mlir::failed(getTypeConverter()->convertTypes(scopeOp->getResultTypes(), + mlirResultTypes))) + return mlir::LogicalResult::failure(); + rewriter.setInsertionPoint(scopeOp); + auto newScopeOp = rewriter.create( + scopeOp.getLoc(), mlirResultTypes); + rewriter.inlineRegionBefore(scopeOp.getScopeRegion(), + newScopeOp.getBodyRegion(), + newScopeOp.getBodyRegion().end()); + rewriter.replaceOp(scopeOp, newScopeOp); + + return mlir::LogicalResult::success(); + } +}; + +struct CIRBrCondOpLowering + : public mlir::OpConversionPattern { + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::BrCondOp brOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + auto condition = adaptor.getCond(); + auto i1Condition = rewriter.create( + brOp.getLoc(), rewriter.getI1Type(), condition); + rewriter.replaceOpWithNewOp( + brOp, i1Condition.getResult(), brOp.getDestTrue(), + adaptor.getDestOperandsTrue(), brOp.getDestFalse(), + adaptor.getDestOperandsFalse()); + + return mlir::success(); + } +}; + +class CIRTernaryOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::TernaryOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.setInsertionPoint(op); + auto condition = adaptor.getCond(); + auto i1Condition = rewriter.create( + op.getLoc(), rewriter.getI1Type(), condition); + SmallVector resultTypes; + if (mlir::failed(getTypeConverter()->convertTypes(op->getResultTypes(), + resultTypes))) + return mlir::failure(); + + auto ifOp = rewriter.create(op.getLoc(), resultTypes, + i1Condition.getResult(), true); + auto *thenBlock = &ifOp.getThenRegion().front(); + auto *elseBlock = &ifOp.getElseRegion().front(); + rewriter.inlineBlockBefore(&op.getTrueRegion().front(), thenBlock, + thenBlock->end()); + rewriter.inlineBlockBefore(&op.getFalseRegion().front(), elseBlock, + elseBlock->end()); + + rewriter.replaceOp(op, ifOp); + return mlir::success(); + } +}; + +class CIRYieldOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + mlir::LogicalResult + matchAndRewrite(mlir::cir::YieldOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto *parentOp = op->getParentOp(); + return llvm::TypeSwitch(parentOp) + .Case([&](auto) { + rewriter.replaceOpWithNewOp( + op, adaptor.getOperands()); + return mlir::success(); + }) + .Default([](auto) { return mlir::failure(); }); + } +}; + +class CIRIfOpLowering : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::IfOp ifop, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto condition = adaptor.getCondition(); + auto i1Condition = rewriter.create( + ifop->getLoc(), rewriter.getI1Type(), condition); + auto newIfOp = rewriter.create( + ifop->getLoc(), ifop->getResultTypes(), i1Condition); + auto *thenBlock = rewriter.createBlock(&newIfOp.getThenRegion()); + 
rewriter.inlineBlockBefore(&ifop.getThenRegion().front(), thenBlock, + thenBlock->end()); + if (!ifop.getElseRegion().empty()) { + auto *elseBlock = rewriter.createBlock(&newIfOp.getElseRegion()); + rewriter.inlineBlockBefore(&ifop.getElseRegion().front(), elseBlock, + elseBlock->end()); + } + rewriter.replaceOp(ifop, newIfOp); + return mlir::success(); + } +}; + +class CIRGlobalOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + mlir::LogicalResult + matchAndRewrite(mlir::cir::GlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto moduleOp = op->getParentOfType(); + if (!moduleOp) + return mlir::failure(); + + mlir::OpBuilder b(moduleOp.getContext()); + + const auto CIRSymType = op.getSymType(); + auto convertedType = getTypeConverter()->convertType(CIRSymType); + if (!convertedType) + return mlir::failure(); + auto memrefType = dyn_cast(convertedType); + if (!memrefType) + memrefType = mlir::MemRefType::get({}, convertedType); + // Add an optional alignment to the global memref. + mlir::IntegerAttr memrefAlignment = + op.getAlignment() + ? mlir::IntegerAttr::get(b.getI64Type(), op.getAlignment().value()) + : mlir::IntegerAttr(); + // Add an optional initial value to the global memref. + mlir::Attribute initialValue = mlir::Attribute(); + std::optional init = op.getInitialValue(); + if (init.has_value()) { + if (auto constArr = + mlir::dyn_cast(init.value())) { + init = lowerConstArrayAttr(constArr, getTypeConverter()); + if (init.has_value()) + initialValue = init.value(); + else + llvm_unreachable("GlobalOp lowering array with initial value fail"); + } else if (auto constArr = + mlir::dyn_cast(init.value())) { + if (memrefType.getShape().size()) { + auto elementType = memrefType.getElementType(); + auto rtt = + mlir::RankedTensorType::get(memrefType.getShape(), elementType); + if (mlir::isa(elementType)) + initialValue = mlir::DenseIntElementsAttr::get(rtt, 0); + else if (mlir::isa(elementType)) { + auto floatZero = mlir::FloatAttr::get(elementType, 0.0).getValue(); + initialValue = mlir::DenseFPElementsAttr::get(rtt, floatZero); + } else + llvm_unreachable("GlobalOp lowering unsuppored element type"); + } else { + auto rtt = mlir::RankedTensorType::get({}, convertedType); + if (mlir::isa(convertedType)) + initialValue = mlir::DenseIntElementsAttr::get(rtt, 0); + else if (mlir::isa(convertedType)) { + auto floatZero = + mlir::FloatAttr::get(convertedType, 0.0).getValue(); + initialValue = mlir::DenseFPElementsAttr::get(rtt, floatZero); + } else + llvm_unreachable("GlobalOp lowering unsuppored type"); + } + } else if (auto intAttr = + mlir::dyn_cast(init.value())) { + auto rtt = mlir::RankedTensorType::get({}, convertedType); + initialValue = mlir::DenseIntElementsAttr::get(rtt, intAttr.getValue()); + } else if (auto fltAttr = + mlir::dyn_cast(init.value())) { + auto rtt = mlir::RankedTensorType::get({}, convertedType); + initialValue = mlir::DenseFPElementsAttr::get(rtt, fltAttr.getValue()); + } else if (auto boolAttr = + mlir::dyn_cast(init.value())) { + auto rtt = mlir::RankedTensorType::get({}, convertedType); + initialValue = + mlir::DenseIntElementsAttr::get(rtt, (char)boolAttr.getValue()); + } else + llvm_unreachable( + "GlobalOp lowering with initial value is not fully supported yet"); + } + + // Add symbol visibility + std::string sym_visibility = op.isPrivate() ? 
"private" : "public"; + + rewriter.replaceOpWithNewOp( + op, b.getStringAttr(op.getSymName()), + /*sym_visibility=*/b.getStringAttr(sym_visibility), + /*type=*/memrefType, initialValue, + /*constant=*/op.getConstant(), + /*alignment=*/memrefAlignment); + + return mlir::success(); + } +}; + +class CIRGetGlobalOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::GetGlobalOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + // FIXME(cir): Premature DCE to avoid lowering stuff we're not using. + // CIRGen should mitigate this and not emit the get_global. + if (op->getUses().empty()) { + rewriter.eraseOp(op); + return mlir::success(); + } + + auto type = getTypeConverter()->convertType(op.getType()); + auto symbol = op.getName(); + rewriter.replaceOpWithNewOp(op, type, symbol); + return mlir::success(); + } +}; + +class CIRVectorCreateLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecCreateOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto vecTy = mlir::dyn_cast(op.getType()); + assert(vecTy && "result type of cir.vec.create op is not VectorType"); + auto elementTy = typeConverter->convertType(vecTy.getEltType()); + auto loc = op.getLoc(); + auto zeroElement = rewriter.getZeroAttr(elementTy); + mlir::Value result = rewriter.create( + loc, + mlir::DenseElementsAttr::get( + mlir::VectorType::get(vecTy.getSize(), elementTy), zeroElement)); + assert(vecTy.getSize() == op.getElements().size() && + "cir.vec.create op count doesn't match vector type elements count"); + for (uint64_t i = 0; i < vecTy.getSize(); ++i) { + mlir::Value indexValue = + getConst(rewriter, loc, rewriter.getI64Type(), i); + result = rewriter.create( + loc, adaptor.getElements()[i], result, indexValue); + } + rewriter.replaceOp(op, result); + return mlir::success(); + } +}; + +class CIRVectorInsertLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecInsertOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, adaptor.getValue(), adaptor.getVec(), adaptor.getIndex()); + return mlir::success(); + } +}; + +class CIRVectorExtractLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecExtractOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, adaptor.getVec(), adaptor.getIndex()); + return mlir::success(); + } +}; + +class CIRVectorCmpOpLowering + : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(mlir::cir::VecCmpOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + assert(mlir::isa(op.getType()) && + mlir::isa(op.getLhs().getType()) && + mlir::isa(op.getRhs().getType()) && + "Vector compare with non-vector type"); + auto elementType = + mlir::cast(op.getLhs().getType()).getEltType(); + mlir::Value bitResult; + if (auto intType = mlir::dyn_cast(elementType)) { + bitResult = rewriter.create( + op.getLoc(), + convertCmpKindToCmpIPredicate(op.getKind(), 
intType.isSigned()), + adaptor.getLhs(), adaptor.getRhs()); + } else if (mlir::isa(elementType)) { + bitResult = rewriter.create( + op.getLoc(), convertCmpKindToCmpFPredicate(op.getKind()), + adaptor.getLhs(), adaptor.getRhs()); + } else { + return op.emitError() << "unsupported type for VecCmpOp: " << elementType; + } + rewriter.replaceOpWithNewOp( + op, typeConverter->convertType(op.getType()), bitResult); + return mlir::success(); + } +}; + +class CIRCastOpLowering : public mlir::OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + inline mlir::Type convertTy(mlir::Type ty) const { + return getTypeConverter()->convertType(ty); + } + + mlir::LogicalResult + matchAndRewrite(mlir::cir::CastOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + if (isa(op.getSrc().getType())) + llvm_unreachable("CastOp lowering for vector type is not supported yet"); + auto src = adaptor.getSrc(); + auto dstType = op.getResult().getType(); + using CIR = mlir::cir::CastKind; + switch (op.getKind()) { + case CIR::array_to_ptrdecay: { + auto newDstType = mlir::cast(convertTy(dstType)); + rewriter.replaceOpWithNewOp( + op, newDstType, src, 0, std::nullopt, std::nullopt); + return mlir::success(); + } + case CIR::int_to_bool: { + auto zero = rewriter.create( + src.getLoc(), op.getSrc().getType(), + mlir::cir::IntAttr::get(op.getSrc().getType(), 0)); + rewriter.replaceOpWithNewOp( + op, mlir::cir::BoolType::get(getContext()), mlir::cir::CmpOpKind::ne, + op.getSrc(), zero); + return mlir::success(); + } + case CIR::integral: { + auto newDstType = convertTy(dstType); + auto srcType = op.getSrc().getType(); + mlir::cir::IntType srcIntType = mlir::cast(srcType); + auto newOp = + createIntCast(rewriter, src, newDstType, srcIntType.isSigned()); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } + case CIR::floating: { + auto newDstType = convertTy(dstType); + auto srcTy = op.getSrc().getType(); + auto dstTy = op.getResult().getType(); + + if (!mlir::isa(dstTy) || + !mlir::isa(srcTy)) + return op.emitError() << "NYI cast from " << srcTy << " to " << dstTy; + + auto getFloatWidth = [](mlir::Type ty) -> unsigned { + return mlir::cast(ty).getWidth(); + }; + + if (getFloatWidth(srcTy) > getFloatWidth(dstTy)) + rewriter.replaceOpWithNewOp(op, newDstType, src); + else + rewriter.replaceOpWithNewOp(op, newDstType, src); + return mlir::success(); + } + case CIR::float_to_bool: { + auto dstTy = mlir::cast(op.getType()); + auto newDstType = convertTy(dstTy); + auto kind = mlir::arith::CmpFPredicate::UNE; + + // Check if float is not equal to zero. + auto zeroFloat = rewriter.create( + op.getLoc(), src.getType(), mlir::FloatAttr::get(src.getType(), 0.0)); + + // Extend comparison result to either bool (C++) or int (C). 
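+      // For instance, "(bool)x" with a float x lowers roughly to:
+      //   %zero = arith.constant 0.0
+      //   %cmp  = arith.cmpf une, %x, %zero       // i1
+      //   %res  = zero-extension of %cmp to the converted bool type (i8 here)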
+ mlir::Value cmpResult = rewriter.create( + op.getLoc(), kind, src, zeroFloat); + rewriter.replaceOpWithNewOp(op, newDstType, + cmpResult); + return mlir::success(); + } + case CIR::bool_to_int: { + auto dstTy = mlir::cast(op.getType()); + auto newDstType = mlir::cast(convertTy(dstTy)); + auto newOp = createIntCast(rewriter, src, newDstType); + rewriter.replaceOp(op, newOp); + return mlir::success(); + } + case CIR::bool_to_float: { + auto dstTy = op.getType(); + auto newDstType = convertTy(dstTy); + rewriter.replaceOpWithNewOp(op, newDstType, src); + return mlir::success(); + } + case CIR::int_to_float: { + auto dstTy = op.getType(); + auto newDstType = convertTy(dstTy); + if (mlir::cast(op.getSrc().getType()).isSigned()) + rewriter.replaceOpWithNewOp(op, newDstType, src); + else + rewriter.replaceOpWithNewOp(op, newDstType, src); + return mlir::success(); + } + case CIR::float_to_int: { + auto dstTy = op.getType(); + auto newDstType = convertTy(dstTy); + if (mlir::cast(op.getResult().getType()).isSigned()) + rewriter.replaceOpWithNewOp(op, newDstType, src); + else + rewriter.replaceOpWithNewOp(op, newDstType, src); + return mlir::success(); + } + default: + break; + } + return mlir::failure(); + } +}; + +class CIRPtrStrideOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + // Return true if PtrStrideOp is produced by cast with array_to_ptrdecay kind + // and they are in the same block. + inline bool isCastArrayToPtrConsumer(mlir::cir::PtrStrideOp op) const { + auto defOp = op->getOperand(0).getDefiningOp(); + if (!defOp) + return false; + auto castOp = dyn_cast(defOp); + if (!castOp) + return false; + if (castOp.getKind() != mlir::cir::CastKind::array_to_ptrdecay) + return false; + if (!castOp->hasOneUse()) + return false; + if (!castOp->isBeforeInBlock(op)) + return false; + return true; + } + + // Return true if all the PtrStrideOp users are load, store or cast + // with array_to_ptrdecay kind and they are in the same block. + inline bool + isLoadStoreOrCastArrayToPtrProduer(mlir::cir::PtrStrideOp op) const { + if (op.use_empty()) + return false; + for (auto *user : op->getUsers()) { + if (!op->isBeforeInBlock(user)) + return false; + if (isa(*user) || isa(*user)) + continue; + auto castOp = dyn_cast(*user); + if (castOp && + (castOp.getKind() == mlir::cir::CastKind::array_to_ptrdecay)) + continue; + return false; + } + return true; + } + + inline mlir::Type convertTy(mlir::Type ty) const { + return getTypeConverter()->convertType(ty); + } + + // Rewrite + // %0 = cir.cast(array_to_ptrdecay, %base) + // cir.ptr_stride(%0, %stride) + // to + // memref.reinterpret_cast (%base, %stride) + // + // MemRef Dialect doesn't have GEP-like operation. memref.reinterpret_cast + // only been used to propogate %base and %stride to memref.load/store and + // should be erased after the conversion. 
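+  // Roughly (types elided):
+  //
+  //   %p = cir.cast(array_to_ptrdecay, %arr)
+  //   %q = cir.ptr_stride(%p, %i)
+  //   %v = cir.load %q
+  //
+  // becomes a memref.reinterpret_cast of %arr offset by %i; the load/store
+  // lowerings above then fold that into "memref.load %arr[%i]" through
+  // findBaseAndIndices, and eraseIfSafe drops the cast once its last
+  // memory user has been rewritten.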
+ mlir::LogicalResult + matchAndRewrite(mlir::cir::PtrStrideOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + if (!isCastArrayToPtrConsumer(op)) + return mlir::failure(); + if (!isLoadStoreOrCastArrayToPtrProduer(op)) + return mlir::failure(); + auto baseOp = adaptor.getBase().getDefiningOp(); + if (!baseOp) + return mlir::failure(); + if (!isa(baseOp)) + return mlir::failure(); + auto base = baseOp->getOperand(0); + auto dstType = op.getResult().getType(); + auto newDstType = mlir::cast(convertTy(dstType)); + auto stride = adaptor.getStride(); + auto indexType = rewriter.getIndexType(); + // Generate casting if the stride is not index type. + if (stride.getType() != indexType) + stride = rewriter.create(op.getLoc(), indexType, + stride); + rewriter.replaceOpWithNewOp( + op, newDstType, base, stride, std::nullopt, std::nullopt); + rewriter.eraseOp(baseOp); + return mlir::success(); + } +}; + +void populateCIRToMLIRConversionPatterns(mlir::RewritePatternSet &patterns, + mlir::TypeConverter &converter) { + patterns.add(patterns.getContext()); + + patterns.add< + CIRCmpOpLowering, CIRCallOpLowering, CIRUnaryOpLowering, CIRBinOpLowering, + CIRLoadOpLowering, CIRConstantOpLowering, CIRStoreOpLowering, + CIRAllocaOpLowering, CIRFuncOpLowering, CIRScopeOpLowering, + CIRBrCondOpLowering, CIRTernaryOpLowering, CIRYieldOpLowering, + CIRCosOpLowering, CIRGlobalOpLowering, CIRGetGlobalOpLowering, + CIRCastOpLowering, CIRPtrStrideOpLowering, CIRSqrtOpLowering, + CIRCeilOpLowering, CIRExp2OpLowering, CIRExpOpLowering, CIRFAbsOpLowering, + CIRFloorOpLowering, CIRLog10OpLowering, CIRLog2OpLowering, + CIRLogOpLowering, CIRRoundOpLowering, CIRPtrStrideOpLowering, + CIRSinOpLowering, CIRShiftOpLowering, CIRBitClzOpLowering, + CIRBitCtzOpLowering, CIRBitPopcountOpLowering, CIRBitClrsbOpLowering, + CIRBitFfsOpLowering, CIRBitParityOpLowering, CIRIfOpLowering, + CIRVectorCreateLowering, CIRVectorInsertLowering, + CIRVectorExtractLowering, CIRVectorCmpOpLowering>(converter, + patterns.getContext()); +} + +static mlir::TypeConverter prepareTypeConverter() { + mlir::TypeConverter converter; + converter.addConversion([&](mlir::cir::PointerType type) -> mlir::Type { + auto ty = converter.convertType(type.getPointee()); + // FIXME: The pointee type might not be converted (e.g. 
struct) + if (!ty) + return nullptr; + if (isa(type.getPointee())) + return ty; + return mlir::MemRefType::get({}, ty); + }); + converter.addConversion( + [&](mlir::IntegerType type) -> mlir::Type { return type; }); + converter.addConversion( + [&](mlir::FloatType type) -> mlir::Type { return type; }); + converter.addConversion( + [&](mlir::cir::VoidType type) -> mlir::Type { return {}; }); + converter.addConversion([&](mlir::cir::IntType type) -> mlir::Type { + // arith dialect ops doesn't take signed integer -- drop cir sign here + return mlir::IntegerType::get( + type.getContext(), type.getWidth(), + mlir::IntegerType::SignednessSemantics::Signless); + }); + converter.addConversion([&](mlir::cir::BoolType type) -> mlir::Type { + return mlir::IntegerType::get(type.getContext(), 8); + }); + converter.addConversion([&](mlir::cir::SingleType type) -> mlir::Type { + return mlir::FloatType::getF32(type.getContext()); + }); + converter.addConversion([&](mlir::cir::DoubleType type) -> mlir::Type { + return mlir::FloatType::getF64(type.getContext()); + }); + converter.addConversion([&](mlir::cir::FP80Type type) -> mlir::Type { + return mlir::FloatType::getF80(type.getContext()); + }); + converter.addConversion([&](mlir::cir::LongDoubleType type) -> mlir::Type { + return converter.convertType(type.getUnderlying()); + }); + converter.addConversion([&](mlir::cir::ArrayType type) -> mlir::Type { + SmallVector shape; + mlir::Type curType = type; + while (auto arrayType = dyn_cast(curType)) { + shape.push_back(arrayType.getSize()); + curType = arrayType.getEltType(); + } + auto elementType = converter.convertType(curType); + // FIXME: The element type might not be converted (e.g. struct) + if (!elementType) + return nullptr; + return mlir::MemRefType::get(shape, elementType); + }); + converter.addConversion([&](mlir::cir::VectorType type) -> mlir::Type { + auto ty = converter.convertType(type.getEltType()); + return mlir::VectorType::get(type.getSize(), ty); + }); + + return converter; +} + +void ConvertCIRToMLIRPass::runOnOperation() { + auto module = getOperation(); + + auto converter = prepareTypeConverter(); + + mlir::RewritePatternSet patterns(&getContext()); + + populateCIRLoopToSCFConversionPatterns(patterns, converter); + populateCIRToMLIRConversionPatterns(patterns, converter); + + mlir::ConversionTarget target(getContext()); + target.addLegalOp(); + target + .addLegalDialect(); + target.addIllegalDialect(); + + if (failed(applyPartialConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} + +std::unique_ptr +lowerFromCIRToMLIRToLLVMIR(mlir::ModuleOp theModule, + std::unique_ptr mlirCtx, + LLVMContext &llvmCtx) { + mlir::PassManager pm(mlirCtx.get()); + + pm.addPass(createConvertCIRToMLIRPass()); + pm.addPass(createConvertMLIRToLLVMPass()); + + auto result = !mlir::failed(pm.run(theModule)); + if (!result) + report_fatal_error( + "The pass manager failed to lower CIR to LLVMIR dialect!"); + + // Now that we ran all the lowering passes, verify the final output. 
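+  // At this point the module should contain only LLVM dialect operations
+  // (cir-to-mlir followed by cir-mlir-to-llvm above); the check below is a
+  // last sanity pass before translating the module to LLVM IR.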
+  if (theModule.verify().failed())
+    report_fatal_error("Verification of the final LLVMIR dialect failed!");
+
+  mlir::registerBuiltinDialectTranslation(*mlirCtx);
+  mlir::registerLLVMDialectTranslation(*mlirCtx);
+  mlir::registerOpenMPDialectTranslation(*mlirCtx);
+
+  auto llvmModule = mlir::translateModuleToLLVMIR(theModule, llvmCtx);
+
+  if (!llvmModule)
+    report_fatal_error("Lowering from LLVMIR dialect to llvm IR failed!");
+
+  return llvmModule;
+}
+
+std::unique_ptr<mlir::Pass> createConvertCIRToMLIRPass() {
+  return std::make_unique<ConvertCIRToMLIRPass>();
+}
+
+mlir::ModuleOp lowerFromCIRToMLIR(mlir::ModuleOp theModule,
+                                  mlir::MLIRContext *mlirCtx) {
+  mlir::PassManager pm(mlirCtx);
+
+  pm.addPass(createConvertCIRToMLIRPass());
+
+  auto result = !mlir::failed(pm.run(theModule));
+  if (!result)
+    report_fatal_error(
+        "The pass manager failed to lower CIR to the core MLIR dialects!");
+
+  // Now that we ran all the lowering passes, verify the final output.
+  if (theModule.verify().failed())
+    report_fatal_error("Verification of the lowered MLIR module failed!");
+
+  return theModule;
+}
+
+} // namespace cir
diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerMLIRToLLVM.cpp b/clang/lib/CIR/Lowering/ThroughMLIR/LowerMLIRToLLVM.cpp
new file mode 100644
index 000000000000..930ce1c12f68
--- /dev/null
+++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerMLIRToLLVM.cpp
@@ -0,0 +1,79 @@
+//====- LowerMLIRToLLVM.cpp - Lowering from MLIR to LLVM ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering of CIR-lowered MLIR operations to LLVMIR.
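+// It defines no patterns of its own: it stitches together the upstream
+// conversions (affine to standard, SCF to control flow, and arith/cf/memref/
+// func to the LLVM dialect) and applies them as a single full conversion.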
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/LLVMCommon/ConversionTarget.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/SCF/Transforms/Passes.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Export.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Passes.h" +#include "llvm/ADT/Sequence.h" + +using namespace cir; +using namespace llvm; + +namespace cir { +struct ConvertMLIRToLLVMPass + : public mlir::PassWrapper> { + void getDependentDialects(mlir::DialectRegistry ®istry) const override { + registry.insert(); + } + void runOnOperation() final; + + virtual StringRef getArgument() const override { return "cir-mlir-to-llvm"; } +}; + +void ConvertMLIRToLLVMPass::runOnOperation() { + mlir::LLVMConversionTarget target(getContext()); + target.addLegalOp(); + + mlir::LLVMTypeConverter typeConverter(&getContext()); + + mlir::RewritePatternSet patterns(&getContext()); + populateAffineToStdConversionPatterns(patterns); + mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, patterns); + populateSCFToControlFlowConversionPatterns(patterns); + mlir::cf::populateControlFlowToLLVMConversionPatterns(typeConverter, + patterns); + populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns); + populateFuncToLLVMConversionPatterns(typeConverter, patterns); + + auto module = getOperation(); + if (failed(applyFullConversion(module, target, std::move(patterns)))) + signalPassFailure(); +} + +std::unique_ptr createConvertMLIRToLLVMPass() { + return std::make_unique(); +} + +} // namespace cir diff --git a/clang/lib/CIR/Lowering/ThroughMLIR/LowerToMLIRHelpers.h b/clang/lib/CIR/Lowering/ThroughMLIR/LowerToMLIRHelpers.h new file mode 100644 index 000000000000..46d2bd7fc2a1 --- /dev/null +++ b/clang/lib/CIR/Lowering/ThroughMLIR/LowerToMLIRHelpers.h @@ -0,0 +1,83 @@ +#ifndef LLVM_CLANG_LIB_LOWERTOMLIRHELPERS_H +#define LLVM_CLANG_LIB_LOWERTOMLIRHELPERS_H +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Transforms/DialectConversion.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +template +mlir::Value getConst(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::Type ty, T value) { + assert(mlir::isa(ty) || mlir::isa(ty)); + if (mlir::isa(ty)) + return rewriter.create( + loc, ty, mlir::IntegerAttr::get(ty, value)); + return rewriter.create( + loc, ty, mlir::FloatAttr::get(ty, value)); +} + +mlir::Value 
createIntCast(mlir::ConversionPatternRewriter &rewriter, + mlir::Value src, mlir::Type dstTy, + bool isSigned = false) { + auto srcTy = src.getType(); + assert(mlir::isa(srcTy)); + assert(mlir::isa(dstTy)); + + auto srcWidth = mlir::cast(srcTy).getWidth(); + auto dstWidth = mlir::cast(dstTy).getWidth(); + auto loc = src.getLoc(); + + if (dstWidth > srcWidth && isSigned) + return rewriter.create(loc, dstTy, src); + else if (dstWidth > srcWidth) + return rewriter.create(loc, dstTy, src); + else if (dstWidth < srcWidth) + return rewriter.create(loc, dstTy, src); + else + return rewriter.create(loc, dstTy, src); +} + +mlir::arith::CmpIPredicate +convertCmpKindToCmpIPredicate(mlir::cir::CmpOpKind kind, bool isSigned) { + using CIR = mlir::cir::CmpOpKind; + using arithCmpI = mlir::arith::CmpIPredicate; + switch (kind) { + case CIR::eq: + return arithCmpI::eq; + case CIR::ne: + return arithCmpI::ne; + case CIR::lt: + return (isSigned ? arithCmpI::slt : arithCmpI::ult); + case CIR::le: + return (isSigned ? arithCmpI::sle : arithCmpI::ule); + case CIR::gt: + return (isSigned ? arithCmpI::sgt : arithCmpI::ugt); + case CIR::ge: + return (isSigned ? arithCmpI::sge : arithCmpI::uge); + } + llvm_unreachable("Unknown CmpOpKind"); +} + +mlir::arith::CmpFPredicate +convertCmpKindToCmpFPredicate(mlir::cir::CmpOpKind kind) { + using CIR = mlir::cir::CmpOpKind; + using arithCmpF = mlir::arith::CmpFPredicate; + switch (kind) { + case CIR::eq: + return arithCmpF::OEQ; + case CIR::ne: + return arithCmpF::UNE; + case CIR::lt: + return arithCmpF::OLT; + case CIR::le: + return arithCmpF::OLE; + case CIR::gt: + return arithCmpF::OGT; + case CIR::ge: + return arithCmpF::OGE; + } + llvm_unreachable("Unknown CmpOpKind"); +} + +#endif \ No newline at end of file diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 67bf0604acd6..567d710ee89e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -377,6 +377,7 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, (PhaseArg = DAL.getLastArg(options::OPT__migrate)) || (PhaseArg = DAL.getLastArg(options::OPT__analyze)) || (PhaseArg = DAL.getLastArg(options::OPT_emit_cir)) || + (PhaseArg = DAL.getLastArg(options::OPT_emit_cir_flat)) || (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) { FinalPhase = phases::Compile; @@ -4817,6 +4818,8 @@ Action *Driver::ConstructPhaseAction( return C.MakeAction(Input, types::TY_AST); if (Args.hasArg(options::OPT_emit_cir)) return C.MakeAction(Input, types::TY_CIR); + if (Args.hasArg(options::OPT_emit_cir_flat)) + return C.MakeAction(Input, types::TY_CIR_FLAT); if (Args.hasArg(options::OPT_module_file_info)) return C.MakeAction(Input, types::TY_ModuleFile); if (Args.hasArg(options::OPT_verify_pch)) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2ce9e2f4bcfc..3e6cc7c9e8dd 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4967,6 +4967,40 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } + if (Args.hasArg(options::OPT_fclangir) || + Args.hasArg(options::OPT_emit_cir) || + Args.hasArg(options::OPT_emit_cir_flat)) + CmdArgs.push_back("-fclangir"); + + Args.addOptOutFlag(CmdArgs, options::OPT_fclangir_direct_lowering, + options::OPT_fno_clangir_direct_lowering); + + if (Args.hasArg(options::OPT_clangir_disable_passes)) + CmdArgs.push_back("-clangir-disable-passes"); + + if (Args.hasArg(options::OPT_fclangir_call_conv_lowering)) + 
CmdArgs.push_back("-fclangir-call-conv-lowering"); + + if (Args.hasArg(options::OPT_fclangir_mem2reg)) + CmdArgs.push_back("-fclangir-mem2reg"); + + // ClangIR lib opt requires idiom recognizer. + if (Args.hasArg(options::OPT_fclangir_lib_opt, + options::OPT_fclangir_lib_opt_EQ)) { + if (!Args.hasArg(options::OPT_fclangir_idiom_recognizer, + options::OPT_fclangir_idiom_recognizer_EQ)) + CmdArgs.push_back("-fclangir-idiom-recognizer"); + } + + if (Args.hasArg(options::OPT_fclangir_analysis_only)) { + CmdArgs.push_back("-fclangir-analysis-only"); + + // TODO: We should pass some default analysis configuration here. + + // TODO2: Should we emit some diagnostics if the configurations conflict + // with each other? + } + if (IsOpenMPDevice) { // We have to pass the triple of the host if compiling for an OpenMP device. std::string NormalizedTriple = @@ -5105,6 +5139,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } else if (JA.getType() == types::TY_LLVM_IR || JA.getType() == types::TY_LTO_IR) { CmdArgs.push_back("-emit-llvm"); + } else if (JA.getType() == types::TY_CIR) { + CmdArgs.push_back("-emit-cir"); + } else if (JA.getType() == types::TY_CIR_FLAT) { + CmdArgs.push_back("-emit-cir-flat"); } else if (JA.getType() == types::TY_LLVM_BC || JA.getType() == types::TY_LTO_BC) { // Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S @@ -7564,6 +7602,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } + for (const Arg *A : Args.filtered(options::OPT_mmlir)) { + A->claim(); + A->render(Args, CmdArgs); + } + // With -save-temps, we want to save the unoptimized bitcode output from the // CompileJobAction, use -disable-llvm-passes to get pristine IR generated // by the frontend. diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index cde4a84673b6..e1b6a93611ab 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1530,6 +1530,13 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, if (Opts.NewStructPathTBAA) GenerateArg(Consumer, OPT_new_struct_path_tbaa); + if (Opts.ClangIRBuildDeferredThreshold) + GenerateArg(Consumer, OPT_fclangir_disable_deferred_EQ, + Twine(Opts.ClangIRBuildDeferredThreshold)); + + if (Opts.ClangIRSkipFunctionsFromSystemHeaders) + GenerateArg(Consumer, OPT_fclangir_skip_system_headers); + if (Opts.OptimizeSize == 1) GenerateArg(Consumer, OPT_O, "s"); else if (Opts.OptimizeSize == 2) @@ -2553,6 +2560,9 @@ static const auto &getFrontendActionTable() { {frontend::EmitAssembly, OPT_S}, {frontend::EmitBC, OPT_emit_llvm_bc}, {frontend::EmitCIR, OPT_emit_cir}, + {frontend::EmitCIRFlat, OPT_emit_cir_flat}, + {frontend::EmitCIROnly, OPT_emit_cir_only}, + {frontend::EmitMLIR, OPT_emit_mlir}, {frontend::EmitHTML, OPT_emit_html}, {frontend::EmitLLVM, OPT_emit_llvm}, {frontend::EmitLLVMOnly, OPT_emit_llvm_only}, @@ -2696,6 +2706,17 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts, for (const auto &ModuleFile : Opts.ModuleFiles) GenerateArg(Consumer, OPT_fmodule_file, ModuleFile); + if (Opts.ClangIRLifetimeCheck) + GenerateArg(Consumer, OPT_fclangir_lifetime_check_EQ, + Opts.ClangIRLifetimeCheckOpts); + + if (Opts.ClangIRIdiomRecognizer) + GenerateArg(Consumer, OPT_fclangir_idiom_recognizer_EQ, + Opts.ClangIRIdiomRecognizerOpts); + + if (Opts.ClangIRLibOpt) + GenerateArg(Consumer, OPT_fclangir_lib_opt_EQ, Opts.ClangIRLibOptOpts); + if (Opts.AuxTargetCPU) GenerateArg(Consumer, OPT_aux_target_cpu, 
*Opts.AuxTargetCPU); @@ -2919,9 +2940,52 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, if (Opts.ProgramAction != frontend::GenerateModule && Opts.IsSystemModule) Diags.Report(diag::err_drv_argument_only_allowed_with) << "-fsystem-module" << "-emit-module"; - if (Args.hasArg(OPT_fclangir) || Args.hasArg(OPT_emit_cir)) + if (Args.hasArg(OPT_fclangir) || Args.hasArg(OPT_emit_cir) || + Args.hasArg(OPT_emit_cir_flat)) Opts.UseClangIRPipeline = true; + if (Args.hasArg(OPT_fclangir_direct_lowering)) + Opts.ClangIRDirectLowering = true; + + if (Args.hasArg(OPT_clangir_disable_passes)) + Opts.ClangIRDisablePasses = true; + + if (Args.hasArg(OPT_clangir_disable_verifier)) + Opts.ClangIRDisableCIRVerifier = true; + + if (Args.hasArg(OPT_clangir_disable_emit_cxx_default)) + Opts.ClangIRDisableEmitCXXDefault = true; + + if (Args.hasArg(OPT_clangir_verify_diagnostics)) + Opts.ClangIRVerifyDiags = true; + + if (Args.hasArg(OPT_fclangir_call_conv_lowering)) + Opts.ClangIREnableCallConvLowering = true; + + if (Args.hasArg(OPT_fclangir_analysis_only)) + Opts.ClangIRAnalysisOnly = true; + + if (const Arg *A = Args.getLastArg(OPT_fclangir_lifetime_check, + OPT_fclangir_lifetime_check_EQ)) { + Opts.ClangIRLifetimeCheck = true; + Opts.ClangIRLifetimeCheckOpts = A->getValue(); + } + + if (const Arg *A = Args.getLastArg(OPT_fclangir_idiom_recognizer, + OPT_fclangir_idiom_recognizer_EQ)) { + Opts.ClangIRIdiomRecognizer = true; + Opts.ClangIRIdiomRecognizerOpts = A->getValue(); + } + + if (const Arg *A = + Args.getLastArg(OPT_fclangir_lib_opt, OPT_fclangir_lib_opt_EQ)) { + Opts.ClangIRLibOpt = true; + Opts.ClangIRLibOptOpts = A->getValue(); + } + + if (Args.hasArg(OPT_fclangir_mem2reg)) + Opts.ClangIREnableMem2Reg = true; + if (Args.hasArg(OPT_aux_target_cpu)) Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu)); if (Args.hasArg(OPT_aux_target_feature)) @@ -4388,6 +4452,9 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { case frontend::EmitAssembly: case frontend::EmitBC: case frontend::EmitCIR: + case frontend::EmitCIRFlat: + case frontend::EmitCIROnly: + case frontend::EmitMLIR: case frontend::EmitHTML: case frontend::EmitLLVM: case frontend::EmitLLVMOnly: diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index a9c45e525c69..664c9ed0dffc 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -779,6 +779,26 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, return true; } + // TODO: blindly duplicating for now + if (Input.getKind().getLanguage() == Language::CIR) { + assert(hasCIRSupport() && "This action does not have CIR file support!"); + + // Inform the diagnostic client we are processing a source file. + CI.getDiagnosticClient().BeginSourceFile(CI.getLangOpts(), nullptr); + HasBegunSourceFile = true; + + // Initialize the action. + if (!BeginSourceFileAction(CI)) + return false; + + // Initialize the main file entry. + if (!CI.InitializeSourceManager(CurrentInput)) + return false; + + FailureCleanup.release(); + return true; + } + // If the implicit PCH include is actually a directory, rather than // a single file, search for a suitable PCH file in that directory. 
if (!CI.getPreprocessorOpts().ImplicitPCHInclude.empty()) { diff --git a/clang/lib/FrontendTool/CMakeLists.txt b/clang/lib/FrontendTool/CMakeLists.txt index 51c379ade270..6dae1455010c 100644 --- a/clang/lib/FrontendTool/CMakeLists.txt +++ b/clang/lib/FrontendTool/CMakeLists.txt @@ -12,6 +12,24 @@ set(link_libs clangRewriteFrontend ) +set(deps) + +if(CLANG_ENABLE_CIR) + list(APPEND link_libs + clangCIRFrontendAction + MLIRCIRTransforms + MLIRIR + MLIRPass + ) + list(APPEND deps + MLIRBuiltinLocationAttributesIncGen + MLIRBuiltinTypeInterfacesIncGen + ) + + include_directories(${LLVM_MAIN_SRC_DIR}/../mlir/include) + include_directories(${CMAKE_BINARY_DIR}/tools/mlir/include) +endif() + if(CLANG_ENABLE_ARCMT) list(APPEND link_libs clangARCMigrate @@ -29,6 +47,7 @@ add_clang_library(clangFrontendTool DEPENDS ClangDriverOptions + ${deps} LINK_LIBS ${link_libs} diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 7476b1076d10..76eca4e44cca 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -31,6 +31,15 @@ #include "llvm/Support/BuryPointer.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" + +#if CLANG_ENABLE_CIR +#include "mlir/IR/AsmState.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/Pass/PassManager.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIRFrontendAction/CIRGenAction.h" +#endif + using namespace clang; using namespace llvm::opt; @@ -42,6 +51,25 @@ CreateFrontendBaseAction(CompilerInstance &CI) { StringRef Action("unknown"); (void)Action; + auto UseCIR = CI.getFrontendOpts().UseClangIRPipeline; + auto Act = CI.getFrontendOpts().ProgramAction; + auto CIRAnalysisOnly = CI.getFrontendOpts().ClangIRAnalysisOnly; + auto EmitsCIR = Act == EmitCIR || Act == EmitCIRFlat || Act == EmitCIROnly; + +#if !CLANG_ENABLE_CIR + if (UseCIR) + llvm::report_fatal_error( + "CIR is not supported by this build of Clang"); +#endif + + if (!UseCIR && EmitsCIR) + llvm::report_fatal_error( + "-emit-cir and -emit-cir-only only valid when using -fclangir"); + + if (CI.getFrontendOpts().ClangIRDirectLowering && Act == EmitMLIR) + llvm::report_fatal_error( + "ClangIR direct lowering is incompatible with -emit-mlir"); + switch (CI.getFrontendOpts().ProgramAction) { case ASTDeclList: return std::make_unique(); case ASTDump: return std::make_unique(); @@ -51,15 +79,68 @@ CreateFrontendBaseAction(CompilerInstance &CI) { return std::make_unique(); case DumpRawTokens: return std::make_unique(); case DumpTokens: return std::make_unique(); - case EmitAssembly: return std::make_unique(); - case EmitBC: return std::make_unique(); + case EmitAssembly: +#if CLANG_ENABLE_CIR + if (UseCIR) + return std::make_unique<::cir::EmitAssemblyAction>(); + if (CIRAnalysisOnly) + return std::make_unique<::cir::AnalysisOnlyAndEmitAssemblyAction>(); +#endif + return std::make_unique(); + case EmitBC: { +#if CLANG_ENABLE_CIR + if (UseCIR) + return std::make_unique<::cir::EmitBCAction>(); + if (CIRAnalysisOnly) + return std::make_unique<::cir::AnalysisOnlyAndEmitBCAction>(); +#endif + return std::make_unique(); + } +#if CLANG_ENABLE_CIR + case EmitCIR: return std::make_unique<::cir::EmitCIRAction>(); + case EmitCIRFlat: + return std::make_unique<::cir::EmitCIRFlatAction>(); + case EmitCIROnly: return std::make_unique<::cir::EmitCIROnlyAction>(); + case EmitMLIR: return std::make_unique<::cir::EmitMLIRAction>(); +#else case EmitCIR: + case 
EmitCIRFlat: + case EmitCIROnly: llvm_unreachable("CIR suppport not built into clang"); +#endif case EmitHTML: return std::make_unique(); - case EmitLLVM: return std::make_unique(); - case EmitLLVMOnly: return std::make_unique(); - case EmitCodeGenOnly: return std::make_unique(); - case EmitObj: return std::make_unique(); + case EmitLLVM: { +#if CLANG_ENABLE_CIR + if (UseCIR) + return std::make_unique<::cir::EmitLLVMAction>(); + if (CIRAnalysisOnly) + return std::make_unique<::cir::AnalysisOnlyAndEmitLLVMAction>(); +#endif + return std::make_unique(); + } + case EmitLLVMOnly: { +#if CLANG_ENABLE_CIR + if (CIRAnalysisOnly) + return std::make_unique<::cir::AnalysisOnlyAndEmitLLVMOnlyAction>(); +#endif + return std::make_unique(); + } + case EmitCodeGenOnly: { +#if CLANG_ENABLE_CIR + if (CIRAnalysisOnly) + return std::make_unique<::cir::AnalysisOnlyAndEmitLLVMOnlyAction>(); +#endif + return std::make_unique(); + } + case EmitObj: { +#if CLANG_ENABLE_CIR + if (UseCIR) + return std::make_unique<::cir::EmitObjAction>(); + if (CIRAnalysisOnly) + return std::make_unique<::cir::AnalysisOnlyAndEmitObjAction>(); +#endif + return std::make_unique(); + } case ExtractAPI: return std::make_unique(); case FixIt: return std::make_unique(); @@ -269,7 +350,21 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) { return true; } #endif - +#if CLANG_ENABLE_CIR + if (!Clang->getFrontendOpts().MLIRArgs.empty()) { + mlir::registerCIRPasses(); + mlir::registerMLIRContextCLOptions(); + mlir::registerPassManagerCLOptions(); + mlir::registerAsmPrinterCLOptions(); + unsigned NumArgs = Clang->getFrontendOpts().MLIRArgs.size(); + auto Args = std::make_unique(NumArgs + 2); + Args[0] = "clang (MLIR option parsing)"; + for (unsigned i = 0; i != NumArgs; ++i) + Args[i + 1] = Clang->getFrontendOpts().MLIRArgs[i].c_str(); + Args[NumArgs + 1] = nullptr; + llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); + } +#endif // If there were errors in processing arguments, don't do anything else. 
if (Clang->getDiagnostics().hasErrorOccurred()) return false; diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index f152d243d39a..bb61d9d77a39 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -13,6 +13,14 @@ clang_tablegen(OpenCLBuiltins.inc -gen-clang-opencl-builtins TARGET ClangOpenCLBuiltinsImpl ) +if(CLANG_ENABLE_CIR) + set(CIR_DEPS + MLIRCIROpsIncGen + MLIRCIR + ) +endif() + + add_clang_library(clangSema AnalysisBasedWarnings.cpp CodeCompleteConsumer.cpp @@ -93,6 +101,7 @@ add_clang_library(clangSema ClangOpenCLBuiltinsImpl omp_gen ClangDriverOptions + ${CIR_DEPS} LINK_LIBS clangAPINotes diff --git a/clang/test/CIR/CodeGen/Inputs/std-compare.h b/clang/test/CIR/CodeGen/Inputs/std-compare.h new file mode 100644 index 000000000000..f7f0c9b06db6 --- /dev/null +++ b/clang/test/CIR/CodeGen/Inputs/std-compare.h @@ -0,0 +1,324 @@ +#ifndef STD_COMPARE_H +#define STD_COMPARE_H + +namespace std { +inline namespace __1 { + +#ifdef NON_CANONICAL_CMP_RESULTS + +// exposition only +enum class _EqResult : unsigned char { + __equal = 2, + __equiv = __equal, +}; + +enum class _OrdResult : signed char { + __less = 1, + __greater = 3 +}; + +#else + +// exposition only +enum class _EqResult : unsigned char { + __equal = 0, + __equiv = __equal, +}; + +enum class _OrdResult : signed char { + __less = -1, + __greater = 1 +}; + +#endif + +enum class _NCmpResult : signed char { + __unordered = -127 +}; + +struct _CmpUnspecifiedType; +using _CmpUnspecifiedParam = void (_CmpUnspecifiedType::*)(); + +class partial_ordering { + using _ValueT = signed char; + explicit constexpr partial_ordering(_EqResult __v) noexcept + : __value_(_ValueT(__v)) {} + explicit constexpr partial_ordering(_OrdResult __v) noexcept + : __value_(_ValueT(__v)) {} + explicit constexpr partial_ordering(_NCmpResult __v) noexcept + : __value_(_ValueT(__v)) {} + + constexpr bool __is_ordered() const noexcept { + return __value_ != _ValueT(_NCmpResult::__unordered); + } + +public: + // valid values + static const partial_ordering less; + static const partial_ordering equivalent; + static const partial_ordering greater; + static const partial_ordering unordered; + + // comparisons + friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator!=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator<(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator>(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator==(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + friend constexpr bool operator!=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + friend constexpr bool operator<(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + friend constexpr bool operator>(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + + friend constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept; + + // test helper + constexpr bool test_eq(partial_ordering 
const &other) const noexcept { + return __value_ == other.__value_; + } + +private: + _ValueT __value_; +}; + +inline constexpr partial_ordering partial_ordering::less(_OrdResult::__less); +inline constexpr partial_ordering partial_ordering::equivalent(_EqResult::__equiv); +inline constexpr partial_ordering partial_ordering::greater(_OrdResult::__greater); +inline constexpr partial_ordering partial_ordering::unordered(_NCmpResult ::__unordered); +constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__is_ordered() && __v.__value_ == 0; +} +constexpr bool operator<(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__is_ordered() && __v.__value_ < 0; +} +constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__is_ordered() && __v.__value_ <= 0; +} +constexpr bool operator>(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__is_ordered() && __v.__value_ > 0; +} +constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__is_ordered() && __v.__value_ >= 0; +} +constexpr bool operator==(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return __v.__is_ordered() && 0 == __v.__value_; +} +constexpr bool operator<(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return __v.__is_ordered() && 0 < __v.__value_; +} +constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return __v.__is_ordered() && 0 <= __v.__value_; +} +constexpr bool operator>(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return __v.__is_ordered() && 0 > __v.__value_; +} +constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return __v.__is_ordered() && 0 >= __v.__value_; +} +constexpr bool operator!=(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return !__v.__is_ordered() || __v.__value_ != 0; +} +constexpr bool operator!=(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return !__v.__is_ordered() || __v.__value_ != 0; +} + +constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v; +} +constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept { + return __v < 0 ? partial_ordering::greater : (__v > 0 ? partial_ordering::less : __v); +} + +class weak_ordering { + using _ValueT = signed char; + explicit constexpr weak_ordering(_EqResult __v) noexcept : __value_(_ValueT(__v)) {} + explicit constexpr weak_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {} + +public: + static const weak_ordering less; + static const weak_ordering equivalent; + static const weak_ordering greater; + + // conversions + constexpr operator partial_ordering() const noexcept { + return __value_ == 0 ? partial_ordering::equivalent + : (__value_ < 0 ? 
partial_ordering::less : partial_ordering::greater); + } + + // comparisons + friend constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator!=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator<(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator>(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator==(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + friend constexpr bool operator!=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + friend constexpr bool operator<(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + friend constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + friend constexpr bool operator>(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + friend constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + + friend constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept; + + // test helper + constexpr bool test_eq(weak_ordering const &other) const noexcept { + return __value_ == other.__value_; + } + +private: + _ValueT __value_; +}; + +inline constexpr weak_ordering weak_ordering::less(_OrdResult::__less); +inline constexpr weak_ordering weak_ordering::equivalent(_EqResult::__equiv); +inline constexpr weak_ordering weak_ordering::greater(_OrdResult::__greater); +constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ == 0; +} +constexpr bool operator!=(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ != 0; +} +constexpr bool operator<(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ < 0; +} +constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ <= 0; +} +constexpr bool operator>(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ > 0; +} +constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ >= 0; +} +constexpr bool operator==(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return 0 == __v.__value_; +} +constexpr bool operator!=(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return 0 != __v.__value_; +} +constexpr bool operator<(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return 0 < __v.__value_; +} +constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return 0 <= __v.__value_; +} +constexpr bool operator>(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return 0 > __v.__value_; +} +constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return 0 >= __v.__value_; +} + +constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v; +} +constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept { + return __v < 0 ? weak_ordering::greater : (__v > 0 ? 
weak_ordering::less : __v); +} + +class strong_ordering { + using _ValueT = signed char; + explicit constexpr strong_ordering(_EqResult __v) noexcept : __value_(static_cast(__v)) {} + explicit constexpr strong_ordering(_OrdResult __v) noexcept : __value_(static_cast(__v)) {} + +public: + static const strong_ordering less; + static const strong_ordering equal; + static const strong_ordering equivalent; + static const strong_ordering greater; + + // conversions + constexpr operator partial_ordering() const noexcept { + return __value_ == 0 ? partial_ordering::equivalent + : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater); + } + constexpr operator weak_ordering() const noexcept { + return __value_ == 0 ? weak_ordering::equivalent + : (__value_ < 0 ? weak_ordering::less : weak_ordering::greater); + } + + // comparisons + friend constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator!=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator<(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator>(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr bool operator==(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + friend constexpr bool operator!=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + friend constexpr bool operator<(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + friend constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + friend constexpr bool operator>(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + friend constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + + friend constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept; + friend constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept; + + // test helper + constexpr bool test_eq(strong_ordering const &other) const noexcept { + return __value_ == other.__value_; + } + +private: + _ValueT __value_; +}; + +inline constexpr strong_ordering strong_ordering::less(_OrdResult::__less); +inline constexpr strong_ordering strong_ordering::equal(_EqResult::__equal); +inline constexpr strong_ordering strong_ordering::equivalent(_EqResult::__equiv); +inline constexpr strong_ordering strong_ordering::greater(_OrdResult::__greater); + +constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ == 0; +} +constexpr bool operator!=(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ != 0; +} +constexpr bool operator<(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ < 0; +} +constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ <= 0; +} +constexpr bool operator>(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ > 0; +} +constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v.__value_ >= 0; +} +constexpr bool operator==(_CmpUnspecifiedParam, strong_ordering __v) noexcept { + return 0 == __v.__value_; +} +constexpr bool operator!=(_CmpUnspecifiedParam, strong_ordering __v) noexcept { + return 0 != __v.__value_; +} +constexpr bool operator<(_CmpUnspecifiedParam, strong_ordering __v) 
noexcept { + return 0 < __v.__value_; +} +constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept { + return 0 <= __v.__value_; +} +constexpr bool operator>(_CmpUnspecifiedParam, strong_ordering __v) noexcept { + return 0 > __v.__value_; +} +constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept { + return 0 >= __v.__value_; +} + +constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept { + return __v; +} +constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept { + return __v < 0 ? strong_ordering::greater : (__v > 0 ? strong_ordering::less : __v); +} + +} // namespace __1 +} // end namespace std + +#endif // STD_COMPARE_H diff --git a/clang/test/CIR/CodeGen/OpenCL/addrspace-alloca.cl b/clang/test/CIR/CodeGen/OpenCL/addrspace-alloca.cl new file mode 100644 index 000000000000..c64b5015f369 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/addrspace-alloca.cl @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + + +// CIR: cir.func @func(%arg0: !cir.ptr +// LLVM: @func(ptr addrspace(3) +kernel void func(local int *p) { + // CIR-NEXT: %[[#ALLOCA_P:]] = cir.alloca !cir.ptr, !cir.ptr, addrspace(offload_private)>, ["p", init] {alignment = 8 : i64} + // LLVM-NEXT: %[[#ALLOCA_P:]] = alloca ptr addrspace(3), i64 1, align 8 + + int x; + // CIR-NEXT: %[[#ALLOCA_X:]] = cir.alloca !s32i, !cir.ptr, ["x"] {alignment = 4 : i64} + // LLVM-NEXT: %[[#ALLOCA_X:]] = alloca i32, i64 1, align 4 + + global char *b; + // CIR-NEXT: %[[#ALLOCA_B:]] = cir.alloca !cir.ptr, !cir.ptr, addrspace(offload_private)>, ["b"] {alignment = 8 : i64} + // LLVM-NEXT: %[[#ALLOCA_B:]] = alloca ptr addrspace(1), i64 1, align 8 + + private int *ptr; + // CIR-NEXT: %[[#ALLOCA_PTR:]] = cir.alloca !cir.ptr, !cir.ptr, addrspace(offload_private)>, ["ptr"] {alignment = 8 : i64} + // LLVM-NEXT: %[[#ALLOCA_PTR:]] = alloca ptr, i64 1, align 8 + + // Store of the argument `p` + // CIR-NEXT: cir.store %arg0, %[[#ALLOCA_P]] : !cir.ptr, !cir.ptr, addrspace(offload_private)> + // LLVM-NEXT: store ptr addrspace(3) %{{[0-9]+}}, ptr %[[#ALLOCA_P]], align 8 + + ptr = &x; + // CIR-NEXT: cir.store %[[#ALLOCA_X]], %[[#ALLOCA_PTR]] : !cir.ptr, !cir.ptr, addrspace(offload_private)> + // LLVM-NEXT: store ptr %[[#ALLOCA_X]], ptr %[[#ALLOCA_PTR]] + + return; +} diff --git a/clang/test/CIR/CodeGen/OpenCL/array-decay.cl b/clang/test/CIR/CodeGen/OpenCL/array-decay.cl new file mode 100644 index 000000000000..d81e425729a6 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/array-decay.cl @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +// CIR: @func1 +// LLVM: @func1 +kernel void func1(global int *data) { + local int arr[32]; + + local int *ptr = arr; + // CIR: cir.cast(array_to_ptrdecay, %{{[0-9]+}} : !cir.ptr, addrspace(offload_local)>), !cir.ptr + // CIR-NEXT: cir.store %{{[0-9]+}}, %{{[0-9]+}} : !cir.ptr, !cir.ptr, addrspace(offload_private)> + + // LLVM: store ptr 
addrspace(3) @func1.arr, ptr %{{[0-9]+}} +} + +// CIR: @func2 +// LLVM: @func2 +kernel void func2(global int *data) { + private int arr[32] = {data[2]}; + // CIR: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %{{[0-9]+}} : !cir.ptr, addrspace(offload_private)>), !cir.ptr + + // LLVM: %{{[0-9]+}} = getelementptr i32, ptr %3, i32 0 +} diff --git a/clang/test/CIR/CodeGen/OpenCL/convergent.cl b/clang/test/CIR/CodeGen/OpenCL/convergent.cl new file mode 100644 index 000000000000..d953aa799307 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/convergent.cl @@ -0,0 +1,107 @@ +// RUN: %clang_cc1 -fclangir -triple spirv64-unknown-unknown -emit-cir %s -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR +// RUN: %clang_cc1 -fclangir -triple spirv64-unknown-unknown -emit-llvm %s -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM + +// In ClangIR for OpenCL, all functions should be marked convergent. +// In LLVM IR, it is initially assumed convergent, but can be deduced to not require it. + +// CIR: #fn_attr[[CONV_NOINLINE_ATTR:[0-9]*]] = #cir +// CIR-NEXT: #fn_attr[[CONV_DECL_ATTR:[0-9]*]] = #cir : !s32i +// LLVM-DAG: @a = addrspace(1) global i32 13 + +global int b = 15; +// CIR-DAG: cir.global external addrspace(offload_global) @b = #cir.int<15> : !s32i +// LLVM-DAG: @b = addrspace(1) global i32 15 + +kernel void test_get_global() { + a = b; + // CIR: %[[#ADDRB:]] = cir.get_global @b : !cir.ptr + // CIR-NEXT: %[[#LOADB:]] = cir.load %[[#ADDRB]] : !cir.ptr, !s32i + // CIR-NEXT: %[[#ADDRA:]] = cir.get_global @a : !cir.ptr + // CIR-NEXT: cir.store %[[#LOADB]], %[[#ADDRA]] : !s32i, !cir.ptr + + // LLVM: %[[#LOADB:]] = load i32, ptr addrspace(1) @b, align 4 + // LLVM-NEXT: store i32 %[[#LOADB]], ptr addrspace(1) @a, align 4 +} diff --git a/clang/test/CIR/CodeGen/OpenCL/kernel-arg-info-single-as.cl b/clang/test/CIR/CodeGen/OpenCL/kernel-arg-info-single-as.cl new file mode 100644 index 000000000000..b78ee6dddbf7 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/kernel-arg-info-single-as.cl @@ -0,0 +1,14 @@ +// Test that the kernel argument info always refers to SPIR address spaces, +// even if the target has only one address space like x86_64 does. 
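// Editor's note (not part of the test): the integers emitted in
// !kernel_arg_addr_space follow the SPIR numbering (0 = private, 1 = global,
// 2 = constant, 3 = local) rather than the target's own address-space map,
// which is why the CHECK lines stay identical on x86_64. For instance, a
// hypothetical kernel
//   kernel void example(global int *g, constant int *c, local int *l) {}
// would be expected to carry the metadata
//   !kernel_arg_addr_space !{i32 1, i32 2, i32 3}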
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-cir -o - -triple x86_64-unknown-linux-gnu -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR + +// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-llvm -o - -triple x86_64-unknown-linux-gnu -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM + +kernel void foo(__global int * G, __constant int *C, __local int *L) { + *G = *C + *L; +} +// CIR: cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-DAG: cir.func @foo({{.+}}) extra(#fn_attr[[KERNEL0]]) +// CIR-ARGINFO-DAG: #fn_attr[[KERNEL0:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-ARGINFO-DAG: cir.func @foo({{.+}}) extra(#fn_attr[[KERNEL0]]) + +// LLVM-DAG: define{{.*}} void @foo{{.+}} !kernel_arg_addr_space ![[MD11:[0-9]+]] !kernel_arg_access_qual ![[MD12:[0-9]+]] !kernel_arg_type ![[MD13:[0-9]+]] !kernel_arg_base_type ![[MD13]] !kernel_arg_type_qual ![[MD14:[0-9]+]] { +// LLVM-ARGINFO-DAG: define{{.*}} void @foo{{.+}} !kernel_arg_addr_space ![[MD11:[0-9]+]] !kernel_arg_access_qual ![[MD12:[0-9]+]] !kernel_arg_type ![[MD13:[0-9]+]] !kernel_arg_base_type ![[MD13]] !kernel_arg_type_qual ![[MD14:[0-9]+]] !kernel_arg_name ![[MD15:[0-9]+]] { + +// LLVM-DAG: ![[MD11]] = !{i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0} +// LLVM-DAG: ![[MD12]] = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"} +// LLVM-DAG: ![[MD13]] = !{!"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int", !"int", !"int", !"int"} +// LLVM-DAG: ![[MD14]] = !{!"", !"restrict", !"const", !"restrict const", !"const", !"restrict const", !"const volatile", !"restrict const volatile", !"volatile", !"restrict volatile", !"", !"restrict", !"const", !"restrict const", !"const volatile", !"restrict const volatile", !"volatile", !"restrict volatile", !"", !"", !"", !""} +// LLVM-ARGINFO-DAG: ![[MD15]] = !{!"globalintp", !"globalintrestrictp", !"globalconstintp", !"globalconstintrestrictp", !"constantintp", !"constantintrestrictp", !"globalconstvolatileintp", !"globalconstvolatileintrestrictp", !"globalvolatileintp", !"globalvolatileintrestrictp", !"localintp", !"localintrestrictp", !"localconstintp", !"localconstintrestrictp", !"localconstvolatileintp", !"localconstvolatileintrestrictp", !"localvolatileintp", !"localvolatileintrestrictp", !"X", !"constint", !"constvolatileint", !"volatileint"} + +typedef unsigned int myunsignedint; +kernel void foo4(__global unsigned int * X, __global myunsignedint * Y) { +} + +// CIR-DAG: #fn_attr[[KERNEL4:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-DAG: cir.func @foo4({{.+}}) extra(#fn_attr[[KERNEL4]]) +// CIR-ARGINFO-DAG: #fn_attr[[KERNEL4:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-ARGINFO-DAG: cir.func @foo4({{.+}}) extra(#fn_attr[[KERNEL4]]) + +// LLVM-DAG: define{{.*}} void @foo4{{.+}} !kernel_arg_addr_space ![[MD41:[0-9]+]] !kernel_arg_access_qual ![[MD42:[0-9]+]] !kernel_arg_type ![[MD43:[0-9]+]] !kernel_arg_base_type ![[MD44:[0-9]+]] !kernel_arg_type_qual ![[MD45:[0-9]+]] { +// LLVM-ARGINFO-DAG: define{{.*}} void @foo4{{.+}} !kernel_arg_addr_space ![[MD41:[0-9]+]] 
!kernel_arg_access_qual ![[MD42:[0-9]+]] !kernel_arg_type ![[MD43:[0-9]+]] !kernel_arg_base_type ![[MD44:[0-9]+]] !kernel_arg_type_qual ![[MD45:[0-9]+]] !kernel_arg_name ![[MD46:[0-9]+]] { + +// LLVM-DAG: ![[MD41]] = !{i32 1, i32 1} +// LLVM-DAG: ![[MD42]] = !{!"none", !"none"} +// LLVM-DAG: ![[MD43]] = !{!"uint*", !"myunsignedint*"} +// LLVM-DAG: ![[MD44]] = !{!"uint*", !"uint*"} +// LLVM-DAG: ![[MD45]] = !{!"", !""} +// LLVM-ARGINFO-DAG: ![[MD46]] = !{!"X", !"Y"} + +typedef char char16 __attribute__((ext_vector_type(16))); +__kernel void foo6(__global char16 arg[]) {} + +// CIR-DAG: #fn_attr[[KERNEL6:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-DAG: cir.func @foo6({{.+}}) extra(#fn_attr[[KERNEL6]]) +// CIR-ARGINFO-DAG: #fn_attr[[KERNEL6:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-ARGINFO-DAG: cir.func @foo6({{.+}}) extra(#fn_attr[[KERNEL6]]) + +// LLVM-DAG: !kernel_arg_type ![[MD61:[0-9]+]] +// LLVM-ARGINFO-DAG: !kernel_arg_name ![[MD62:[0-9]+]] +// LLVM-DAG: ![[MD61]] = !{!"char16*"} +// LLVM-ARGINFO-DAG: ![[MD62]] = !{!"arg"} + +kernel void foo9(signed char sc1, global const signed char* sc2) {} + +// CIR-DAG: #fn_attr[[KERNEL9:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-DAG: cir.func @foo9({{.+}}) extra(#fn_attr[[KERNEL9]]) +// CIR-ARGINFO-DAG: #fn_attr[[KERNEL9:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata +// CIR-ARGINFO-DAG: cir.func @foo9({{.+}}) extra(#fn_attr[[KERNEL9]]) + +// LLVM-DAG: define{{.*}} void @foo9{{.+}} !kernel_arg_addr_space ![[SCHAR_AS_QUAL:[0-9]+]] !kernel_arg_access_qual ![[MD42]] !kernel_arg_type ![[SCHAR_TY:[0-9]+]] !kernel_arg_base_type ![[SCHAR_TY]] !kernel_arg_type_qual ![[SCHAR_QUAL:[0-9]+]] { +// LLVM-ARGINFO-DAG: define{{.*}} void @foo9{{.+}} !kernel_arg_addr_space ![[SCHAR_AS_QUAL:[0-9]+]] !kernel_arg_access_qual ![[MD42]] !kernel_arg_type ![[SCHAR_TY:[0-9]+]] !kernel_arg_base_type ![[SCHAR_TY]] !kernel_arg_type_qual ![[SCHAR_QUAL:[0-9]+]] !kernel_arg_name ![[SCHAR_ARG_NAMES:[0-9]+]] { + +// LLVM-DAG: ![[SCHAR_AS_QUAL]] = !{i32 0, i32 1} +// LLVM-DAG: ![[SCHAR_TY]] = !{!"char", !"char*"} +// LLVM-DAG: ![[SCHAR_QUAL]] = !{!"", !"const"} +// LLVM-ARGINFO-DAG: ![[SCHAR_ARG_NAMES]] = !{!"sc1", !"sc2"} diff --git a/clang/test/CIR/CodeGen/OpenCL/kernel-arg-metadata.cl b/clang/test/CIR/CodeGen/OpenCL/kernel-arg-metadata.cl new file mode 100644 index 000000000000..ccc8ce967e50 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/kernel-arg-metadata.cl @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -fclangir -triple spirv64-unknown-unknown -emit-cir -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR +// RUN: %clang_cc1 %s -fclangir -triple spirv64-unknown-unknown -emit-llvm -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM + +__kernel void kernel_function() {} + +// CIR: #fn_attr[[ATTR:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata{{.+}} +// CIR: cir.func @kernel_function() cc(spir_kernel) extra(#fn_attr[[ATTR]]) + +// LLVM: define {{.*}}spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] { +// LLVM: ![[MD]] = !{} diff --git a/clang/test/CIR/CodeGen/OpenCL/kernel-attributes.cl b/clang/test/CIR/CodeGen/OpenCL/kernel-attributes.cl new file mode 100644 index 000000000000..8a32f1d8088d --- /dev/null +++ 
b/clang/test/CIR/CodeGen/OpenCL/kernel-attributes.cl @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR +// RUN: %clang_cc1 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM + +typedef unsigned int uint4 __attribute__((ext_vector_type(4))); + + +kernel __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(1,2,4))) void kernel1(int a) {} + +// CIR-DAG: #fn_attr[[KERNEL1:[0-9]*]] = {{.+}}cl.kernel_metadata = #cir.cl.kernel_metadata{{.+}} +// CIR-DAG: cir.func @kernel1{{.+}} extra(#fn_attr[[KERNEL1]]) + +// LLVM-DAG: define {{(dso_local )?}}spir_kernel void @kernel1(i32 {{[^%]*}}%0) {{[^{]+}} !reqd_work_group_size ![[MD1_REQD_WG:[0-9]+]] !vec_type_hint ![[MD1_VEC_TYPE:[0-9]+]] +// LLVM-DAG: [[MD1_VEC_TYPE]] = !{i32 undef, i32 1} +// LLVM-DAG: [[MD1_REQD_WG]] = !{i32 1, i32 2, i32 4} + + +kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {} + +// CIR-DAG: #fn_attr[[KERNEL2:[0-9]*]] = {{.+}}cl.kernel_metadata = #cir.cl.kernel_metadata, vec_type_hint_signedness = 0>{{.+}} +// CIR-DAG: cir.func @kernel2{{.+}} extra(#fn_attr[[KERNEL2]]) + +// LLVM-DAG: define {{(dso_local )?}}spir_kernel void @kernel2(i32 {{[^%]*}}%0) {{[^{]+}} !vec_type_hint ![[MD2_VEC_TYPE:[0-9]+]] !work_group_size_hint ![[MD2_WG_SIZE:[0-9]+]] +// LLVM-DAG: [[MD2_VEC_TYPE]] = !{<4 x i32> undef, i32 0} +// LLVM-DAG: [[MD2_WG_SIZE]] = !{i32 8, i32 16, i32 32} + + +kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {} + +// CIR-DAG: #fn_attr[[KERNEL3:[0-9]*]] = {{.+}}cl.kernel_metadata = #cir.cl.kernel_metadata{{.+}} +// CIR-DAG: cir.func @kernel3{{.+}} extra(#fn_attr[[KERNEL3]]) + +// LLVM-DAG: define {{(dso_local )?}}spir_kernel void @kernel3(i32 {{[^%]*}}%0) {{[^{]+}} !intel_reqd_sub_group_size ![[MD3_INTEL:[0-9]+]] +// LLVM-DAG: [[MD3_INTEL]] = !{i32 8} diff --git a/clang/test/CIR/CodeGen/OpenCL/kernel-unit-attr.cl b/clang/test/CIR/CodeGen/OpenCL/kernel-unit-attr.cl new file mode 100644 index 000000000000..01348013bbf0 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/kernel-unit-attr.cl @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR + + +// CIR: #fn_attr[[KERNEL1:[0-9]*]] = {{.+}}cl.kernel = #cir.cl.kernel +// CIR-NEXT: #fn_attr[[FUNC1:[0-9]*]] = +// CIR-NOT: cl.kernel = #cir.cl.kernel + +kernel void kernel1() {} +// CIR: cir.func @kernel1{{.+}} extra(#fn_attr[[KERNEL1]]) + +void func1() {} + +// CIR: cir.func @func1{{.+}} extra(#fn_attr[[FUNC1]]) diff --git a/clang/test/CIR/CodeGen/OpenCL/opencl-c-lang.cl b/clang/test/CIR/CodeGen/OpenCL/opencl-c-lang.cl new file mode 100644 index 000000000000..67aeda32c2a1 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/opencl-c-lang.cl @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR + +// CIR: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang diff --git a/clang/test/CIR/CodeGen/OpenCL/opencl-version.cl b/clang/test/CIR/CodeGen/OpenCL/opencl-version.cl new file mode 100644 index 000000000000..f0536a560b97 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/opencl-version.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple 
spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR-CL30 +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM-CL30 +// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR-CL12 +// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM-CL12 + +// CIR-CL30: module {{.*}} attributes {{{.*}}cir.cl.version = #cir.cl.version<3, 0> +// LLVM-CL30: !opencl.ocl.version = !{![[MDCL30:[0-9]+]]} +// LLVM-CL30: ![[MDCL30]] = !{i32 3, i32 0} + +// CIR-CL12: module {{.*}} attributes {{{.*}}cir.cl.version = #cir.cl.version<1, 2> +// LLVM-CL12: !opencl.ocl.version = !{![[MDCL12:[0-9]+]]} +// LLVM-CL12: ![[MDCL12]] = !{i32 1, i32 2} diff --git a/clang/test/CIR/CodeGen/OpenCL/spir-calling-conv.cl b/clang/test/CIR/CodeGen/OpenCL/spir-calling-conv.cl new file mode 100644 index 000000000000..bf711bec7d46 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/spir-calling-conv.cl @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fclangir %s -O0 -triple "spirv64-unknown-unknown" -emit-cir -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR +// RUN: %clang_cc1 -fclangir %s -O0 -triple "spirv64-unknown-unknown" -emit-llvm -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM + +// CIR: cir.func {{.*}}@get_dummy_id{{.*}} cc(spir_function) +// LLVM-DAG: declare{{.*}} spir_func i32 @get_dummy_id( +int get_dummy_id(int D); + +// CIR: cir.func {{.*}}@bar{{.*}} cc(spir_kernel) +// LLVM-DAG: declare{{.*}} spir_kernel void @bar( +kernel void bar(global int *A); + +// CIR: cir.func {{.*}}@foo{{.*}} cc(spir_kernel) +// LLVM-DAG: define{{.*}} spir_kernel void @foo( +kernel void foo(global int *A) { + int id = get_dummy_id(0); + // CIR: %{{[0-9]+}} = cir.call @get_dummy_id(%2) : (!s32i) -> !s32i cc(spir_function) + // LLVM: %{{[a-z0-9_]+}} = call spir_func i32 @get_dummy_id( + A[id] = id; + bar(A); + // CIR: cir.call @bar(%8) : (!cir.ptr) -> () cc(spir_kernel) + // LLVM: call spir_kernel void @bar(ptr addrspace(1) +} diff --git a/clang/test/CIR/CodeGen/OpenCL/spirv-target.cl b/clang/test/CIR/CodeGen/OpenCL/spirv-target.cl new file mode 100644 index 000000000000..dadf4e6022b5 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/spirv-target.cl @@ -0,0 +1,30 @@ +// See also: clang/test/CodeGenOpenCL/spirv_target.cl +// RUN: %clang_cc1 -cl-std=CL3.0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t_64.cir +// RUN: FileCheck --input-file=%t_64.cir %s --check-prefix=CIR-SPIRV64 +// RUN: %clang_cc1 -cl-std=CL3.0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t_64.ll +// RUN: FileCheck --input-file=%t_64.ll %s --check-prefix=LLVM-SPIRV64 + + +// CIR-SPIRV64: cir.triple = "spirv64-unknown-unknown" +// LLVM-SPIRV64: target triple = "spirv64-unknown-unknown" + +typedef struct { + char c; + void *v; + void *v2; +} my_st; + +// CIR-SPIRV64: cir.func @func( +// LLVM-SPIRV64: define spir_kernel void @func( +kernel void func(global long *arg) { + int res1[sizeof(my_st) == 24 ? 1 : -1]; // expected-no-diagnostics + int res2[sizeof(void *) == 8 ? 1 : -1]; // expected-no-diagnostics + int res3[sizeof(arg) == 8 ? 
1 : -1]; // expected-no-diagnostics + + my_st *tmp = 0; + + // LLVM-SPIRV64: store i64 8, ptr addrspace(1) + arg[0] = (long)(&tmp->v); + // LLVM-SPIRV64: store i64 16, ptr addrspace(1) + arg[1] = (long)(&tmp->v2); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/static-vardecl.cl b/clang/test/CIR/CodeGen/OpenCL/static-vardecl.cl new file mode 100644 index 000000000000..9ad8277012c4 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/static-vardecl.cl @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +kernel void test_static(int i) { + static global int b = 15; + // CIR-DAG: cir.global "private" internal dsolocal addrspace(offload_global) @test_static.b = #cir.int<15> : !s32i {alignment = 4 : i64} + // LLVM-DAG: @test_static.b = internal addrspace(1) global i32 15 + + local int c; + // CIR-DAG: cir.global "private" internal dsolocal addrspace(offload_local) @test_static.c : !s32i {alignment = 4 : i64} + // LLVM-DAG: @test_static.c = internal addrspace(3) global i32 undef + + // CIR-DAG: %[[#ADDRB:]] = cir.get_global @test_static.b : !cir.ptr + // CIR-DAG: %[[#ADDRC:]] = cir.get_global @test_static.c : !cir.ptr + + c = b; + // CIR: %[[#LOADB:]] = cir.load %[[#ADDRB]] : !cir.ptr, !s32i + // CIR-NEXT: cir.store %[[#LOADB]], %[[#ADDRC]] : !s32i, !cir.ptr + + // LLVM: %[[#LOADB:]] = load i32, ptr addrspace(1) @test_static.b, align 4 + // LLVM-NEXT: store i32 %[[#LOADB]], ptr addrspace(3) @test_static.c, align 4 +} diff --git a/clang/test/CIR/CodeGen/OpenMP/barrier.cpp b/clang/test/CIR/CodeGen/OpenMP/barrier.cpp new file mode 100644 index 000000000000..b93016a3f1e4 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenMP/barrier.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp-enable-irbuilder -fopenmp -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: cir.func +void omp_barrier_1(){ +// CHECK: omp.barrier + #pragma omp barrier +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/OpenMP/parallel.cpp b/clang/test/CIR/CodeGen/OpenMP/parallel.cpp new file mode 100644 index 000000000000..d2523d7b5396 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenMP/parallel.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: cir.func +void omp_parallel_1() { +// CHECK: omp.parallel { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: } +// CHECK-NEXT: omp.terminator +// CHECK-NEXT: } +#pragma omp parallel +{ +} +} +// CHECK: cir.func +void omp_parallel_2() { +// CHECK: %[[YVarDecl:.+]] = {{.*}} ["y", init] +// CHECK: omp.parallel { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[XVarDecl:.+]] = {{.*}} ["x", init] +// CHECK-NEXT: %[[C1:.+]] = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.store %[[C1]], %[[XVarDecl]] +// CHECK-NEXT: %[[XVal:.+]] = cir.load %[[XVarDecl]] +// CHECK-NEXT: %[[COne:.+]] = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %[[BinOpVal:.+]] = cir.binop(add, %[[XVal]], %[[COne]]) +// CHECK-NEXT: cir.store %[[BinOpVal]], %[[YVarDecl]] +// CHECK-NEXT: } +// CHECK-NEXT: omp.terminator +// CHECK-NEXT: } + int y = 0; +#pragma omp parallel +{ + int x = 1; + y = x + 1; +} +} diff --git 
a/clang/test/CIR/CodeGen/OpenMP/taskwait.cpp b/clang/test/CIR/CodeGen/OpenMP/taskwait.cpp new file mode 100644 index 000000000000..4c1df728db2a --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenMP/taskwait.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp-enable-irbuilder -fopenmp -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: cir.func +void omp_taskwait_1(){ +// CHECK: omp.taskwait + #pragma omp taskwait +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/OpenMP/taskyield.cpp b/clang/test/CIR/CodeGen/OpenMP/taskyield.cpp new file mode 100644 index 000000000000..aa2903c07f74 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenMP/taskyield.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp-enable-irbuilder -fopenmp -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: cir.func +void omp_taskyield_1(){ +// CHECK: omp.taskyield + #pragma omp taskyield +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/String.cpp b/clang/test/CIR/CodeGen/String.cpp new file mode 100644 index 000000000000..5898eb09fbb6 --- /dev/null +++ b/clang/test/CIR/CodeGen/String.cpp @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +class String { + char *storage{nullptr}; + long size; + long capacity; + +public: + String() : size{0} {} + String(int size) : size{size} {} + String(const char *s) {} +}; + +void test() { + String s1{}; + String s2{1}; + String s3{"abcdefghijklmnop"}; +} + +// CHECK: cir.func linkonce_odr @_ZN6StringC2Ev +// CHECK-NEXT: %0 = cir.alloca !cir.ptr +// CHECK-NEXT: cir.store %arg0, %0 +// CHECK-NEXT: %1 = cir.load %0 +// CHECK-NEXT: %2 = cir.get_member %1[0] {name = "storage"} +// CHECK-NEXT: %3 = cir.const #cir.ptr : !cir.ptr +// CHECK-NEXT: cir.store %3, %2 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %4 = cir.get_member %1[1] {name = "size"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %5 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: %6 = cir.cast(integral, %5 : !s32i), !s64i +// CHECK-NEXT: cir.store %6, %4 : !s64i, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } +// CHECK: cir.func linkonce_odr @_ZN6StringC2Ei +// CHECK-NEXT: %0 = cir.alloca !cir.ptr +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["size", init] +// CHECK-NEXT: cir.store %arg0, %0 +// CHECK-NEXT: cir.store %arg1, %1 +// CHECK-NEXT: %2 = cir.load %0 +// CHECK-NEXT: %3 = cir.get_member %2[0] {name = "storage"} +// CHECK-NEXT: %4 = cir.const #cir.ptr : !cir.ptr +// CHECK-NEXT: cir.store %4, %3 +// CHECK-NEXT: %5 = cir.get_member %2[1] {name = "size"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: %6 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %7 = cir.cast(integral, %6 : !s32i), !s64i +// CHECK-NEXT: cir.store %7, %5 : !s64i, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// CHECK: cir.func linkonce_odr @_ZN6StringC2EPKc +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: cir.store %arg1, %1 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %3 = cir.get_member %2[0] {name = "storage"} : !cir.ptr -> !cir.ptr> +// CHECK-NEXT: %4 = cir.const #cir.ptr : !cir.ptr +// CHECK-NEXT: cir.store %4, %3 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: 
cir.return + +// CHECK: cir.func linkonce_odr @_ZN6StringC1EPKc +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: cir.store %arg1, %1 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.call @_ZN6StringC2EPKc(%2, %3) : (!cir.ptr, !cir.ptr) -> () +// CHECK-NEXT: cir.return + +// CHECK: cir.func @_Z4testv() +// CHECK: cir.call @_ZN6StringC1Ev(%0) : (!cir.ptr) -> () +// CHECK: cir.call @_ZN6StringC1Ei(%1, %3) : (!cir.ptr, !s32i) -> () +// CHECK: cir.call @_ZN6StringC1EPKc(%2, %5) : (!cir.ptr, !cir.ptr) -> () diff --git a/clang/test/CIR/CodeGen/StringExample.cpp b/clang/test/CIR/CodeGen/StringExample.cpp new file mode 100644 index 000000000000..a2c0ef374f1c --- /dev/null +++ b/clang/test/CIR/CodeGen/StringExample.cpp @@ -0,0 +1,34 @@ +// RUN: true + +int strlen(char const *); +void puts(char const *); + +struct String { + long size; + long capacity; + char *storage; + + String() : size{0}, capacity{0}, storage{nullptr} {} + String(char const *s) : size{strlen(s)}, capacity{size}, + storage{new char[capacity]} {} +}; + +struct StringView { + long size; + char *storage; + + StringView(const String &s) : size{s.size}, storage{s.storage} {} + StringView() : size{0}, storage{nullptr} {} +}; + +int main() { + StringView sv; + { + String s = "Hi"; + sv = s; + + puts(sv.storage); + } + + puts(sv.storage); +} diff --git a/clang/test/CIR/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CIR/CodeGen/aarch64-neon-intrinsics.c new file mode 100644 index 000000000000..02aa70a4d628 --- /dev/null +++ b/clang/test/CIR/CodeGen/aarch64-neon-intrinsics.c @@ -0,0 +1,17465 @@ +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -fclangir -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-cir -o %t.cir %s +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -fclangir -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \ +// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// REQUIRES: aarch64-registered-target || arm-registered-target + +// This test mimics clang/test/CodeGen/aarch64-neon-intrinsics.c, which eventually +// CIR shall be able to support fully. Since this is going to take some time to converge, +// the unsupported/NYI code is commented out, so that we can incrementally improve this. +// The NYI filecheck used contains the LLVM output from OG codegen that should guide the +// correct result when implementing this into the CIR pipeline. 
+ +#include <arm_neon.h> + +// NYI-LABEL: @test_vadd_s8( +// NYI: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 +// NYI: ret <8 x i8> [[ADD_I]] +// int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) { +// return vadd_s8(v1, v2); +// } + +// NYI-LABEL: @test_vadd_s16( +// NYI: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 +// NYI: ret <4 x i16> [[ADD_I]] +// int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { +// return vadd_s16(v1, v2); +// } + +// NYI-LABEL: @test_vadd_s32( +// NYI: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 +// NYI: ret <2 x i32> [[ADD_I]] +// int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { +// return vadd_s32(v1, v2); +// } + +// NYI-LABEL: @test_vadd_s64( +// NYI: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 +// NYI: ret <1 x i64> [[ADD_I]] +// int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { +// return vadd_s64(v1, v2); +// } + +// NYI-LABEL: @test_vadd_f32( +// NYI: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2 +// NYI: ret <2 x float> [[ADD_I]] +// float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { +// return vadd_f32(v1, v2); +// } + +// NYI-LABEL: @test_vadd_u8( +// NYI: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 +// NYI: ret <8 x i8> [[ADD_I]] +// uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { +// return vadd_u8(v1, v2); +// } + +// NYI-LABEL: @test_vadd_u16( +// NYI: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 +// NYI: ret <4 x i16> [[ADD_I]] +// uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { +// return vadd_u16(v1, v2); +// } + +// NYI-LABEL: @test_vadd_u32( +// NYI: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 +// NYI: ret <2 x i32> [[ADD_I]] +// uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { +// return vadd_u32(v1, v2); +// } + +// NYI-LABEL: @test_vadd_u64( +// NYI: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 +// NYI: ret <1 x i64> [[ADD_I]] +// uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { +// return vadd_u64(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_s8( +// NYI: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 +// NYI: ret <16 x i8> [[ADD_I]] +// int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { +// return vaddq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_s16( +// NYI: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { +// return vaddq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_s32( +// NYI: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) { +// return vaddq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_s64( +// NYI: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { +// return vaddq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_f32( +// NYI: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2 +// NYI: ret <4 x float> [[ADD_I]] +// float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { +// return vaddq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_f64( +// NYI: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2 +// NYI: ret <2 x double> [[ADD_I]] +// float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) { +// return vaddq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_u8( +// NYI: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 +// NYI: ret <16 x i8> [[ADD_I]] +// uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vaddq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_u16( +// NYI: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { +//
return vaddq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_u32( +// NYI: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vaddq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vaddq_u64( +// NYI: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vaddq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vsub_s8( +// NYI: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 +// NYI: ret <8 x i8> [[SUB_I]] +// int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { +// return vsub_s8(v1, v2); +// } + +// NYI-LABEL: @test_vsub_s16( +// NYI: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 +// NYI: ret <4 x i16> [[SUB_I]] +// int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { +// return vsub_s16(v1, v2); +// } + +// NYI-LABEL: @test_vsub_s32( +// NYI: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 +// NYI: ret <2 x i32> [[SUB_I]] +// int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { +// return vsub_s32(v1, v2); +// } + +// NYI-LABEL: @test_vsub_s64( +// NYI: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 +// NYI: ret <1 x i64> [[SUB_I]] +// int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { +// return vsub_s64(v1, v2); +// } + +// NYI-LABEL: @test_vsub_f32( +// NYI: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2 +// NYI: ret <2 x float> [[SUB_I]] +// float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { +// return vsub_f32(v1, v2); +// } + +// NYI-LABEL: @test_vsub_u8( +// NYI: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 +// NYI: ret <8 x i8> [[SUB_I]] +// uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { +// return vsub_u8(v1, v2); +// } + +// NYI-LABEL: @test_vsub_u16( +// NYI: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 +// NYI: ret <4 x i16> [[SUB_I]] +// uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) { +// return vsub_u16(v1, v2); +// } + +// NYI-LABEL: @test_vsub_u32( +// NYI: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 +// NYI: ret <2 x i32> [[SUB_I]] +// uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) { +// return vsub_u32(v1, v2); +// } + +// NYI-LABEL: @test_vsub_u64( +// NYI: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 +// NYI: ret <1 x i64> [[SUB_I]] +// uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) { +// return vsub_u64(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_s8( +// NYI: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 +// NYI: ret <16 x i8> [[SUB_I]] +// int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) { +// return vsubq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_s16( +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) { +// return vsubq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_s32( +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) { +// return vsubq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_s64( +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 +// NYI: ret <2 x i64> [[SUB_I]] +// int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) { +// return vsubq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_f32( +// NYI: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2 +// NYI: ret <4 x float> [[SUB_I]] +// float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) { +// return vsubq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_f64( +// NYI: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2 +// NYI: ret <2 x double> [[SUB_I]] +// float64x2_t test_vsubq_f64(float64x2_t v1, 
float64x2_t v2) { +// return vsubq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_u8( +// NYI: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 +// NYI: ret <16 x i8> [[SUB_I]] +// uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vsubq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_u16( +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vsubq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_u32( +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vsubq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vsubq_u64( +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 +// NYI: ret <2 x i64> [[SUB_I]] +// uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vsubq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vmul_s8( +// NYI: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 +// NYI: ret <8 x i8> [[MUL_I]] +// int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) { +// return vmul_s8(v1, v2); +// } + +// NYI-LABEL: @test_vmul_s16( +// NYI: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 +// NYI: ret <4 x i16> [[MUL_I]] +// int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) { +// return vmul_s16(v1, v2); +// } + +// NYI-LABEL: @test_vmul_s32( +// NYI: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 +// NYI: ret <2 x i32> [[MUL_I]] +// int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) { +// return vmul_s32(v1, v2); +// } + +// NYI-LABEL: @test_vmul_f32( +// NYI: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2 +// NYI: ret <2 x float> [[MUL_I]] +// float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { +// return vmul_f32(v1, v2); +// } + +// NYI-LABEL: @test_vmul_u8( +// NYI: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 +// NYI: ret <8 x i8> [[MUL_I]] +// uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) { +// return vmul_u8(v1, v2); +// } + +// NYI-LABEL: @test_vmul_u16( +// NYI: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 +// NYI: ret <4 x i16> [[MUL_I]] +// uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) { +// return vmul_u16(v1, v2); +// } + +// NYI-LABEL: @test_vmul_u32( +// NYI: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 +// NYI: ret <2 x i32> [[MUL_I]] +// uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) { +// return vmul_u32(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_s8( +// NYI: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 +// NYI: ret <16 x i8> [[MUL_I]] +// int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) { +// return vmulq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_s16( +// NYI: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 +// NYI: ret <8 x i16> [[MUL_I]] +// int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) { +// return vmulq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_s32( +// NYI: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 +// NYI: ret <4 x i32> [[MUL_I]] +// int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) { +// return vmulq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_u8( +// NYI: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 +// NYI: ret <16 x i8> [[MUL_I]] +// uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vmulq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_u16( +// NYI: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 +// NYI: ret <8 x i16> [[MUL_I]] +// uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vmulq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_u32( +// NYI: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 +// NYI: ret <4 x i32> [[MUL_I]] +// uint32x4_t 
test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vmulq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_f32( +// NYI: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2 +// NYI: ret <4 x float> [[MUL_I]] +// float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) { +// return vmulq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_f64( +// NYI: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2 +// NYI: ret <2 x double> [[MUL_I]] +// float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { +// return vmulq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vmul_p8( +// NYI: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VMUL_V_I]] +// poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { +// return vmul_p8(v1, v2); +// } + +// NYI-LABEL: @test_vmulq_p8( +// NYI: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VMULQ_V_I]] +// poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { +// return vmulq_p8(v1, v2); +// } + +// NYI-LABEL: @test_vmla_s8( +// NYI: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] +// NYI: ret <8 x i8> [[ADD_I]] +// int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { +// return vmla_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmla_s16( +// NYI: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { +// return (int8x8_t)vmla_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmla_s32( +// NYI: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] +// NYI: ret <2 x i32> [[ADD_I]] +// int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { +// return vmla_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmla_f32( +// NYI: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 +// NYI: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]] +// NYI: ret <2 x float> [[ADD_I]] +// float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { +// return vmla_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmla_u8( +// NYI: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] +// NYI: ret <8 x i8> [[ADD_I]] +// uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { +// return vmla_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmla_u16( +// NYI: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] +// NYI: ret <4 x i16> [[ADD_I]] +// uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { +// return vmla_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmla_u32( +// NYI: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] +// NYI: ret <2 x i32> [[ADD_I]] +// uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { +// return vmla_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_s8( +// NYI: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] +// NYI: ret <16 x i8> [[ADD_I]] +// int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { +// return vmlaq_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_s16( +// NYI: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t 
test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { +// return vmlaq_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_s32( +// NYI: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { +// return vmlaq_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_f32( +// NYI: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 +// NYI: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]] +// NYI: ret <4 x float> [[ADD_I]] +// float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { +// return vmlaq_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_u8( +// NYI: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] +// NYI: ret <16 x i8> [[ADD_I]] +// uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { +// return vmlaq_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_u16( +// NYI: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { +// return vmlaq_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_u32( +// NYI: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 +// NYI: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { +// return vmlaq_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlaq_f64( +// NYI: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 +// NYI: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]] +// NYI: ret <2 x double> [[ADD_I]] +// float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { +// return vmlaq_f64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_s8( +// NYI: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] +// NYI: ret <8 x i8> [[SUB_I]] +// int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { +// return vmls_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_s16( +// NYI: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { +// return (int8x8_t)vmls_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_s32( +// NYI: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] +// NYI: ret <2 x i32> [[SUB_I]] +// int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { +// return vmls_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_f32( +// NYI: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 +// NYI: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]] +// NYI: ret <2 x float> [[SUB_I]] +// float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { +// return vmls_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_u8( +// NYI: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] +// NYI: ret <8 x i8> [[SUB_I]] +// uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { +// return vmls_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_u16( +// NYI: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] +// NYI: ret <4 x i16> [[SUB_I]] +// uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { +// 
return vmls_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmls_u32( +// NYI: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] +// NYI: ret <2 x i32> [[SUB_I]] +// uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { +// return vmls_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_s8( +// NYI: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] +// NYI: ret <16 x i8> [[SUB_I]] +// int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { +// return vmlsq_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_s16( +// NYI: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { +// return vmlsq_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_s32( +// NYI: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { +// return vmlsq_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_f32( +// NYI: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 +// NYI: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]] +// NYI: ret <4 x float> [[SUB_I]] +// float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { +// return vmlsq_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_u8( +// NYI: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] +// NYI: ret <16 x i8> [[SUB_I]] +// uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { +// return vmlsq_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_u16( +// NYI: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { +// return vmlsq_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_u32( +// NYI: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { +// return vmlsq_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vmlsq_f64( +// NYI: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 +// NYI: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]] +// NYI: ret <2 x double> [[SUB_I]] +// float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { +// return vmlsq_f64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vfma_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> +// NYI: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1) +// NYI: ret <2 x float> [[TMP3]] +// float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { +// return vfma_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vfmaq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> +// NYI: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1) +// NYI: ret <4 x float> [[TMP3]] +// float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { +// return vfmaq_f32(v1, 
v2, v3); +// } + +// NYI-LABEL: @test_vfmaq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> +// NYI: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1) +// NYI: ret <2 x double> [[TMP3]] +// float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { +// return vfmaq_f64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vfms_f32( +// NYI: [[SUB_I:%.*]] = fneg <2 x float> %v2 +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> +// NYI: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1) +// NYI: ret <2 x float> [[TMP3]] +// float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { +// return vfms_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vfmsq_f32( +// NYI: [[SUB_I:%.*]] = fneg <4 x float> %v2 +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> +// NYI: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1) +// NYI: ret <4 x float> [[TMP3]] +// float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { +// return vfmsq_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vfmsq_f64( +// NYI: [[SUB_I:%.*]] = fneg <2 x double> %v2 +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> +// NYI: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1) +// NYI: ret <2 x double> [[TMP3]] +// float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { +// return vfmsq_f64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vdivq_f64( +// NYI: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2 +// NYI: ret <2 x double> [[DIV_I]] +// float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) { +// return vdivq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vdivq_f32( +// NYI: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2 +// NYI: ret <4 x float> [[DIV_I]] +// float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) { +// return vdivq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vdiv_f32( +// NYI: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2 +// NYI: ret <2 x float> [[DIV_I]] +// float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { +// return vdiv_f32(v1, v2); +// } + +// NYI-LABEL: @test_vaba_s8( +// NYI: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) +// NYI: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] +// NYI: ret <8 x i8> [[ADD_I]] +// int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { +// return vaba_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vaba_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) +// NYI: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] +// NYI: ret <4 x i16> [[ADD_I]] +// int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { +// return vaba_s16(v1, v2, v3); 
+// } + +// NYI-LABEL: @test_vaba_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) +// NYI: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] +// NYI: ret <2 x i32> [[ADD_I]] +// int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { +// return vaba_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vaba_u8( +// NYI: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) +// NYI: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] +// NYI: ret <8 x i8> [[ADD_I]] +// uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { +// return vaba_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vaba_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) +// NYI: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] +// NYI: ret <4 x i16> [[ADD_I]] +// uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { +// return vaba_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vaba_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) +// NYI: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] +// NYI: ret <2 x i32> [[ADD_I]] +// uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { +// return vaba_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabaq_s8( +// NYI: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) +// NYI: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] +// NYI: ret <16 x i8> [[ADD_I]] +// int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { +// return vabaq_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabaq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) +// NYI: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { +// return vabaq_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabaq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) +// NYI: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { +// return vabaq_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabaq_u8( +// NYI: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) +// NYI: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] +// NYI: ret <16 x i8> [[ADD_I]] +// uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { +// return vabaq_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabaq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x 
i16> %v3) +// NYI: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { +// return vabaq_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabaq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) +// NYI: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { +// return vabaq_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vabd_s8( +// NYI: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VABD_I]] +// int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { +// return vabd_s8(v1, v2); +// } + +// NYI-LABEL: @test_vabd_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: ret <4 x i16> [[VABD2_I]] +// int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { +// return vabd_s16(v1, v2); +// } + +// NYI-LABEL: @test_vabd_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: ret <2 x i32> [[VABD2_I]] +// int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { +// return vabd_s32(v1, v2); +// } + +// NYI-LABEL: @test_vabd_u8( +// NYI: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VABD_I]] +// uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { +// return vabd_u8(v1, v2); +// } + +// NYI-LABEL: @test_vabd_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: ret <4 x i16> [[VABD2_I]] +// uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { +// return vabd_u16(v1, v2); +// } + +// NYI-LABEL: @test_vabd_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: ret <2 x i32> [[VABD2_I]] +// uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { +// return vabd_u32(v1, v2); +// } + +// NYI-LABEL: @test_vabd_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2) +// NYI: ret <2 x float> [[VABD2_I]] +// float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { +// return vabd_f32(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_s8( +// NYI: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VABD_I]] +// int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { +// return vabdq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VABD2_I:%.*]] = call <8 x 
i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: ret <8 x i16> [[VABD2_I]] +// int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { +// return vabdq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: ret <4 x i32> [[VABD2_I]] +// int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { +// return vabdq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_u8( +// NYI: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VABD_I]] +// uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vabdq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: ret <8 x i16> [[VABD2_I]] +// uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vabdq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: ret <4 x i32> [[VABD2_I]] +// uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vabdq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2) +// NYI: ret <4 x float> [[VABD2_I]] +// float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { +// return vabdq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vabdq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2) +// NYI: ret <2 x double> [[VABD2_I]] +// float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { +// return vabdq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vbsl_s8( +// NYI: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 +// NYI: [[TMP0:%.*]] = xor <8 x i8> %v1, +// NYI: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 +// NYI: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] +// NYI: ret <8 x i8> [[VBSL2_I]] +// int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { +// return vbsl_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <4 x i16> %v1, +// NYI: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> +// NYI: ret <8 x i8> [[TMP4]] +// int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { +// return (int8x8_t)vbsl_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: 
[[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <2 x i32> %v1, +// NYI: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <2 x i32> [[VBSL5_I]] +// int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { +// return vbsl_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <1 x i64> %v1, +// NYI: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <1 x i64> [[VBSL5_I]] +// int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { +// return vbsl_s64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_u8( +// NYI: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 +// NYI: [[TMP0:%.*]] = xor <8 x i8> %v1, +// NYI: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 +// NYI: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] +// NYI: ret <8 x i8> [[VBSL2_I]] +// uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { +// return vbsl_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <4 x i16> %v1, +// NYI: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <4 x i16> [[VBSL5_I]] +// uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { +// return vbsl_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <2 x i32> %v1, +// NYI: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <2 x i32> [[VBSL5_I]] +// uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { +// return vbsl_u32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <1 x i64> %v1, +// NYI: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <1 x i64> [[VBSL5_I]] +// uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { +// return vbsl_u64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_f32( +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8> +// NYI: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// NYI: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> +// NYI: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]] +// NYI: [[TMP4:%.*]] = xor 
<2 x i32> %v1, +// NYI: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] +// NYI: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] +// NYI: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> +// NYI: ret <2 x float> [[TMP5]] +// float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { +// return vbsl_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8> +// NYI: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// NYI: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]] +// NYI: [[TMP3:%.*]] = xor <1 x i64> %v1, +// NYI: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] +// NYI: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> +// NYI: ret <1 x double> [[TMP4]] +// float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { +// return vbsl_f64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_p8( +// NYI: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 +// NYI: [[TMP0:%.*]] = xor <8 x i8> %v1, +// NYI: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 +// NYI: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] +// NYI: ret <8 x i8> [[VBSL2_I]] +// poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { +// return vbsl_p8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbsl_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> +// NYI: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <4 x i16> %v1, +// NYI: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <4 x i16> [[VBSL5_I]] +// poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { +// return vbsl_p16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_s8( +// NYI: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 +// NYI: [[TMP0:%.*]] = xor <16 x i8> %v1, +// NYI: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 +// NYI: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] +// NYI: ret <16 x i8> [[VBSL2_I]] +// int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { +// return vbslq_s8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <8 x i16> %v1, +// NYI: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <8 x i16> [[VBSL5_I]] +// int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { +// return vbslq_s16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <4 x i32> %v1, +// NYI: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <4 x i32> [[VBSL5_I]] +// int32x4_t 
test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { +// return vbslq_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <2 x i64> %v1, +// NYI: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <2 x i64> [[VBSL5_I]] +// int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { +// return vbslq_s64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_u8( +// NYI: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 +// NYI: [[TMP0:%.*]] = xor <16 x i8> %v1, +// NYI: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 +// NYI: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] +// NYI: ret <16 x i8> [[VBSL2_I]] +// uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { +// return vbslq_u8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <8 x i16> %v1, +// NYI: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <8 x i16> [[VBSL5_I]] +// uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { +// return vbslq_u16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <4 x i32> %v1, +// NYI: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <4 x i32> [[VBSL5_I]] +// int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { +// return vbslq_s32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <2 x i64> %v1, +// NYI: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <2 x i64> [[VBSL5_I]] +// uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { +// return vbslq_u64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> +// NYI: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> +// NYI: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] +// NYI: [[TMP3:%.*]] = xor <4 x i32> %v1, +// NYI: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] +// NYI: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> +// NYI: ret <4 x float> [[TMP4]] +// float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { 
+// return vbslq_f32(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_p8( +// NYI: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 +// NYI: [[TMP0:%.*]] = xor <16 x i8> %v1, +// NYI: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 +// NYI: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] +// NYI: ret <16 x i8> [[VBSL2_I]] +// poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { +// return vbslq_p8(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> +// NYI: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = xor <8 x i16> %v1, +// NYI: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 +// NYI: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] +// NYI: ret <8 x i16> [[VBSL5_I]] +// poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { +// return vbslq_p16(v1, v2, v3); +// } + +// NYI-LABEL: @test_vbslq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> +// NYI: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// NYI: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] +// NYI: [[TMP3:%.*]] = xor <2 x i64> %v1, +// NYI: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] +// NYI: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> +// NYI: ret <2 x double> [[TMP4]] +// float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { +// return vbslq_f64(v1, v2, v3); +// } + +// NYI-LABEL: @test_vrecps_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2) +// NYI: ret <2 x float> [[VRECPS_V2_I]] +// float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { +// return vrecps_f32(v1, v2); +// } + +// NYI-LABEL: @test_vrecpsq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2) +// NYI: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> +// NYI: ret <4 x float> [[VRECPSQ_V2_I]] +// float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { +// return vrecpsq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vrecpsq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2) +// NYI: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8> +// NYI: ret <2 x double> [[VRECPSQ_V2_I]] +// float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { +// return vrecpsq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vrsqrts_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2) +// NYI: [[VRSQRTS_V3_I:%.*]] = bitcast <2 
x float> [[VRSQRTS_V2_I]] to <8 x i8> +// NYI: ret <2 x float> [[VRSQRTS_V2_I]] +// float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { +// return vrsqrts_f32(v1, v2); +// } + +// NYI-LABEL: @test_vrsqrtsq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2) +// NYI: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> +// NYI: ret <4 x float> [[VRSQRTSQ_V2_I]] +// float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { +// return vrsqrtsq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vrsqrtsq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2) +// NYI: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8> +// NYI: ret <2 x double> [[VRSQRTSQ_V2_I]] +// float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { +// return vrsqrtsq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vcage_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) +// NYI: ret <2 x i32> [[VCAGE_V2_I]] +// uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { +// return vcage_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcage_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x i64> [[VCAGE_V2_I]] +// uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { +// return vcage_f64(a, b); +// } + +// NYI-LABEL: @test_vcageq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) +// NYI: ret <4 x i32> [[VCAGEQ_V2_I]] +// uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { +// return vcageq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcageq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) +// NYI: ret <2 x i64> [[VCAGEQ_V2_I]] +// uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { +// return vcageq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vcagt_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) +// NYI: ret <2 x i32> [[VCAGT_V2_I]] +// uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { +// return vcagt_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcagt_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x 
i64> [[VCAGT_V2_I]] +// uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { +// return vcagt_f64(a, b); +// } + +// NYI-LABEL: @test_vcagtq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) +// NYI: ret <4 x i32> [[VCAGTQ_V2_I]] +// uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { +// return vcagtq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcagtq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) +// NYI: ret <2 x i64> [[VCAGTQ_V2_I]] +// uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { +// return vcagtq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vcale_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) +// NYI: ret <2 x i32> [[VCALE_V2_I]] +// uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { +// return vcale_f32(v1, v2); +// // Using registers other than v0, v1 are possible, but would be odd. +// } + +// NYI-LABEL: @test_vcale_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) +// NYI: ret <1 x i64> [[VCALE_V2_I]] +// uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { +// return vcale_f64(a, b); +// } + +// NYI-LABEL: @test_vcaleq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) +// NYI: ret <4 x i32> [[VCALEQ_V2_I]] +// uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { +// return vcaleq_f32(v1, v2); +// // Using registers other than v0, v1 are possible, but would be odd. +// } + +// NYI-LABEL: @test_vcaleq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) +// NYI: ret <2 x i64> [[VCALEQ_V2_I]] +// uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { +// return vcaleq_f64(v1, v2); +// // Using registers other than v0, v1 are possible, but would be odd. +// } + +// NYI-LABEL: @test_vcalt_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// NYI: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) +// NYI: ret <2 x i32> [[VCALT_V2_I]] +// uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { +// return vcalt_f32(v1, v2); +// // Using registers other than v0, v1 are possible, but would be odd. 
+// } + +// NYI-LABEL: @test_vcalt_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) +// NYI: ret <1 x i64> [[VCALT_V2_I]] +// uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { +// return vcalt_f64(a, b); +// } + +// NYI-LABEL: @test_vcaltq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// NYI: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) +// NYI: ret <4 x i32> [[VCALTQ_V2_I]] +// uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { +// return vcaltq_f32(v1, v2); +// // Using registers other than v0, v1 are possible, but would be odd. +// } + +// NYI-LABEL: @test_vcaltq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// NYI: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) +// NYI: ret <2 x i64> [[VCALTQ_V2_I]] +// uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { +// return vcaltq_f64(v1, v2); +// // Using registers other than v0, v1 are possible, but would be odd. +// } + +// NYI-LABEL: @test_vtst_s8( +// NYI: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 +// NYI: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> +// NYI: ret <8 x i8> [[VTST_I]] +// uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) { +// return vtst_s8(v1, v2); +// } + +// NYI-LABEL: @test_vtst_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> +// NYI: ret <4 x i16> [[VTST_I]] +// uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) { +// return vtst_s16(v1, v2); +// } + +// NYI-LABEL: @test_vtst_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +// NYI: ret <2 x i32> [[VTST_I]] +// uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) { +// return vtst_s32(v1, v2); +// } + +// NYI-LABEL: @test_vtst_u8( +// NYI: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 +// NYI: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> +// NYI: ret <8 x i8> [[VTST_I]] +// uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) { +// return vtst_u8(v1, v2); +// } + +// NYI-LABEL: @test_vtst_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> +// NYI: ret <4 x i16> [[VTST_I]] +// uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) { +// return vtst_u16(v1, v2); +// } + +// NYI-LABEL: @test_vtst_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 
x i8> +// NYI: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +// NYI: ret <2 x i32> [[VTST_I]] +// uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { +// return vtst_u32(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_s8( +// NYI: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 +// NYI: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> +// NYI: ret <16 x i8> [[VTST_I]] +// uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { +// return vtstq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +// NYI: ret <8 x i16> [[VTST_I]] +// uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) { +// return vtstq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +// NYI: ret <4 x i32> [[VTST_I]] +// uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { +// return vtstq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_u8( +// NYI: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 +// NYI: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> +// NYI: ret <16 x i8> [[VTST_I]] +// uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vtstq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +// NYI: ret <8 x i16> [[VTST_I]] +// uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vtstq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +// NYI: ret <4 x i32> [[VTST_I]] +// uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vtstq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> +// NYI: ret <2 x i64> [[VTST_I]] +// uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { +// return vtstq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], 
zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> +// NYI: ret <2 x i64> [[VTST_I]] +// uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vtstq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vtst_p8( +// NYI: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 +// NYI: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> +// NYI: ret <8 x i8> [[VTST_I]] +// uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { +// return vtst_p8(v1, v2); +// } + +// NYI-LABEL: @test_vtst_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> +// NYI: ret <4 x i16> [[VTST_I]] +// uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) { +// return vtst_p16(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_p8( +// NYI: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 +// NYI: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> +// NYI: ret <16 x i8> [[VTST_I]] +// uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { +// return vtstq_p8(v1, v2); +// } + +// NYI-LABEL: @test_vtstq_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 +// NYI: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +// NYI: ret <8 x i16> [[VTST_I]] +// uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { +// return vtstq_p16(v1, v2); +// } + +// NYI-LABEL: @test_vtst_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = and <1 x i64> %a, %b +// NYI: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> +// NYI: ret <1 x i64> [[VTST_I]] +// uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) { +// return vtst_s64(a, b); +// } + +// NYI-LABEL: @test_vtst_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = and <1 x i64> %a, %b +// NYI: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer +// NYI: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> +// NYI: ret <1 x i64> [[VTST_I]] +// uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) { +// return vtst_u64(a, b); +// } + +// NYI-LABEL: @test_vceq_s8( +// NYI: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { +// return vceq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vceq_s16( +// NYI: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) { +// return vceq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vceq_s32( +// NYI: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { +// return vceq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vceq_s64( +// NYI: 
[[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { +// return vceq_s64(a, b); +// } + +// NYI-LABEL: @test_vceq_u64( +// NYI: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { +// return vceq_u64(a, b); +// } + +// NYI-LABEL: @test_vceq_f32( +// NYI: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { +// return vceq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vceq_f64( +// NYI: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { +// return vceq_f64(a, b); +// } + +// NYI-LABEL: @test_vceq_u8( +// NYI: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { +// return vceq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vceq_u16( +// NYI: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) { +// return vceq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vceq_u32( +// NYI: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { +// return vceq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vceq_p8( +// NYI: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { +// return vceq_p8(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_s8( +// NYI: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { +// return vceqq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_s16( +// NYI: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { +// return vceqq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_s32( +// NYI: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { +// return vceqq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_f32( +// NYI: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { +// return vceqq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_u8( +// NYI: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vceqq_u8(v1, v2); +// } 
+ +// NYI-LABEL: @test_vceqq_u16( +// NYI: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vceqq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_u32( +// NYI: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vceqq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_p8( +// NYI: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { +// return vceqq_p8(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_s64( +// NYI: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { +// return vceqq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_u64( +// NYI: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vceqq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vceqq_f64( +// NYI: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { +// return vceqq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vcge_s8( +// NYI: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { +// return vcge_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcge_s16( +// NYI: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { +// return vcge_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcge_s32( +// NYI: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { +// return vcge_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcge_s64( +// NYI: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { +// return vcge_s64(a, b); +// } + +// NYI-LABEL: @test_vcge_u64( +// NYI: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) { +// return vcge_u64(a, b); +// } + +// NYI-LABEL: @test_vcge_f32( +// NYI: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { +// return vcge_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcge_f64( +// NYI: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t 
test_vcge_f64(float64x1_t a, float64x1_t b) { +// return vcge_f64(a, b); +// } + +// NYI-LABEL: @test_vcge_u8( +// NYI: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { +// return vcge_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcge_u16( +// NYI: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { +// return vcge_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcge_u32( +// NYI: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { +// return vcge_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_s8( +// NYI: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { +// return vcgeq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_s16( +// NYI: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { +// return vcgeq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_s32( +// NYI: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { +// return vcgeq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_f32( +// NYI: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { +// return vcgeq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_u8( +// NYI: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vcgeq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_u16( +// NYI: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vcgeq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_u32( +// NYI: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vcgeq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_s64( +// NYI: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { +// return vcgeq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_u64( +// NYI: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vcgeq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vcgeq_f64( +// NYI: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = 
sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { +// return vcgeq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vcle_s8( +// NYI: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// Notes about vcle: +// LE condition predicate implemented as GE, so check reversed operands. +// Using registers other than v0, v1 are possible, but would be odd. +// uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) { +// return vcle_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcle_s16( +// NYI: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) { +// return vcle_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcle_s32( +// NYI: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { +// return vcle_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcle_s64( +// NYI: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) { +// return vcle_s64(a, b); +// } + +// NYI-LABEL: @test_vcle_u64( +// NYI: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) { +// return vcle_u64(a, b); +// } + +// NYI-LABEL: @test_vcle_f32( +// NYI: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { +// return vcle_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcle_f64( +// NYI: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) { +// return vcle_f64(a, b); +// } + +// NYI-LABEL: @test_vcle_u8( +// NYI: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { +// return vcle_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcle_u16( +// NYI: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) { +// return vcle_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcle_u32( +// NYI: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) { +// return vcle_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_s8( +// NYI: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) { +// return vcleq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_s16( +// NYI: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] 
+// uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) { +// return vcleq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_s32( +// NYI: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) { +// return vcleq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_f32( +// NYI: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) { +// return vcleq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_u8( +// NYI: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vcleq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_u16( +// NYI: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vcleq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_u32( +// NYI: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vcleq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_s64( +// NYI: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) { +// return vcleq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_u64( +// NYI: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vcleq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vcleq_f64( +// NYI: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) { +// return vcleq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_s8( +// NYI: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) { +// return vcgt_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_s16( +// NYI: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) { +// return vcgt_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_s32( +// NYI: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { +// return vcgt_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_s64( +// NYI: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { +// return vcgt_s64(a, b); +// } + +// NYI-LABEL: @test_vcgt_u64( +// NYI: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b +// NYI: 
[[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { +// return vcgt_u64(a, b); +// } + +// NYI-LABEL: @test_vcgt_f32( +// NYI: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { +// return vcgt_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_f64( +// NYI: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { +// return vcgt_f64(a, b); +// } + +// NYI-LABEL: @test_vcgt_u8( +// NYI: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { +// return vcgt_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_u16( +// NYI: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) { +// return vcgt_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcgt_u32( +// NYI: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) { +// return vcgt_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_s8( +// NYI: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) { +// return vcgtq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_s16( +// NYI: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) { +// return vcgtq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_s32( +// NYI: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) { +// return vcgtq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_f32( +// NYI: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { +// return vcgtq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_u8( +// NYI: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vcgtq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_u16( +// NYI: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vcgtq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_u32( +// NYI: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vcgtq_u32(v1, v2); +// } + +// NYI-LABEL: 
@test_vcgtq_s64( +// NYI: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) { +// return vcgtq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_u64( +// NYI: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vcgtq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vcgtq_f64( +// NYI: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { +// return vcgtq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vclt_s8( +// NYI: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// Notes about vclt: +// LT condition predicate implemented as GT, so check reversed operands. +// Using registers other than v0, v1 are possible, but would be odd. +// uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) { +// return vclt_s8(v1, v2); +// } + +// NYI-LABEL: @test_vclt_s16( +// NYI: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) { +// return vclt_s16(v1, v2); +// } + +// NYI-LABEL: @test_vclt_s32( +// NYI: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { +// return vclt_s32(v1, v2); +// } + +// NYI-LABEL: @test_vclt_s64( +// NYI: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) { +// return vclt_s64(a, b); +// } + +// NYI-LABEL: @test_vclt_u64( +// NYI: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) { +// return vclt_u64(a, b); +// } + +// NYI-LABEL: @test_vclt_f32( +// NYI: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { +// return vclt_f32(v1, v2); +// } + +// NYI-LABEL: @test_vclt_f64( +// NYI: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b +// NYI: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> +// NYI: ret <1 x i64> [[SEXT_I]] +// uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) { +// return vclt_f64(a, b); +// } + +// NYI-LABEL: @test_vclt_u8( +// NYI: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> +// NYI: ret <8 x i8> [[SEXT_I]] +// uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { +// return vclt_u8(v1, v2); +// } + +// NYI-LABEL: @test_vclt_u16( +// NYI: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> +// NYI: ret <4 x i16> [[SEXT_I]] +// uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) { +// return vclt_u16(v1, v2); +// } + +// NYI-LABEL: @test_vclt_u32( +// NYI: [[CMP_I:%.*]] = icmp ult <2 x i32> 
%v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> +// NYI: ret <2 x i32> [[SEXT_I]] +// uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) { +// return vclt_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_s8( +// NYI: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) { +// return vcltq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_s16( +// NYI: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) { +// return vcltq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_s32( +// NYI: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) { +// return vcltq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_f32( +// NYI: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) { +// return vcltq_f32(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_u8( +// NYI: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> +// NYI: ret <16 x i8> [[SEXT_I]] +// uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vcltq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_u16( +// NYI: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> +// NYI: ret <8 x i16> [[SEXT_I]] +// uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vcltq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_u32( +// NYI: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> +// NYI: ret <4 x i32> [[SEXT_I]] +// uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vcltq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_s64( +// NYI: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) { +// return vcltq_s64(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_u64( +// NYI: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) { +// return vcltq_u64(v1, v2); +// } + +// NYI-LABEL: @test_vcltq_f64( +// NYI: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2 +// NYI: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> +// NYI: ret <2 x i64> [[SEXT_I]] +// uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { +// return vcltq_f64(v1, v2); +// } + +// NYI-LABEL: @test_vhadd_s8( +// NYI: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VHADD_V_I]] +// int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { +// return vhadd_s8(v1, v2); +// } + +// NYI-LABEL: @test_vhadd_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: 
[[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VHADD_V2_I]] +// int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { +// return vhadd_s16(v1, v2); +// } + +// NYI-LABEL: @test_vhadd_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VHADD_V2_I]] +// int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { +// return vhadd_s32(v1, v2); +// } + +// NYI-LABEL: @test_vhadd_u8( +// NYI: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VHADD_V_I]] +// uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { +// return vhadd_u8(v1, v2); +// } + +// NYI-LABEL: @test_vhadd_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VHADD_V2_I]] +// uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { +// return vhadd_u16(v1, v2); +// } + +// NYI-LABEL: @test_vhadd_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VHADD_V2_I]] +// uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { +// return vhadd_u32(v1, v2); +// } + +// NYI-LABEL: @test_vhaddq_s8( +// NYI: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VHADDQ_V_I]] +// int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { +// return vhaddq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vhaddq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VHADDQ_V2_I]] +// int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { +// return vhaddq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vhaddq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VHADDQ_V2_I]] +// int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { +// return vhaddq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vhaddq_u8( +// NYI: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VHADDQ_V_I]] +// uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vhaddq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vhaddq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VHADDQ_V2_I:%.*]] = call <8 x 
i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VHADDQ_V2_I]] +// uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vhaddq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vhaddq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VHADDQ_V2_I]] +// uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vhaddq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vhsub_s8( +// NYI: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VHSUB_V_I]] +// int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { +// return vhsub_s8(v1, v2); +// } + +// NYI-LABEL: @test_vhsub_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VHSUB_V2_I]] +// int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { +// return vhsub_s16(v1, v2); +// } + +// NYI-LABEL: @test_vhsub_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VHSUB_V2_I]] +// int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { +// return vhsub_s32(v1, v2); +// } + +// NYI-LABEL: @test_vhsub_u8( +// NYI: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VHSUB_V_I]] +// uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { +// return vhsub_u8(v1, v2); +// } + +// NYI-LABEL: @test_vhsub_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VHSUB_V2_I]] +// uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { +// return vhsub_u16(v1, v2); +// } + +// NYI-LABEL: @test_vhsub_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VHSUB_V2_I]] +// uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { +// return vhsub_u32(v1, v2); +// } + +// NYI-LABEL: @test_vhsubq_s8( +// NYI: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VHSUBQ_V_I]] +// int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { +// return vhsubq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vhsubq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> 
%v2 to <16 x i8> +// NYI: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VHSUBQ_V2_I]] +// int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { +// return vhsubq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vhsubq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VHSUBQ_V2_I]] +// int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { +// return vhsubq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vhsubq_u8( +// NYI: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VHSUBQ_V_I]] +// uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vhsubq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vhsubq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VHSUBQ_V2_I]] +// uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vhsubq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vhsubq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VHSUBQ_V2_I]] +// uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vhsubq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vrhadd_s8( +// NYI: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VRHADD_V_I]] +// int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { +// return vrhadd_s8(v1, v2); +// } + +// NYI-LABEL: @test_vrhadd_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRHADD_V2_I]] +// int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { +// return vrhadd_s16(v1, v2); +// } + +// NYI-LABEL: @test_vrhadd_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRHADD_V2_I]] +// int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { +// return vrhadd_s32(v1, v2); +// } + +// NYI-LABEL: @test_vrhadd_u8( +// NYI: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) +// NYI: ret <8 x i8> [[VRHADD_V_I]] +// uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { +// return vrhadd_u8(v1, v2); +// } + +// NYI-LABEL: 
@test_vrhadd_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> +// NYI: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) +// NYI: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRHADD_V2_I]] +// uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { +// return vrhadd_u16(v1, v2); +// } + +// NYI-LABEL: @test_vrhadd_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> +// NYI: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) +// NYI: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRHADD_V2_I]] +// uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { +// return vrhadd_u32(v1, v2); +// } + +// NYI-LABEL: @test_vrhaddq_s8( +// NYI: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VRHADDQ_V_I]] +// int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { +// return vrhaddq_s8(v1, v2); +// } + +// NYI-LABEL: @test_vrhaddq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VRHADDQ_V2_I]] +// int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { +// return vrhaddq_s16(v1, v2); +// } + +// NYI-LABEL: @test_vrhaddq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VRHADDQ_V2_I]] +// int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { +// return vrhaddq_s32(v1, v2); +// } + +// NYI-LABEL: @test_vrhaddq_u8( +// NYI: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) +// NYI: ret <16 x i8> [[VRHADDQ_V_I]] +// uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// return vrhaddq_u8(v1, v2); +// } + +// NYI-LABEL: @test_vrhaddq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> +// NYI: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) +// NYI: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VRHADDQ_V2_I]] +// uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { +// return vrhaddq_u16(v1, v2); +// } + +// NYI-LABEL: @test_vrhaddq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> +// NYI: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) +// NYI: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VRHADDQ_V2_I]] +// uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// return vrhaddq_u32(v1, v2); +// } + +// NYI-LABEL: @test_vqadd_s8( +// NYI: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x 
i8> %b) +// NYI: ret <8 x i8> [[VQADD_V_I]] +// int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { +// return vqadd_s8(a, b); +// } + +// NYI-LABEL: @test_vqadd_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQADD_V2_I]] +// int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { +// return vqadd_s16(a, b); +// } + +// NYI-LABEL: @test_vqadd_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQADD_V2_I]] +// int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { +// return vqadd_s32(a, b); +// } + +// NYI-LABEL: @test_vqadd_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQADD_V2_I]] +// int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { +// return vqadd_s64(a, b); +// } + +// NYI-LABEL: @test_vqadd_u8( +// NYI: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VQADD_V_I]] +// uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { +// return vqadd_u8(a, b); +// } + +// NYI-LABEL: @test_vqadd_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQADD_V2_I]] +// uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { +// return vqadd_u16(a, b); +// } + +// NYI-LABEL: @test_vqadd_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQADD_V2_I]] +// uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { +// return vqadd_u32(a, b); +// } + +// NYI-LABEL: @test_vqadd_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQADD_V2_I]] +// uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { +// return vqadd_u64(a, b); +// } + +// NYI-LABEL: @test_vqaddq_s8( +// NYI: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQADDQ_V_I]] +// int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { +// return vqaddq_s8(a, b); +// } + +// NYI-LABEL: @test_vqaddq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// 
NYI: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQADDQ_V2_I]] +// int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { +// return vqaddq_s16(a, b); +// } + +// NYI-LABEL: @test_vqaddq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQADDQ_V2_I]] +// int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { +// return vqaddq_s32(a, b); +// } + +// NYI-LABEL: @test_vqaddq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQADDQ_V2_I]] +// int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { +// return vqaddq_s64(a, b); +// } + +// NYI-LABEL: @test_vqaddq_u8( +// NYI: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQADDQ_V_I]] +// uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { +// return vqaddq_u8(a, b); +// } + +// NYI-LABEL: @test_vqaddq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQADDQ_V2_I]] +// uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { +// return vqaddq_u16(a, b); +// } + +// NYI-LABEL: @test_vqaddq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQADDQ_V2_I]] +// uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { +// return vqaddq_u32(a, b); +// } + +// NYI-LABEL: @test_vqaddq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQADDQ_V2_I]] +// uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { +// return vqaddq_u64(a, b); +// } + +// NYI-LABEL: @test_vqsub_s8( +// NYI: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VQSUB_V_I]] +// int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { +// return vqsub_s8(a, b); +// } + +// NYI-LABEL: @test_vqsub_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQSUB_V2_I]] +// int16x4_t 
test_vqsub_s16(int16x4_t a, int16x4_t b) { +// return vqsub_s16(a, b); +// } + +// NYI-LABEL: @test_vqsub_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQSUB_V2_I]] +// int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { +// return vqsub_s32(a, b); +// } + +// NYI-LABEL: @test_vqsub_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQSUB_V2_I]] +// int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { +// return vqsub_s64(a, b); +// } + +// NYI-LABEL: @test_vqsub_u8( +// NYI: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VQSUB_V_I]] +// uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { +// return vqsub_u8(a, b); +// } + +// NYI-LABEL: @test_vqsub_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQSUB_V2_I]] +// uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { +// return vqsub_u16(a, b); +// } + +// NYI-LABEL: @test_vqsub_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQSUB_V2_I]] +// uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { +// return vqsub_u32(a, b); +// } + +// NYI-LABEL: @test_vqsub_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQSUB_V2_I]] +// uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { +// return vqsub_u64(a, b); +// } + +// NYI-LABEL: @test_vqsubq_s8( +// NYI: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQSUBQ_V_I]] +// int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { +// return vqsubq_s8(a, b); +// } + +// NYI-LABEL: @test_vqsubq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQSUBQ_V2_I]] +// int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { +// return vqsubq_s16(a, b); +// } + +// NYI-LABEL: @test_vqsubq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQSUBQ_V2_I]] +// int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { +// return vqsubq_s32(a, b); +// } + +// NYI-LABEL: @test_vqsubq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQSUBQ_V2_I]] +// int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { +// return vqsubq_s64(a, b); +// } + +// NYI-LABEL: @test_vqsubq_u8( +// NYI: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQSUBQ_V_I]] +// uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { +// return vqsubq_u8(a, b); +// } + +// NYI-LABEL: @test_vqsubq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQSUBQ_V2_I]] +// uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { +// return vqsubq_u16(a, b); +// } + +// NYI-LABEL: @test_vqsubq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQSUBQ_V2_I]] +// uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { +// return vqsubq_u32(a, b); +// } + +// NYI-LABEL: @test_vqsubq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQSUBQ_V2_I]] +// uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { +// return vqsubq_u64(a, b); +// } + +// NYI-LABEL: @test_vshl_s8( +// NYI: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VSHL_V_I]] +// int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { +// return vshl_s8(a, b); +// } + +// NYI-LABEL: @test_vshl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VSHL_V2_I]] +// int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { +// return vshl_s16(a, b); +// } + +// NYI-LABEL: @test_vshl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VSHL_V2_I]] +// int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { +// return vshl_s32(a, b); +// 
} + +// NYI-LABEL: @test_vshl_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VSHL_V2_I]] +// int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { +// return vshl_s64(a, b); +// } + +// NYI-LABEL: @test_vshl_u8( +// NYI: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VSHL_V_I]] +// uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { +// return vshl_u8(a, b); +// } + +// NYI-LABEL: @test_vshl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VSHL_V2_I]] +// uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { +// return vshl_u16(a, b); +// } + +// NYI-LABEL: @test_vshl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VSHL_V2_I]] +// uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { +// return vshl_u32(a, b); +// } + +// NYI-LABEL: @test_vshl_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VSHL_V2_I]] +// uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { +// return vshl_u64(a, b); +// } + +// NYI-LABEL: @test_vshlq_s8( +// NYI: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VSHLQ_V_I]] +// int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { +// return vshlq_s8(a, b); +// } + +// NYI-LABEL: @test_vshlq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VSHLQ_V2_I]] +// int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { +// return vshlq_s16(a, b); +// } + +// NYI-LABEL: @test_vshlq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VSHLQ_V2_I]] +// int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { +// return vshlq_s32(a, b); +// } + +// NYI-LABEL: @test_vshlq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x 
i8> +// NYI: ret <2 x i64> [[VSHLQ_V2_I]] +// int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { +// return vshlq_s64(a, b); +// } + +// NYI-LABEL: @test_vshlq_u8( +// NYI: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VSHLQ_V_I]] +// uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { +// return vshlq_u8(a, b); +// } + +// NYI-LABEL: @test_vshlq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VSHLQ_V2_I]] +// uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { +// return vshlq_u16(a, b); +// } + +// NYI-LABEL: @test_vshlq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VSHLQ_V2_I]] +// uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { +// return vshlq_u32(a, b); +// } + +// NYI-LABEL: @test_vshlq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VSHLQ_V2_I]] +// uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { +// return vshlq_u64(a, b); +// } + +// NYI-LABEL: @test_vqshl_s8( +// NYI: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VQSHL_V_I]] +// int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { +// return vqshl_s8(a, b); +// } + +// NYI-LABEL: @test_vqshl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQSHL_V2_I]] +// int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { +// return vqshl_s16(a, b); +// } + +// NYI-LABEL: @test_vqshl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQSHL_V2_I]] +// int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { +// return vqshl_s32(a, b); +// } + +// NYI-LABEL: @test_vqshl_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQSHL_V2_I]] +// int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { +// return vqshl_s64(a, b); +// } + +// NYI-LABEL: @test_vqshl_u8( +// NYI: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> 
[[VQSHL_V_I]] +// uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { +// return vqshl_u8(a, b); +// } + +// NYI-LABEL: @test_vqshl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQSHL_V2_I]] +// uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { +// return vqshl_u16(a, b); +// } + +// NYI-LABEL: @test_vqshl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQSHL_V2_I]] +// uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { +// return vqshl_u32(a, b); +// } + +// NYI-LABEL: @test_vqshl_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQSHL_V2_I]] +// uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { +// return vqshl_u64(a, b); +// } + +// NYI-LABEL: @test_vqshlq_s8( +// NYI: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQSHLQ_V_I]] +// int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { +// return vqshlq_s8(a, b); +// } + +// NYI-LABEL: @test_vqshlq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQSHLQ_V2_I]] +// int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { +// return vqshlq_s16(a, b); +// } + +// NYI-LABEL: @test_vqshlq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQSHLQ_V2_I]] +// int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { +// return vqshlq_s32(a, b); +// } + +// NYI-LABEL: @test_vqshlq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQSHLQ_V2_I]] +// int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { +// return vqshlq_s64(a, b); +// } + +// NYI-LABEL: @test_vqshlq_u8( +// NYI: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQSHLQ_V_I]] +// uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { +// return vqshlq_u8(a, b); +// } + +// NYI-LABEL: @test_vqshlq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 
x i8> +// NYI: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQSHLQ_V2_I]] +// uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { +// return vqshlq_u16(a, b); +// } + +// NYI-LABEL: @test_vqshlq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQSHLQ_V2_I]] +// uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { +// return vqshlq_u32(a, b); +// } + +// NYI-LABEL: @test_vqshlq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQSHLQ_V2_I]] +// uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { +// return vqshlq_u64(a, b); +// } + +// NYI-LABEL: @test_vrshl_s8( +// NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VRSHL_V_I]] +// int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { +// return vrshl_s8(a, b); +// } + +// NYI-LABEL: @test_vrshl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRSHL_V2_I]] +// int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { +// return vrshl_s16(a, b); +// } + +// NYI-LABEL: @test_vrshl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRSHL_V2_I]] +// int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { +// return vrshl_s32(a, b); +// } + +// NYI-LABEL: @test_vrshl_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VRSHL_V2_I]] +// int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { +// return vrshl_s64(a, b); +// } + +// NYI-LABEL: @test_vrshl_u8( +// NYI: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VRSHL_V_I]] +// uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { +// return vrshl_u8(a, b); +// } + +// NYI-LABEL: @test_vrshl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRSHL_V2_I]] +// uint16x4_t test_vrshl_u16(uint16x4_t a, 
int16x4_t b) { +// return vrshl_u16(a, b); +// } + +// NYI-LABEL: @test_vrshl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRSHL_V2_I]] +// uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { +// return vrshl_u32(a, b); +// } + +// NYI-LABEL: @test_vrshl_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VRSHL_V2_I]] +// uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { +// return vrshl_u64(a, b); +// } + +// NYI-LABEL: @test_vrshlq_s8( +// NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VRSHLQ_V_I]] +// int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { +// return vrshlq_s8(a, b); +// } + +// NYI-LABEL: @test_vrshlq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VRSHLQ_V2_I]] +// int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { +// return vrshlq_s16(a, b); +// } + +// NYI-LABEL: @test_vrshlq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VRSHLQ_V2_I]] +// int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { +// return vrshlq_s32(a, b); +// } + +// NYI-LABEL: @test_vrshlq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VRSHLQ_V2_I]] +// int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { +// return vrshlq_s64(a, b); +// } + +// NYI-LABEL: @test_vrshlq_u8( +// NYI: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VRSHLQ_V_I]] +// uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { +// return vrshlq_u8(a, b); +// } + +// NYI-LABEL: @test_vrshlq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VRSHLQ_V2_I]] +// uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { +// return vrshlq_u16(a, b); +// } + +// NYI-LABEL: @test_vrshlq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSHLQ_V2_I:%.*]] = call 
<4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VRSHLQ_V2_I]] +// uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { +// return vrshlq_u32(a, b); +// } + +// NYI-LABEL: @test_vrshlq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VRSHLQ_V2_I]] +// uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { +// return vrshlq_u64(a, b); +// } + +// NYI-LABEL: @test_vqrshl_s8( +// NYI: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VQRSHL_V_I]] +// int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { +// return vqrshl_s8(a, b); +// } + +// NYI-LABEL: @test_vqrshl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQRSHL_V2_I]] +// int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { +// return vqrshl_s16(a, b); +// } + +// NYI-LABEL: @test_vqrshl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQRSHL_V2_I]] +// int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { +// return vqrshl_s32(a, b); +// } + +// NYI-LABEL: @test_vqrshl_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQRSHL_V2_I]] +// int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { +// return vqrshl_s64(a, b); +// } + +// NYI-LABEL: @test_vqrshl_u8( +// NYI: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VQRSHL_V_I]] +// uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { +// return vqrshl_u8(a, b); +// } + +// NYI-LABEL: @test_vqrshl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQRSHL_V2_I]] +// uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { +// return vqrshl_u16(a, b); +// } + +// NYI-LABEL: @test_vqrshl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQRSHL_V2_I]] +// uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t 
b) { +// return vqrshl_u32(a, b); +// } + +// NYI-LABEL: @test_vqrshl_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQRSHL_V2_I]] +// uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { +// return vqrshl_u64(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_s8( +// NYI: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQRSHLQ_V_I]] +// int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { +// return vqrshlq_s8(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQRSHLQ_V2_I]] +// int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { +// return vqrshlq_s16(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQRSHLQ_V2_I]] +// int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { +// return vqrshlq_s32(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQRSHLQ_V2_I]] +// int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { +// return vqrshlq_s64(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_u8( +// NYI: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VQRSHLQ_V_I]] +// uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { +// return vqrshlq_u8(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQRSHLQ_V2_I]] +// uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { +// return vqrshlq_u16(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQRSHLQ_V2_I]] +// uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { +// return vqrshlq_u32(a, b); +// } + +// NYI-LABEL: @test_vqrshlq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast 
<2 x i64> %b to <16 x i8> +// NYI: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQRSHLQ_V2_I]] +// uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { +// return vqrshlq_u64(a, b); +// } + +// NYI-LABEL: @test_vsli_n_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0) +// NYI: ret <1 x i64> [[VSLI_N2]] +// poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) { +// return vsli_n_p64(a, b, 0); +// } + +// NYI-LABEL: @test_vsliq_n_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0) +// NYI: ret <2 x i64> [[VSLI_N2]] +// poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) { +// return vsliq_n_p64(a, b, 0); +// } + +// NYI-LABEL: @test_vmax_s8( +// NYI: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VMAX_I]] +// int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { +// return vmax_s8(a, b); +// } + +// NYI-LABEL: @test_vmax_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VMAX2_I]] +// int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { +// return vmax_s16(a, b); +// } + +// NYI-LABEL: @test_vmax_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VMAX2_I]] +// int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { +// return vmax_s32(a, b); +// } + +// NYI-LABEL: @test_vmax_u8( +// NYI: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VMAX_I]] +// uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { +// return vmax_u8(a, b); +// } + +// NYI-LABEL: @test_vmax_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VMAX2_I]] +// uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { +// return vmax_u16(a, b); +// } + +// NYI-LABEL: @test_vmax_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VMAX2_I]] +// uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { +// return vmax_u32(a, b); +// } + +// NYI-LABEL: @test_vmax_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b 
to <8 x i8> +// NYI: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VMAX2_I]] +// float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { +// return vmax_f32(a, b); +// } + +// NYI-LABEL: @test_vmaxq_s8( +// NYI: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VMAX_I]] +// int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { +// return vmaxq_s8(a, b); +// } + +// NYI-LABEL: @test_vmaxq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VMAX2_I]] +// int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { +// return vmaxq_s16(a, b); +// } + +// NYI-LABEL: @test_vmaxq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VMAX2_I]] +// int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { +// return vmaxq_s32(a, b); +// } + +// NYI-LABEL: @test_vmaxq_u8( +// NYI: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VMAX_I]] +// uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { +// return vmaxq_u8(a, b); +// } + +// NYI-LABEL: @test_vmaxq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VMAX2_I]] +// uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { +// return vmaxq_u16(a, b); +// } + +// NYI-LABEL: @test_vmaxq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VMAX2_I]] +// uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { +// return vmaxq_u32(a, b); +// } + +// NYI-LABEL: @test_vmaxq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VMAX2_I]] +// float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { +// return vmaxq_f32(a, b); +// } + +// NYI-LABEL: @test_vmaxq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VMAX2_I]] +// float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { +// return vmaxq_f64(a, b); +// } + +// NYI-LABEL: @test_vmin_s8( +// NYI: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VMIN_I]] +// int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { +// return vmin_s8(a, b); +// } + +// NYI-LABEL: @test_vmin_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> 
%b) +// NYI: ret <4 x i16> [[VMIN2_I]] +// int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { +// return vmin_s16(a, b); +// } + +// NYI-LABEL: @test_vmin_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VMIN2_I]] +// int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { +// return vmin_s32(a, b); +// } + +// NYI-LABEL: @test_vmin_u8( +// NYI: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VMIN_I]] +// uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { +// return vmin_u8(a, b); +// } + +// NYI-LABEL: @test_vmin_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VMIN2_I]] +// uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { +// return vmin_u16(a, b); +// } + +// NYI-LABEL: @test_vmin_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VMIN2_I]] +// uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { +// return vmin_u32(a, b); +// } + +// NYI-LABEL: @test_vmin_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VMIN2_I]] +// float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { +// return vmin_f32(a, b); +// } + +// NYI-LABEL: @test_vminq_s8( +// NYI: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VMIN_I]] +// int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { +// return vminq_s8(a, b); +// } + +// NYI-LABEL: @test_vminq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VMIN2_I]] +// int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { +// return vminq_s16(a, b); +// } + +// NYI-LABEL: @test_vminq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VMIN2_I]] +// int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { +// return vminq_s32(a, b); +// } + +// NYI-LABEL: @test_vminq_u8( +// NYI: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VMIN_I]] +// uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { +// return vminq_u8(a, b); +// } + +// NYI-LABEL: @test_vminq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VMIN2_I]] +// uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { +// return vminq_u16(a, b); +// } + +// NYI-LABEL: 
@test_vminq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VMIN2_I]] +// uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { +// return vminq_u32(a, b); +// } + +// NYI-LABEL: @test_vminq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VMIN2_I]] +// float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { +// return vminq_f32(a, b); +// } + +// NYI-LABEL: @test_vminq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VMIN2_I]] +// float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { +// return vminq_f64(a, b); +// } + +// NYI-LABEL: @test_vmaxnm_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VMAXNM2_I]] +// float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { +// return vmaxnm_f32(a, b); +// } + +// NYI-LABEL: @test_vmaxnmq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VMAXNM2_I]] +// float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { +// return vmaxnmq_f32(a, b); +// } + +// NYI-LABEL: @test_vmaxnmq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VMAXNM2_I]] +// float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { +// return vmaxnmq_f64(a, b); +// } + +// NYI-LABEL: @test_vminnm_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VMINNM2_I]] +// float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { +// return vminnm_f32(a, b); +// } + +// NYI-LABEL: @test_vminnmq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VMINNM2_I]] +// float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { +// return vminnmq_f32(a, b); +// } + +// NYI-LABEL: @test_vminnmq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VMINNM2_I]] +// float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t 
b) { +// return vminnmq_f64(a, b); +// } + +// NYI-LABEL: @test_vpmax_s8( +// NYI: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VPMAX_I]] +// int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { +// return vpmax_s8(a, b); +// } + +// NYI-LABEL: @test_vpmax_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VPMAX2_I]] +// int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { +// return vpmax_s16(a, b); +// } + +// NYI-LABEL: @test_vpmax_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VPMAX2_I]] +// int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { +// return vpmax_s32(a, b); +// } + +// NYI-LABEL: @test_vpmax_u8( +// NYI: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VPMAX_I]] +// uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { +// return vpmax_u8(a, b); +// } + +// NYI-LABEL: @test_vpmax_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VPMAX2_I]] +// uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { +// return vpmax_u16(a, b); +// } + +// NYI-LABEL: @test_vpmax_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VPMAX2_I]] +// uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { +// return vpmax_u32(a, b); +// } + +// NYI-LABEL: @test_vpmax_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VPMAX2_I]] +// float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { +// return vpmax_f32(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_s8( +// NYI: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VPMAX_I]] +// int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { +// return vpmaxq_s8(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VPMAX2_I]] +// int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { +// return vpmaxq_s16(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VPMAX2_I]] +// int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { +// return vpmaxq_s32(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_u8( +// NYI: [[VPMAX_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VPMAX_I]] +// uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { +// return vpmaxq_u8(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VPMAX2_I]] +// uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { +// return vpmaxq_u16(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VPMAX2_I]] +// uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { +// return vpmaxq_u32(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VPMAX2_I]] +// float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { +// return vpmaxq_f32(a, b); +// } + +// NYI-LABEL: @test_vpmaxq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VPMAX2_I]] +// float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { +// return vpmaxq_f64(a, b); +// } + +// NYI-LABEL: @test_vpmin_s8( +// NYI: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VPMIN_I]] +// int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { +// return vpmin_s8(a, b); +// } + +// NYI-LABEL: @test_vpmin_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VPMIN2_I]] +// int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { +// return vpmin_s16(a, b); +// } + +// NYI-LABEL: @test_vpmin_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VPMIN2_I]] +// int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { +// return vpmin_s32(a, b); +// } + +// NYI-LABEL: @test_vpmin_u8( +// NYI: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VPMIN_I]] +// uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { +// return vpmin_u8(a, b); +// } + +// NYI-LABEL: @test_vpmin_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VPMIN2_I]] +// uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { +// return vpmin_u16(a, b); +// } + +// NYI-LABEL: @test_vpmin_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// 
NYI: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VPMIN2_I]] +// uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { +// return vpmin_u32(a, b); +// } + +// NYI-LABEL: @test_vpmin_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VPMIN2_I]] +// float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { +// return vpmin_f32(a, b); +// } + +// NYI-LABEL: @test_vpminq_s8( +// NYI: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VPMIN_I]] +// int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { +// return vpminq_s8(a, b); +// } + +// NYI-LABEL: @test_vpminq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VPMIN2_I]] +// int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { +// return vpminq_s16(a, b); +// } + +// NYI-LABEL: @test_vpminq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VPMIN2_I]] +// int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { +// return vpminq_s32(a, b); +// } + +// NYI-LABEL: @test_vpminq_u8( +// NYI: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VPMIN_I]] +// uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { +// return vpminq_u8(a, b); +// } + +// NYI-LABEL: @test_vpminq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VPMIN2_I]] +// uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { +// return vpminq_u16(a, b); +// } + +// NYI-LABEL: @test_vpminq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VPMIN2_I]] +// uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { +// return vpminq_u32(a, b); +// } + +// NYI-LABEL: @test_vpminq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VPMIN2_I]] +// float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { +// return vpminq_f32(a, b); +// } + +// NYI-LABEL: @test_vpminq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VPMIN2_I]] +// float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { +// return vpminq_f64(a, b); +// } + +// NYI-LABEL: @test_vpmaxnm_f32( +// NYI: [[TMP0:%.*]] = 
bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VPMAXNM2_I]] +// float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { +// return vpmaxnm_f32(a, b); +// } + +// NYI-LABEL: @test_vpmaxnmq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VPMAXNM2_I]] +// float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { +// return vpmaxnmq_f32(a, b); +// } + +// NYI-LABEL: @test_vpmaxnmq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VPMAXNM2_I]] +// float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { +// return vpmaxnmq_f64(a, b); +// } + +// NYI-LABEL: @test_vpminnm_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VPMINNM2_I]] +// float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { +// return vpminnm_f32(a, b); +// } + +// NYI-LABEL: @test_vpminnmq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VPMINNM2_I]] +// float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { +// return vpminnmq_f32(a, b); +// } + +// NYI-LABEL: @test_vpminnmq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VPMINNM2_I]] +// float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { +// return vpminnmq_f64(a, b); +// } + +// NYI-LABEL: @test_vpadd_s8( +// NYI: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VPADD_V_I]] +// int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { +// return vpadd_s8(a, b); +// } + +// NYI-LABEL: @test_vpadd_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VPADD_V2_I]] +// int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { +// return vpadd_s16(a, b); +// } + +// NYI-LABEL: @test_vpadd_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VPADD_V2_I]] +// int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { +// return 
vpadd_s32(a, b); +// } + +// NYI-LABEL: @test_vpadd_u8( +// NYI: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VPADD_V_I]] +// uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { +// return vpadd_u8(a, b); +// } + +// NYI-LABEL: @test_vpadd_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VPADD_V2_I]] +// uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { +// return vpadd_u16(a, b); +// } + +// NYI-LABEL: @test_vpadd_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VPADD_V2_I]] +// uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { +// return vpadd_u32(a, b); +// } + +// NYI-LABEL: @test_vpadd_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> +// NYI: ret <2 x float> [[VPADD_V2_I]] +// float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { +// return vpadd_f32(a, b); +// } + +// NYI-LABEL: @test_vpaddq_s8( +// NYI: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VPADDQ_V_I]] +// int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { +// return vpaddq_s8(a, b); +// } + +// NYI-LABEL: @test_vpaddq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VPADDQ_V2_I]] +// int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { +// return vpaddq_s16(a, b); +// } + +// NYI-LABEL: @test_vpaddq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VPADDQ_V2_I]] +// int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { +// return vpaddq_s32(a, b); +// } + +// NYI-LABEL: @test_vpaddq_u8( +// NYI: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VPADDQ_V_I]] +// uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { +// return vpaddq_u8(a, b); +// } + +// NYI-LABEL: @test_vpaddq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VPADDQ_V2_I]] +// uint16x8_t test_vpaddq_u16(uint16x8_t a, 
uint16x8_t b) { +// return vpaddq_u16(a, b); +// } + +// NYI-LABEL: @test_vpaddq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VPADDQ_V2_I]] +// uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { +// return vpaddq_u32(a, b); +// } + +// NYI-LABEL: @test_vpaddq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <4 x float> [[VPADDQ_V2_I]] +// float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { +// return vpaddq_f32(a, b); +// } + +// NYI-LABEL: @test_vpaddq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <2 x double> [[VPADDQ_V2_I]] +// float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { +// return vpaddq_f64(a, b); +// } + +// NYI-LABEL: @test_vqdmulh_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQDMULH_V2_I]] +// int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { +// return vqdmulh_s16(a, b); +// } + +// NYI-LABEL: @test_vqdmulh_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQDMULH_V2_I]] +// int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { +// return vqdmulh_s32(a, b); +// } + +// NYI-LABEL: @test_vqdmulhq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQDMULHQ_V2_I]] +// int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { +// return vqdmulhq_s16(a, b); +// } + +// NYI-LABEL: @test_vqdmulhq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQDMULHQ_V2_I]] +// int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { +// return vqdmulhq_s32(a, b); +// } + +// NYI-LABEL: @test_vqrdmulh_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x 
i8> +// NYI: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VQRDMULH_V2_I]] +// int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { +// return vqrdmulh_s16(a, b); +// } + +// NYI-LABEL: @test_vqrdmulh_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VQRDMULH_V2_I]] +// int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { +// return vqrdmulh_s32(a, b); +// } + +// NYI-LABEL: @test_vqrdmulhq_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> +// NYI: ret <8 x i16> [[VQRDMULHQ_V2_I]] +// int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { +// return vqrdmulhq_s16(a, b); +// } + +// NYI-LABEL: @test_vqrdmulhq_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQRDMULHQ_V2_I]] +// int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { +// return vqrdmulhq_s32(a, b); +// } + +// NYI-LABEL: @test_vmulx_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b) +// NYI: ret <2 x float> [[VMULX2_I]] +// float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { +// return vmulx_f32(a, b); +// } + +// NYI-LABEL: @test_vmulxq_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b) +// NYI: ret <4 x float> [[VMULX2_I]] +// float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { +// return vmulxq_f32(a, b); +// } + +// NYI-LABEL: @test_vmulxq_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b) +// NYI: ret <2 x double> [[VMULX2_I]] +// float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { +// return vmulxq_f64(a, b); +// } + +// NYI-LABEL: @test_vshl_n_s8( +// NYI: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// NYI: ret <8 x i8> [[VSHL_N]] +// int8x8_t test_vshl_n_s8(int8x8_t a) { +// return vshl_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vshl_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// NYI: ret <4 x i16> [[VSHL_N]] +// int16x4_t test_vshl_n_s16(int16x4_t a) { +// return vshl_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vshl_n_s32( +// NYI: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// NYI: ret <2 x i32> [[VSHL_N]] +// int32x2_t test_vshl_n_s32(int32x2_t a) { +// return vshl_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_s8( +// NYI: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// NYI: ret <16 x i8> [[VSHL_N]] +// int8x16_t test_vshlq_n_s8(int8x16_t a) { +// return vshlq_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// NYI: ret <8 x i16> [[VSHL_N]] +// int16x8_t test_vshlq_n_s16(int16x8_t a) { +// return vshlq_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// NYI: ret <4 x i32> [[VSHL_N]] +// int32x4_t test_vshlq_n_s32(int32x4_t a) { +// return vshlq_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// NYI: ret <2 x i64> [[VSHL_N]] +// int64x2_t test_vshlq_n_s64(int64x2_t a) { +// return vshlq_n_s64(a, 3); +// } + +// NYI-LABEL: @test_vshl_n_u8( +// NYI: [[VSHL_N:%.*]] = shl <8 x i8> %a, +// NYI: ret <8 x i8> [[VSHL_N]] +// uint8x8_t test_vshl_n_u8(uint8x8_t a) { +// return vshl_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vshl_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], +// NYI: ret <4 x i16> [[VSHL_N]] +// uint16x4_t test_vshl_n_u16(uint16x4_t a) { +// return vshl_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vshl_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], +// NYI: ret <2 x i32> [[VSHL_N]] +// uint32x2_t test_vshl_n_u32(uint32x2_t a) { +// return vshl_n_u32(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_u8( +// NYI: [[VSHL_N:%.*]] = shl <16 x i8> %a, +// NYI: ret <16 x i8> [[VSHL_N]] +// uint8x16_t test_vshlq_n_u8(uint8x16_t a) { +// return vshlq_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], +// NYI: ret <8 x i16> [[VSHL_N]] +// uint16x8_t test_vshlq_n_u16(uint16x8_t a) { +// return vshlq_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], +// NYI: ret <4 x i32> [[VSHL_N]] +// uint32x4_t test_vshlq_n_u32(uint32x4_t a) { +// return vshlq_n_u32(a, 3); +// } + +// NYI-LABEL: @test_vshlq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], +// NYI: ret <2 x i64> [[VSHL_N]] +// uint64x2_t test_vshlq_n_u64(uint64x2_t a) { +// return vshlq_n_u64(a, 3); +// } + +// NYI-LABEL: @test_vshr_n_s8( +// NYI: [[VSHR_N:%.*]] = ashr <8 x i8> %a, +// NYI: ret <8 x i8> [[VSHR_N]] 
+// int8x8_t test_vshr_n_s8(int8x8_t a) { +// return vshr_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vshr_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], +// NYI: ret <4 x i16> [[VSHR_N]] +// int16x4_t test_vshr_n_s16(int16x4_t a) { +// return vshr_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vshr_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], +// NYI: ret <2 x i32> [[VSHR_N]] +// int32x2_t test_vshr_n_s32(int32x2_t a) { +// return vshr_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_s8( +// NYI: [[VSHR_N:%.*]] = ashr <16 x i8> %a, +// NYI: ret <16 x i8> [[VSHR_N]] +// int8x16_t test_vshrq_n_s8(int8x16_t a) { +// return vshrq_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], +// NYI: ret <8 x i16> [[VSHR_N]] +// int16x8_t test_vshrq_n_s16(int16x8_t a) { +// return vshrq_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], +// NYI: ret <4 x i32> [[VSHR_N]] +// int32x4_t test_vshrq_n_s32(int32x4_t a) { +// return vshrq_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], +// NYI: ret <2 x i64> [[VSHR_N]] +// int64x2_t test_vshrq_n_s64(int64x2_t a) { +// return vshrq_n_s64(a, 3); +// } + +// NYI-LABEL: @test_vshr_n_u8( +// NYI: [[VSHR_N:%.*]] = lshr <8 x i8> %a, +// NYI: ret <8 x i8> [[VSHR_N]] +// uint8x8_t test_vshr_n_u8(uint8x8_t a) { +// return vshr_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vshr_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], +// NYI: ret <4 x i16> [[VSHR_N]] +// uint16x4_t test_vshr_n_u16(uint16x4_t a) { +// return vshr_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vshr_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], +// NYI: ret <2 x i32> [[VSHR_N]] +// uint32x2_t test_vshr_n_u32(uint32x2_t a) { +// return vshr_n_u32(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_u8( +// NYI: [[VSHR_N:%.*]] = lshr <16 x i8> %a, +// NYI: ret <16 x i8> [[VSHR_N]] +// uint8x16_t test_vshrq_n_u8(uint8x16_t a) { +// return vshrq_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], +// NYI: ret <8 x i16> [[VSHR_N]] +// uint16x8_t test_vshrq_n_u16(uint16x8_t a) { +// return vshrq_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], +// NYI: ret <4 x i32> [[VSHR_N]] +// uint32x4_t test_vshrq_n_u32(uint32x4_t a) { +// return 
vshrq_n_u32(a, 3); +// } + +// NYI-LABEL: @test_vshrq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], +// NYI: ret <2 x i64> [[VSHR_N]] +// uint64x2_t test_vshrq_n_u64(uint64x2_t a) { +// return vshrq_n_u64(a, 3); +// } + +// NYI-LABEL: @test_vsra_n_s8( +// NYI: [[VSRA_N:%.*]] = ashr <8 x i8> %b, +// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { +// return vsra_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vsra_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] +// NYI: ret <4 x i16> [[TMP4]] +// int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { +// return vsra_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vsra_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] +// NYI: ret <2 x i32> [[TMP4]] +// int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { +// return vsra_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_s8( +// NYI: [[VSRA_N:%.*]] = ashr <16 x i8> %b, +// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { +// return vsraq_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] +// NYI: ret <8 x i16> [[TMP4]] +// int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { +// return vsraq_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] +// NYI: ret <4 x i32> [[TMP4]] +// int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { +// return vsraq_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] +// NYI: ret <2 x i64> [[TMP4]] +// int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { +// return vsraq_n_s64(a, b, 3); +// } + +// NYI-LABEL: @test_vsra_n_u8( +// NYI: [[VSRA_N:%.*]] = lshr <8 x i8> %b, +// NYI: [[TMP0:%.*]] = 
add <8 x i8> %a, [[VSRA_N]] +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { +// return vsra_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vsra_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] +// NYI: ret <4 x i16> [[TMP4]] +// uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { +// return vsra_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vsra_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] +// NYI: ret <2 x i32> [[TMP4]] +// uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { +// return vsra_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_u8( +// NYI: [[VSRA_N:%.*]] = lshr <16 x i8> %b, +// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { +// return vsraq_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] +// NYI: ret <8 x i16> [[TMP4]] +// uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { +// return vsraq_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] +// NYI: ret <4 x i32> [[TMP4]] +// uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { +// return vsraq_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vsraq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] +// NYI: ret <2 x i64> [[TMP4]] +// uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { +// return vsraq_n_u64(a, b, 3); +// } + +// NYI-LABEL: @test_vrshr_n_s8( +// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> ) +// NYI: ret <8 x i8> [[VRSHR_N]] +// int8x8_t test_vrshr_n_s8(int8x8_t a) { +// return vrshr_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vrshr_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// NYI: ret <4 x i16> 
[[VRSHR_N1]] +// int16x4_t test_vrshr_n_s16(int16x4_t a) { +// return vrshr_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vrshr_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// NYI: ret <2 x i32> [[VRSHR_N1]] +// int32x2_t test_vrshr_n_s32(int32x2_t a) { +// return vrshr_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_s8( +// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> ) +// NYI: ret <16 x i8> [[VRSHR_N]] +// int8x16_t test_vrshrq_n_s8(int8x16_t a) { +// return vrshrq_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// NYI: ret <8 x i16> [[VRSHR_N1]] +// int16x8_t test_vrshrq_n_s16(int16x8_t a) { +// return vrshrq_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// NYI: ret <4 x i32> [[VRSHR_N1]] +// int32x4_t test_vrshrq_n_s32(int32x4_t a) { +// return vrshrq_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// NYI: ret <2 x i64> [[VRSHR_N1]] +// int64x2_t test_vrshrq_n_s64(int64x2_t a) { +// return vrshrq_n_s64(a, 3); +// } + +// NYI-LABEL: @test_vrshr_n_u8( +// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> ) +// NYI: ret <8 x i8> [[VRSHR_N]] +// uint8x8_t test_vrshr_n_u8(uint8x8_t a) { +// return vrshr_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vrshr_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// NYI: ret <4 x i16> [[VRSHR_N1]] +// uint16x4_t test_vrshr_n_u16(uint16x4_t a) { +// return vrshr_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vrshr_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// NYI: ret <2 x i32> [[VRSHR_N1]] +// uint32x2_t test_vrshr_n_u32(uint32x2_t a) { +// return vrshr_n_u32(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_u8( +// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> ) +// NYI: ret <16 x i8> [[VRSHR_N]] +// uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { +// return vrshrq_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// NYI: ret <8 x i16> [[VRSHR_N1]] +// uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { +// return 
vrshrq_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// NYI: ret <4 x i32> [[VRSHR_N1]] +// uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { +// return vrshrq_n_u32(a, 3); +// } + +// NYI-LABEL: @test_vrshrq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// NYI: ret <2 x i64> [[VRSHR_N1]] +// uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { +// return vrshrq_n_u64(a, 3); +// } + +// NYI-LABEL: @test_vrsra_n_s8( +// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> ) +// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { +// return vrsra_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vrsra_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <4 x i16> [[TMP3]] +// int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { +// return vrsra_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vrsra_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <2 x i32> [[TMP3]] +// int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { +// return vrsra_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_s8( +// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> ) +// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { +// return vrsraq_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <8 x i16> [[TMP3]] +// int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { +// return vrsraq_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> 
[[TMP0]] to <4 x i32> +// NYI: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <4 x i32> [[TMP3]] +// int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { +// return vrsraq_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <2 x i64> [[TMP3]] +// int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { +// return vrsraq_n_s64(a, b, 3); +// } + +// NYI-LABEL: @test_vrsra_n_u8( +// NYI: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> ) +// NYI: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { +// return vrsra_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vrsra_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <4 x i16> [[TMP3]] +// uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { +// return vrsra_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vrsra_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <2 x i32> [[TMP3]] +// uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { +// return vrsra_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_u8( +// NYI: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> ) +// NYI: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { +// return vrsraq_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <8 x i16> [[TMP3]] +// uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { +// return vrsraq_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) +// NYI: [[TMP2:%.*]] = bitcast <16 x 
i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <4 x i32> [[TMP3]] +// uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { +// return vrsraq_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vrsraq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) +// NYI: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <2 x i64> [[TMP3]] +// uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { +// return vrsraq_n_u64(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_s8( +// NYI: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) +// NYI: ret <8 x i8> [[VSRI_N]] +// int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { +// return vsri_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) +// NYI: ret <4 x i16> [[VSRI_N2]] +// int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { +// return vsri_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) +// NYI: ret <2 x i32> [[VSRI_N2]] +// int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { +// return vsri_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_s8( +// NYI: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) +// NYI: ret <16 x i8> [[VSRI_N]] +// int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { +// return vsriq_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) +// NYI: ret <8 x i16> [[VSRI_N2]] +// int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { +// return vsriq_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) +// NYI: ret <4 x i32> [[VSRI_N2]] +// int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { +// return vsriq_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_s64( +// NYI: [[TMP0:%.*]] 
= bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) +// NYI: ret <2 x i64> [[VSRI_N2]] +// int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { +// return vsriq_n_s64(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_u8( +// NYI: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) +// NYI: ret <8 x i8> [[VSRI_N]] +// uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { +// return vsri_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) +// NYI: ret <4 x i16> [[VSRI_N2]] +// uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { +// return vsri_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) +// NYI: ret <2 x i32> [[VSRI_N2]] +// uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { +// return vsri_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_u8( +// NYI: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) +// NYI: ret <16 x i8> [[VSRI_N]] +// uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { +// return vsriq_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) +// NYI: ret <8 x i16> [[VSRI_N2]] +// uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { +// return vsriq_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) +// NYI: ret <4 x i32> [[VSRI_N2]] +// uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { +// return vsriq_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> 
[[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) +// NYI: ret <2 x i64> [[VSRI_N2]] +// uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { +// return vsriq_n_u64(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_p8( +// NYI: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) +// NYI: ret <8 x i8> [[VSRI_N]] +// poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { +// return vsri_n_p8(a, b, 3); +// } + +// NYI-LABEL: @test_vsri_n_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15) +// NYI: ret <4 x i16> [[VSRI_N2]] +// poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { +// return vsri_n_p16(a, b, 15); +// } + +// NYI-LABEL: @test_vsriq_n_p8( +// NYI: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) +// NYI: ret <16 x i8> [[VSRI_N]] +// poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { +// return vsriq_n_p8(a, b, 3); +// } + +// NYI-LABEL: @test_vsriq_n_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15) +// NYI: ret <8 x i16> [[VSRI_N2]] +// poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { +// return vsriq_n_p16(a, b, 15); +// } + +// NYI-LABEL: @test_vsli_n_s8( +// NYI: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) +// NYI: ret <8 x i8> [[VSLI_N]] +// int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { +// return vsli_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3) +// NYI: ret <4 x i16> [[VSLI_N2]] +// int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { +// return vsli_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3) +// NYI: ret <2 x i32> [[VSLI_N2]] +// int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { +// return vsli_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_s8( +// NYI: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) +// NYI: ret <16 x i8> [[VSLI_N]] +// int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { +// return vsliq_n_s8(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 
+// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3) +// NYI: ret <8 x i16> [[VSLI_N2]] +// int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { +// return vsliq_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3) +// NYI: ret <4 x i32> [[VSLI_N2]] +// int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { +// return vsliq_n_s32(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3) +// NYI: ret <2 x i64> [[VSLI_N2]] +// int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { +// return vsliq_n_s64(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_u8( +// NYI: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) +// NYI: ret <8 x i8> [[VSLI_N]] +// uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { +// return vsli_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3) +// NYI: ret <4 x i16> [[VSLI_N2]] +// uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { +// return vsli_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3) +// NYI: ret <2 x i32> [[VSLI_N2]] +// uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { +// return vsli_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_u8( +// NYI: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) +// NYI: ret <16 x i8> [[VSLI_N]] +// uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { +// return vsliq_n_u8(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3) +// NYI: ret <8 x i16> [[VSLI_N2]] +// uint16x8_t 
test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { +// return vsliq_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> +// NYI: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3) +// NYI: ret <4 x i32> [[VSLI_N2]] +// uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { +// return vsliq_n_u32(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +// NYI: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3) +// NYI: ret <2 x i64> [[VSLI_N2]] +// uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { +// return vsliq_n_u64(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_p8( +// NYI: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) +// NYI: ret <8 x i8> [[VSLI_N]] +// poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { +// return vsli_n_p8(a, b, 3); +// } + +// NYI-LABEL: @test_vsli_n_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15) +// NYI: ret <4 x i16> [[VSLI_N2]] +// poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { +// return vsli_n_p16(a, b, 15); +// } + +// NYI-LABEL: @test_vsliq_n_p8( +// NYI: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) +// NYI: ret <16 x i8> [[VSLI_N]] +// poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { +// return vsliq_n_p8(a, b, 3); +// } + +// NYI-LABEL: @test_vsliq_n_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15) +// NYI: ret <8 x i16> [[VSLI_N2]] +// poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { +// return vsliq_n_p16(a, b, 15); +// } + +// NYI-LABEL: @test_vqshlu_n_s8( +// NYI: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> ) +// NYI: ret <8 x i8> [[VQSHLU_N]] +// uint8x8_t test_vqshlu_n_s8(int8x8_t a) { +// return vqshlu_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vqshlu_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) +// NYI: ret <4 x i16> [[VQSHLU_N1]] +// uint16x4_t test_vqshlu_n_s16(int16x4_t a) { +// return vqshlu_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vqshlu_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: 
[[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) +// NYI: ret <2 x i32> [[VQSHLU_N1]] +// uint32x2_t test_vqshlu_n_s32(int32x2_t a) { +// return vqshlu_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vqshluq_n_s8( +// NYI: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> ) +// NYI: ret <16 x i8> [[VQSHLU_N]] +// uint8x16_t test_vqshluq_n_s8(int8x16_t a) { +// return vqshluq_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vqshluq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) +// NYI: ret <8 x i16> [[VQSHLU_N1]] +// uint16x8_t test_vqshluq_n_s16(int16x8_t a) { +// return vqshluq_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vqshluq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) +// NYI: ret <4 x i32> [[VQSHLU_N1]] +// uint32x4_t test_vqshluq_n_s32(int32x4_t a) { +// return vqshluq_n_s32(a, 3); +// } + +// NYI-LABEL: @test_vqshluq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) +// NYI: ret <2 x i64> [[VQSHLU_N1]] +// uint64x2_t test_vqshluq_n_s64(int64x2_t a) { +// return vqshluq_n_s64(a, 3); +// } + +// NYI-LABEL: @test_vshrn_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> +// NYI: ret <8 x i8> [[VSHRN_N]] +// int8x8_t test_vshrn_n_s16(int16x8_t a) { +// return vshrn_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vshrn_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +// NYI: ret <4 x i16> [[VSHRN_N]] +// int16x4_t test_vshrn_n_s32(int32x4_t a) { +// return vshrn_n_s32(a, 9); +// } + +// NYI-LABEL: @test_vshrn_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +// NYI: ret <2 x i32> [[VSHRN_N]] +// int32x2_t test_vshrn_n_s64(int64x2_t a) { +// return vshrn_n_s64(a, 19); +// } + +// NYI-LABEL: @test_vshrn_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> +// NYI: ret <8 x i8> [[VSHRN_N]] +// uint8x8_t test_vshrn_n_u16(uint16x8_t a) { +// return vshrn_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vshrn_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <4 x i32> 
[[TMP2]] to <4 x i16> +// NYI: ret <4 x i16> [[VSHRN_N]] +// uint16x4_t test_vshrn_n_u32(uint32x4_t a) { +// return vshrn_n_u32(a, 9); +// } + +// NYI-LABEL: @test_vshrn_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +// NYI: ret <2 x i32> [[VSHRN_N]] +// uint32x2_t test_vshrn_n_u64(uint64x2_t a) { +// return vshrn_n_u64(a, 19); +// } + +// NYI-LABEL: @test_vshrn_high_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { +// return vshrn_high_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vshrn_high_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { +// return vshrn_high_n_s32(a, b, 9); +// } + +// NYI-LABEL: @test_vshrn_high_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { +// return vshrn_high_n_s64(a, b, 19); +// } + +// NYI-LABEL: @test_vshrn_high_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { +// return vshrn_high_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vshrn_high_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { +// return vshrn_high_n_u32(a, b, 9); +// } + +// NYI-LABEL: @test_vshrn_high_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +// NYI: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, 
uint64x2_t b) { +// return vshrn_high_n_u64(a, b, 19); +// } + +// NYI-LABEL: @test_vqshrun_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) +// NYI: ret <8 x i8> [[VQSHRUN_N1]] +// uint8x8_t test_vqshrun_n_s16(int16x8_t a) { +// return vqshrun_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vqshrun_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) +// NYI: ret <4 x i16> [[VQSHRUN_N1]] +// uint16x4_t test_vqshrun_n_s32(int32x4_t a) { +// return vqshrun_n_s32(a, 9); +// } + +// NYI-LABEL: @test_vqshrun_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) +// NYI: ret <2 x i32> [[VQSHRUN_N1]] +// uint32x2_t test_vqshrun_n_s64(int64x2_t a) { +// return vqshrun_n_s64(a, 19); +// } + +// NYI-LABEL: @test_vqshrun_high_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) { +// return vqshrun_high_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vqshrun_high_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) { +// return vqshrun_high_n_s32(a, b, 9); +// } + +// NYI-LABEL: @test_vqshrun_high_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) { +// return vqshrun_high_n_s64(a, b, 19); +// } + +// NYI-LABEL: @test_vrshrn_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) +// NYI: ret <8 x i8> [[VRSHRN_N1]] +// int8x8_t test_vrshrn_n_s16(int16x8_t a) { +// return vrshrn_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vrshrn_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) +// NYI: ret <4 x i16> [[VRSHRN_N1]] +// int16x4_t 
test_vrshrn_n_s32(int32x4_t a) { +// return vrshrn_n_s32(a, 9); +// } + +// NYI-LABEL: @test_vrshrn_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) +// NYI: ret <2 x i32> [[VRSHRN_N1]] +// int32x2_t test_vrshrn_n_s64(int64x2_t a) { +// return vrshrn_n_s64(a, 19); +// } + +// NYI-LABEL: @test_vrshrn_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) +// NYI: ret <8 x i8> [[VRSHRN_N1]] +// uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { +// return vrshrn_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vrshrn_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) +// NYI: ret <4 x i16> [[VRSHRN_N1]] +// uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { +// return vrshrn_n_u32(a, 9); +// } + +// NYI-LABEL: @test_vrshrn_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) +// NYI: ret <2 x i32> [[VRSHRN_N1]] +// uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { +// return vrshrn_n_u64(a, 19); +// } + +// NYI-LABEL: @test_vrshrn_high_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) { +// return vrshrn_high_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vrshrn_high_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) { +// return vrshrn_high_n_s32(a, b, 9); +// } + +// NYI-LABEL: @test_vrshrn_high_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) { +// return vrshrn_high_n_s64(a, b, 19); +// } + +// NYI-LABEL: @test_vrshrn_high_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> +// NYI: ret <16 x 
i8> [[SHUFFLE_I]] +// uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { +// return vrshrn_high_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vrshrn_high_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { +// return vrshrn_high_n_u32(a, b, 9); +// } + +// NYI-LABEL: @test_vrshrn_high_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { +// return vrshrn_high_n_u64(a, b, 19); +// } + +// NYI-LABEL: @test_vqrshrun_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) +// NYI: ret <8 x i8> [[VQRSHRUN_N1]] +// uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { +// return vqrshrun_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vqrshrun_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) +// NYI: ret <4 x i16> [[VQRSHRUN_N1]] +// uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { +// return vqrshrun_n_s32(a, 9); +// } + +// NYI-LABEL: @test_vqrshrun_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) +// NYI: ret <2 x i32> [[VQRSHRUN_N1]] +// uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { +// return vqrshrun_n_s64(a, 19); +// } + +// NYI-LABEL: @test_vqrshrun_high_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) { +// return vqrshrun_high_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vqrshrun_high_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) { +// return vqrshrun_high_n_s32(a, b, 9); +// } + +// NYI-LABEL: @test_vqrshrun_high_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: 
[[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) { +// return vqrshrun_high_n_s64(a, b, 19); +// } + +// NYI-LABEL: @test_vqshrn_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) +// NYI: ret <8 x i8> [[VQSHRN_N1]] +// int8x8_t test_vqshrn_n_s16(int16x8_t a) { +// return vqshrn_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vqshrn_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) +// NYI: ret <4 x i16> [[VQSHRN_N1]] +// int16x4_t test_vqshrn_n_s32(int32x4_t a) { +// return vqshrn_n_s32(a, 9); +// } + +// NYI-LABEL: @test_vqshrn_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) +// NYI: ret <2 x i32> [[VQSHRN_N1]] +// int32x2_t test_vqshrn_n_s64(int64x2_t a) { +// return vqshrn_n_s64(a, 19); +// } + +// NYI-LABEL: @test_vqshrn_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) +// NYI: ret <8 x i8> [[VQSHRN_N1]] +// uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { +// return vqshrn_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vqshrn_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) +// NYI: ret <4 x i16> [[VQSHRN_N1]] +// uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { +// return vqshrn_n_u32(a, 9); +// } + +// NYI-LABEL: @test_vqshrn_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) +// NYI: ret <2 x i32> [[VQSHRN_N1]] +// uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { +// return vqshrn_n_u64(a, 19); +// } + +// NYI-LABEL: @test_vqshrn_high_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) { +// return vqshrn_high_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vqshrn_high_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 
+// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) { +// return vqshrn_high_n_s32(a, b, 9); +// } + +// NYI-LABEL: @test_vqshrn_high_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) { +// return vqshrn_high_n_s64(a, b, 19); +// } + +// NYI-LABEL: @test_vqshrn_high_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { +// return vqshrn_high_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vqshrn_high_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { +// return vqshrn_high_n_u32(a, b, 9); +// } + +// NYI-LABEL: @test_vqshrn_high_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { +// return vqshrn_high_n_u64(a, b, 19); +// } + +// NYI-LABEL: @test_vqrshrn_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) +// NYI: ret <8 x i8> [[VQRSHRN_N1]] +// int8x8_t test_vqrshrn_n_s16(int16x8_t a) { +// return vqrshrn_n_s16(a, 3); +// } + +// NYI-LABEL: @test_vqrshrn_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) +// NYI: ret <4 x i16> [[VQRSHRN_N1]] +// int16x4_t test_vqrshrn_n_s32(int32x4_t a) { +// return vqrshrn_n_s32(a, 9); +// } + +// NYI-LABEL: @test_vqrshrn_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) +// NYI: ret <2 x i32> [[VQRSHRN_N1]] +// int32x2_t test_vqrshrn_n_s64(int64x2_t a) { +// return vqrshrn_n_s64(a, 19); +// } + +// NYI-LABEL: @test_vqrshrn_n_u16( +// NYI: 
[[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) +// NYI: ret <8 x i8> [[VQRSHRN_N1]] +// uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { +// return vqrshrn_n_u16(a, 3); +// } + +// NYI-LABEL: @test_vqrshrn_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) +// NYI: ret <4 x i16> [[VQRSHRN_N1]] +// uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { +// return vqrshrn_n_u32(a, 9); +// } + +// NYI-LABEL: @test_vqrshrn_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) +// NYI: ret <2 x i32> [[VQRSHRN_N1]] +// uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { +// return vqrshrn_n_u64(a, 19); +// } + +// NYI-LABEL: @test_vqrshrn_high_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) { +// return vqrshrn_high_n_s16(a, b, 3); +// } + +// NYI-LABEL: @test_vqrshrn_high_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) { +// return vqrshrn_high_n_s32(a, b, 9); +// } + +// NYI-LABEL: @test_vqrshrn_high_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) { +// return vqrshrn_high_n_s64(a, b, 19); +// } + +// NYI-LABEL: @test_vqrshrn_high_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I]] +// uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { +// return vqrshrn_high_n_u16(a, b, 3); +// } + +// NYI-LABEL: @test_vqrshrn_high_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) +// NYI: [[SHUFFLE_I:%.*]] = 
shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I]] +// uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { +// return vqrshrn_high_n_u32(a, b, 9); +// } + +// NYI-LABEL: @test_vqrshrn_high_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I]] +// uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { +// return vqrshrn_high_n_u64(a, b, 19); +// } + +// NYI-LABEL: @test_vshll_n_s8( +// NYI: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> +// NYI: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// NYI: ret <8 x i16> [[VSHLL_N]] +// int16x8_t test_vshll_n_s8(int8x8_t a) { +// return vshll_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vshll_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// NYI: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// NYI: ret <4 x i32> [[VSHLL_N]] +// int32x4_t test_vshll_n_s16(int16x4_t a) { +// return vshll_n_s16(a, 9); +// } + +// NYI-LABEL: @test_vshll_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// NYI: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// NYI: ret <2 x i64> [[VSHLL_N]] +// int64x2_t test_vshll_n_s32(int32x2_t a) { +// return vshll_n_s32(a, 19); +// } + +// NYI-LABEL: @test_vshll_n_u8( +// NYI: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> +// NYI: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// NYI: ret <8 x i16> [[VSHLL_N]] +// uint16x8_t test_vshll_n_u8(uint8x8_t a) { +// return vshll_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vshll_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// NYI: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// NYI: ret <4 x i32> [[VSHLL_N]] +// uint32x4_t test_vshll_n_u16(uint16x4_t a) { +// return vshll_n_u16(a, 9); +// } + +// NYI-LABEL: @test_vshll_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// NYI: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// NYI: ret <2 x i64> [[VSHLL_N]] +// uint64x2_t test_vshll_n_u32(uint32x2_t a) { +// return vshll_n_u32(a, 19); +// } + +// NYI-LABEL: @test_vshll_high_n_s8( +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// NYI: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// NYI: ret <8 x i16> [[VSHLL_N]] +// int16x8_t test_vshll_high_n_s8(int8x16_t a) { +// return vshll_high_n_s8(a, 3); +// } + +// NYI-LABEL: @test_vshll_high_n_s16( +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// NYI: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// NYI: ret <4 x i32> 
[[VSHLL_N]] +// int32x4_t test_vshll_high_n_s16(int16x8_t a) { +// return vshll_high_n_s16(a, 9); +// } + +// NYI-LABEL: @test_vshll_high_n_s32( +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// NYI: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// NYI: ret <2 x i64> [[VSHLL_N]] +// int64x2_t test_vshll_high_n_s32(int32x4_t a) { +// return vshll_high_n_s32(a, 19); +// } + +// NYI-LABEL: @test_vshll_high_n_u8( +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// NYI: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], +// NYI: ret <8 x i16> [[VSHLL_N]] +// uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { +// return vshll_high_n_u8(a, 3); +// } + +// NYI-LABEL: @test_vshll_high_n_u16( +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// NYI: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], +// NYI: ret <4 x i32> [[VSHLL_N]] +// uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { +// return vshll_high_n_u16(a, 9); +// } + +// NYI-LABEL: @test_vshll_high_n_u32( +// NYI: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// NYI: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], +// NYI: ret <2 x i64> [[VSHLL_N]] +// uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { +// return vshll_high_n_u32(a, 19); +// } + +// NYI-LABEL: @test_vmovl_s8( +// NYI: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[VMOVL_I]] +// int16x8_t test_vmovl_s8(int8x8_t a) { +// return vmovl_s8(a); +// } + +// NYI-LABEL: @test_vmovl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[VMOVL_I]] +// int32x4_t test_vmovl_s16(int16x4_t a) { +// return vmovl_s16(a); +// } + +// NYI-LABEL: @test_vmovl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[VMOVL_I]] +// int64x2_t test_vmovl_s32(int32x2_t a) { +// return vmovl_s32(a); +// } + +// NYI-LABEL: @test_vmovl_u8( +// NYI: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[VMOVL_I]] +// uint16x8_t test_vmovl_u8(uint8x8_t a) { +// return vmovl_u8(a); +// } + +// NYI-LABEL: @test_vmovl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[VMOVL_I]] +// uint32x4_t test_vmovl_u16(uint16x4_t a) { +// return vmovl_u16(a); +// } + +// NYI-LABEL: @test_vmovl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[VMOVL_I]] +// uint64x2_t test_vmovl_u32(uint32x2_t a) { +// return vmovl_u32(a); +// } + +// NYI-LABEL: @test_vmovl_high_s8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = sext <8 x i8> 
[[SHUFFLE_I_I]] to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vmovl_high_s8(int8x16_t a) { +// return vmovl_high_s8(a); +// } + +// NYI-LABEL: @test_vmovl_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32> +// NYI: ret <4 x i32> [[TMP1]] +// int32x4_t test_vmovl_high_s16(int16x8_t a) { +// return vmovl_high_s16(a); +// } + +// NYI-LABEL: @test_vmovl_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64> +// NYI: ret <2 x i64> [[TMP1]] +// int64x2_t test_vmovl_high_s32(int32x4_t a) { +// return vmovl_high_s32(a); +// } + +// NYI-LABEL: @test_vmovl_high_u8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vmovl_high_u8(uint8x16_t a) { +// return vmovl_high_u8(a); +// } + +// NYI-LABEL: @test_vmovl_high_u16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32> +// NYI: ret <4 x i32> [[TMP1]] +// uint32x4_t test_vmovl_high_u16(uint16x8_t a) { +// return vmovl_high_u16(a); +// } + +// NYI-LABEL: @test_vmovl_high_u32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64> +// NYI: ret <2 x i64> [[TMP1]] +// uint64x2_t test_vmovl_high_u32(uint32x4_t a) { +// return vmovl_high_u32(a); +// } + +// NYI-LABEL: @test_vcvt_n_f32_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) +// NYI: ret <2 x float> [[VCVT_N1]] +// float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { +// return vcvt_n_f32_s32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_f32_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) +// NYI: ret <4 x float> [[VCVT_N1]] +// float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { +// return vcvtq_n_f32_s32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_f64_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) +// NYI: ret <2 x double> [[VCVT_N1]] +// float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) { +// return vcvtq_n_f64_s64(a, 50); +// } + +// NYI-LABEL: @test_vcvt_n_f32_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) +// NYI: ret <2 x float> [[VCVT_N1]] +// float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { 
+// return vcvt_n_f32_u32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_f32_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) +// NYI: ret <4 x float> [[VCVT_N1]] +// float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { +// return vcvtq_n_f32_u32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_f64_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) +// NYI: ret <2 x double> [[VCVT_N1]] +// float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { +// return vcvtq_n_f64_u64(a, 50); +// } + +// NYI-LABEL: @test_vcvt_n_s32_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> +// NYI: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) +// NYI: ret <2 x i32> [[VCVT_N1]] +// int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { +// return vcvt_n_s32_f32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_s32_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> +// NYI: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) +// NYI: ret <4 x i32> [[VCVT_N1]] +// int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { +// return vcvtq_n_s32_f32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> +// NYI: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) +// NYI: ret <2 x i64> [[VCVT_N1]] +// int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { +// return vcvtq_n_s64_f64(a, 50); +// } + +// NYI-LABEL: @test_vcvt_n_u32_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> +// NYI: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) +// NYI: ret <2 x i32> [[VCVT_N1]] +// uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { +// return vcvt_n_u32_f32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_u32_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> +// NYI: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) +// NYI: ret <4 x i32> [[VCVT_N1]] +// uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { +// return vcvtq_n_u32_f32(a, 31); +// } + +// NYI-LABEL: @test_vcvtq_n_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> +// NYI: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) +// NYI: ret <2 x i64> [[VCVT_N1]] +// uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { +// return vcvtq_n_u64_f64(a, 50); +// } + +// NYI-LABEL: @test_vaddl_s8( +// NYI: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> +// NYI: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], 
[[VMOVL_I4_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { +// return vaddl_s8(a, b); +// } + +// NYI-LABEL: @test_vaddl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { +// return vaddl_s16(a, b); +// } + +// NYI-LABEL: @test_vaddl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { +// return vaddl_s32(a, b); +// } + +// NYI-LABEL: @test_vaddl_u8( +// NYI: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> +// NYI: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { +// return vaddl_u8(a, b); +// } + +// NYI-LABEL: @test_vaddl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { +// return vaddl_u16(a, b); +// } + +// NYI-LABEL: @test_vaddl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { +// return vaddl_u32(a, b); +// } + +// NYI-LABEL: @test_vaddl_high_s8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) { +// return vaddl_high_s8(a, b); +// } + +// NYI-LABEL: @test_vaddl_high_s16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { +// return vaddl_high_s16(a, 
b); +// } + +// NYI-LABEL: @test_vaddl_high_s32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { +// return vaddl_high_s32(a, b); +// } + +// NYI-LABEL: @test_vaddl_high_u8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { +// return vaddl_high_u8(a, b); +// } + +// NYI-LABEL: @test_vaddl_high_u16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { +// return vaddl_high_u16(a, b); +// } + +// NYI-LABEL: @test_vaddl_high_u32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { +// return vaddl_high_u32(a, b); +// } + +// NYI-LABEL: @test_vaddw_s8( +// NYI: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { +// return vaddw_s8(a, b); +// } + +// NYI-LABEL: @test_vaddw_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { +// return vaddw_s16(a, b); +// } + +// NYI-LABEL: @test_vaddw_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { +// return 
vaddw_s32(a, b); +// } + +// NYI-LABEL: @test_vaddw_u8( +// NYI: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { +// return vaddw_u8(a, b); +// } + +// NYI-LABEL: @test_vaddw_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { +// return vaddw_u16(a, b); +// } + +// NYI-LABEL: @test_vaddw_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { +// return vaddw_u32(a, b); +// } + +// NYI-LABEL: @test_vaddw_high_s8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { +// return vaddw_high_s8(a, b); +// } + +// NYI-LABEL: @test_vaddw_high_s16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { +// return vaddw_high_s16(a, b); +// } + +// NYI-LABEL: @test_vaddw_high_s32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]] +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) { +// return vaddw_high_s32(a, b); +// } + +// NYI-LABEL: @test_vaddw_high_u8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { +// return vaddw_high_u8(a, b); +// } + +// NYI-LABEL: @test_vaddw_high_u16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { +// return vaddw_high_u16(a, b); +// } + +// NYI-LABEL: @test_vaddw_high_u32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]] +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { +// return vaddw_high_u32(a, b); +// } + +// NYI-LABEL: 
@test_vsubl_s8( +// NYI: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> +// NYI: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { +// return vsubl_s8(a, b); +// } + +// NYI-LABEL: @test_vsubl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { +// return vsubl_s16(a, b); +// } + +// NYI-LABEL: @test_vsubl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <2 x i64> [[SUB_I]] +// int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { +// return vsubl_s32(a, b); +// } + +// NYI-LABEL: @test_vsubl_u8( +// NYI: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> +// NYI: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { +// return vsubl_u8(a, b); +// } + +// NYI-LABEL: @test_vsubl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { +// return vsubl_u16(a, b); +// } + +// NYI-LABEL: @test_vsubl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] +// NYI: ret <2 x i64> [[SUB_I]] +// uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { +// return vsubl_u32(a, b); +// } + +// NYI-LABEL: @test_vsubl_high_s8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { +// return vsubl_high_s8(a, b); +// } + +// NYI-LABEL: @test_vsubl_high_s16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 
x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { +// return vsubl_high_s16(a, b); +// } + +// NYI-LABEL: @test_vsubl_high_s32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]] +// NYI: ret <2 x i64> [[SUB_I]] +// int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { +// return vsubl_high_s32(a, b); +// } + +// NYI-LABEL: @test_vsubl_high_u8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { +// return vsubl_high_u8(a, b); +// } + +// NYI-LABEL: @test_vsubl_high_u16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { +// return vsubl_high_u16(a, b); +// } + +// NYI-LABEL: @test_vsubl_high_u32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> +// NYI: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]] +// NYI: ret <2 x i64> [[SUB_I]] +// uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { +// return vsubl_high_u32(a, b); +// } + +// NYI-LABEL: @test_vsubw_s8( +// NYI: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { +// return vsubw_s8(a, b); +// } + +// NYI-LABEL: @test_vsubw_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { +// return vsubw_s16(a, b); +// } + +// NYI-LABEL: @test_vsubw_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = sext <2 x 
i32> %b to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] +// NYI: ret <2 x i64> [[SUB_I]] +// int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { +// return vsubw_s32(a, b); +// } + +// NYI-LABEL: @test_vsubw_u8( +// NYI: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { +// return vsubw_u8(a, b); +// } + +// NYI-LABEL: @test_vsubw_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { +// return vsubw_u16(a, b); +// } + +// NYI-LABEL: @test_vsubw_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] +// NYI: ret <2 x i64> [[SUB_I]] +// uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { +// return vsubw_u32(a, b); +// } + +// NYI-LABEL: @test_vsubw_high_s8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]] +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { +// return vsubw_high_s8(a, b); +// } + +// NYI-LABEL: @test_vsubw_high_s16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]] +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { +// return vsubw_high_s16(a, b); +// } + +// NYI-LABEL: @test_vsubw_high_s32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]] +// NYI: ret <2 x i64> [[SUB_I]] +// int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { +// return vsubw_high_s32(a, b); +// } + +// NYI-LABEL: @test_vsubw_high_u8( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]] +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { +// return vsubw_high_u8(a, b); +// } + +// NYI-LABEL: @test_vsubw_high_u16( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]] +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { +// return vsubw_high_u16(a, b); +// } + +// NYI-LABEL: @test_vsubw_high_u32( +// NYI: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> +// NYI: [[SUB_I:%.*]] = sub 
<2 x i64> %a, [[TMP1]] +// NYI: ret <2 x i64> [[SUB_I]] +// uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { +// return vsubw_high_u32(a, b); +// } + +// NYI-LABEL: @test_vaddhn_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b +// NYI: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// NYI: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> +// NYI: ret <8 x i8> [[VADDHN2_I]] +// int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { +// return vaddhn_s16(a, b); +// } + +// NYI-LABEL: @test_vaddhn_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b +// NYI: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// NYI: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> +// NYI: ret <4 x i16> [[VADDHN2_I]] +// int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { +// return vaddhn_s32(a, b); +// } + +// NYI-LABEL: @test_vaddhn_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b +// NYI: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// NYI: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> +// NYI: ret <2 x i32> [[VADDHN2_I]] +// int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { +// return vaddhn_s64(a, b); +// } + +// NYI-LABEL: @test_vaddhn_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b +// NYI: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], +// NYI: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> +// NYI: ret <8 x i8> [[VADDHN2_I]] +// uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { +// return vaddhn_u16(a, b); +// } + +// NYI-LABEL: @test_vaddhn_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b +// NYI: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], +// NYI: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> +// NYI: ret <4 x i16> [[VADDHN2_I]] +// uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { +// return vaddhn_u32(a, b); +// } + +// NYI-LABEL: @test_vaddhn_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b +// NYI: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], +// NYI: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> +// NYI: ret <2 x i32> [[VADDHN2_I]] +// uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { +// return vaddhn_u64(a, b); +// } + +// NYI-LABEL: @test_vaddhn_high_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b +// NYI: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], +// NYI: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { +// return vaddhn_high_s16(r, a, b); +// } + +// NYI-LABEL: 
@test_vaddhn_high_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b +// NYI: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], +// NYI: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { +// return vaddhn_high_s32(r, a, b); +// } + +// NYI-LABEL: @test_vaddhn_high_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b +// NYI: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], +// NYI: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { +// return vaddhn_high_s64(r, a, b); +// } + +// NYI-LABEL: @test_vaddhn_high_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b +// NYI: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], +// NYI: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { +// return vaddhn_high_u16(r, a, b); +// } + +// NYI-LABEL: @test_vaddhn_high_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b +// NYI: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], +// NYI: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { +// return vaddhn_high_u32(r, a, b); +// } + +// NYI-LABEL: @test_vaddhn_high_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b +// NYI: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], +// NYI: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { +// return vaddhn_high_u64(r, a, b); +// } + +// NYI-LABEL: @test_vraddhn_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i8> [[VRADDHN_V2_I]] +// int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { +// return vraddhn_s16(a, b); +// } + +// NYI-LABEL: @test_vraddhn_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: 
[[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRADDHN_V2_I]] +// int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { +// return vraddhn_s32(a, b); +// } + +// NYI-LABEL: @test_vraddhn_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRADDHN_V2_I]] +// int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { +// return vraddhn_s64(a, b); +// } + +// NYI-LABEL: @test_vraddhn_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i8> [[VRADDHN_V2_I]] +// uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { +// return vraddhn_u16(a, b); +// } + +// NYI-LABEL: @test_vraddhn_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRADDHN_V2_I]] +// uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { +// return vraddhn_u32(a, b); +// } + +// NYI-LABEL: @test_vraddhn_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRADDHN_V2_I]] +// uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { +// return vraddhn_u64(a, b); +// } + +// NYI-LABEL: @test_vraddhn_high_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { +// return vraddhn_high_s16(r, a, b); +// } + +// NYI-LABEL: @test_vraddhn_high_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { +// return vraddhn_high_s32(r, a, b); +// } + +// NYI-LABEL: @test_vraddhn_high_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: 
[[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { +// return vraddhn_high_s64(r, a, b); +// } + +// NYI-LABEL: @test_vraddhn_high_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { +// return vraddhn_high_u16(r, a, b); +// } + +// NYI-LABEL: @test_vraddhn_high_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { +// return vraddhn_high_u32(r, a, b); +// } + +// NYI-LABEL: @test_vraddhn_high_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { +// return vraddhn_high_u64(r, a, b); +// } + +// NYI-LABEL: @test_vsubhn_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b +// NYI: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// NYI: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> +// NYI: ret <8 x i8> [[VSUBHN2_I]] +// int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { +// return vsubhn_s16(a, b); +// } + +// NYI-LABEL: @test_vsubhn_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b +// NYI: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// NYI: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> +// NYI: ret <4 x i16> [[VSUBHN2_I]] +// int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { +// return vsubhn_s32(a, b); +// } + +// NYI-LABEL: @test_vsubhn_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b +// NYI: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// NYI: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> +// NYI: ret <2 x i32> [[VSUBHN2_I]] +// int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { +// return vsubhn_s64(a, b); +// } + +// NYI-LABEL: @test_vsubhn_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 
+// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b +// NYI: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], +// NYI: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> +// NYI: ret <8 x i8> [[VSUBHN2_I]] +// uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { +// return vsubhn_u16(a, b); +// } + +// NYI-LABEL: @test_vsubhn_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b +// NYI: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], +// NYI: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> +// NYI: ret <4 x i16> [[VSUBHN2_I]] +// uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { +// return vsubhn_u32(a, b); +// } + +// NYI-LABEL: @test_vsubhn_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b +// NYI: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], +// NYI: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> +// NYI: ret <2 x i32> [[VSUBHN2_I]] +// uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { +// return vsubhn_u64(a, b); +// } + +// NYI-LABEL: @test_vsubhn_high_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b +// NYI: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], +// NYI: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { +// return vsubhn_high_s16(r, a, b); +// } + +// NYI-LABEL: @test_vsubhn_high_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b +// NYI: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], +// NYI: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { +// return vsubhn_high_s32(r, a, b); +// } + +// NYI-LABEL: @test_vsubhn_high_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b +// NYI: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], +// NYI: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { +// return vsubhn_high_s64(r, a, b); +// } + +// NYI-LABEL: @test_vsubhn_high_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b +// NYI: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], +// NYI: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> +// NYI: ret <16 x i8> 
[[SHUFFLE_I_I]] +// uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { +// return vsubhn_high_u16(r, a, b); +// } + +// NYI-LABEL: @test_vsubhn_high_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b +// NYI: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], +// NYI: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { +// return vsubhn_high_u32(r, a, b); +// } + +// NYI-LABEL: @test_vsubhn_high_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b +// NYI: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], +// NYI: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { +// return vsubhn_high_u64(r, a, b); +// } + +// NYI-LABEL: @test_vrsubhn_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i8> [[VRSUBHN_V2_I]] +// int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { +// return vrsubhn_s16(a, b); +// } + +// NYI-LABEL: @test_vrsubhn_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRSUBHN_V2_I]] +// int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { +// return vrsubhn_s32(a, b); +// } + +// NYI-LABEL: @test_vrsubhn_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRSUBHN_V2_I]] +// int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { +// return vrsubhn_s64(a, b); +// } + +// NYI-LABEL: @test_vrsubhn_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i8> [[VRSUBHN_V2_I]] +// uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { +// return vrsubhn_u16(a, b); +// } + +// NYI-LABEL: @test_vrsubhn_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> +// NYI: ret <4 x i16> [[VRSUBHN_V2_I]] +// uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { +// return vrsubhn_u32(a, b); +// } + +// 
NYI-LABEL: @test_vrsubhn_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> +// NYI: ret <2 x i32> [[VRSUBHN_V2_I]] +// uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { +// return vrsubhn_u64(a, b); +// } + +// NYI-LABEL: @test_vrsubhn_high_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { +// return vrsubhn_high_s16(r, a, b); +// } + +// NYI-LABEL: @test_vrsubhn_high_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { +// return vrsubhn_high_s32(r, a, b); +// } + +// NYI-LABEL: @test_vrsubhn_high_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { +// return vrsubhn_high_s64(r, a, b); +// } + +// NYI-LABEL: @test_vrsubhn_high_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> +// NYI: ret <16 x i8> [[SHUFFLE_I_I]] +// uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { +// return vrsubhn_high_u16(r, a, b); +// } + +// NYI-LABEL: @test_vrsubhn_high_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) +// NYI: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> +// NYI: ret <8 x i16> [[SHUFFLE_I_I]] +// uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { +// return vrsubhn_high_u32(r, a, b); +// } + +// NYI-LABEL: @test_vrsubhn_high_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> +// NYI: ret <4 x i32> [[SHUFFLE_I_I]] +// uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { +// return vrsubhn_high_u64(r, a, b); +// } + +// NYI-LABEL: @test_vabdl_s8( +// NYI: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> +// NYI: ret <8 x i16> [[VMOVL_I_I]] +// int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { +// return vabdl_s8(a, b); +// } + +// NYI-LABEL: @test_vabdl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32> +// NYI: ret <4 x i32> [[VMOVL_I_I]] +// int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { +// return vabdl_s16(a, b); +// } + +// NYI-LABEL: @test_vabdl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64> +// NYI: ret <2 x i64> [[VMOVL_I_I]] +// int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { +// return vabdl_s32(a, b); +// } + +// NYI-LABEL: @test_vabdl_u8( +// NYI: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> +// NYI: ret <8 x i16> [[VMOVL_I_I]] +// uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { +// return vabdl_u8(a, b); +// } + +// NYI-LABEL: @test_vabdl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32> +// NYI: ret <4 x i32> [[VMOVL_I_I]] +// uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { +// return vabdl_u16(a, b); +// } + +// NYI-LABEL: @test_vabdl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64> +// NYI: ret <2 x i64> [[VMOVL_I_I]] +// uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { +// return vabdl_u32(a, b); +// } + +// NYI-LABEL: @test_vabal_s8( +// NYI: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) +// NYI: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { +// return vabal_s8(a, b, c); +// } + +// NYI-LABEL: 
@test_vabal_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { +// return vabal_s16(a, b, c); +// } + +// NYI-LABEL: @test_vabal_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { +// return vabal_s32(a, b, c); +// } + +// NYI-LABEL: @test_vabal_u8( +// NYI: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) +// NYI: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { +// return vabal_u8(a, b, c); +// } + +// NYI-LABEL: @test_vabal_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { +// return vabal_u16(a, b, c); +// } + +// NYI-LABEL: @test_vabal_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { +// return vabal_u32(a, b, c); +// } + +// NYI-LABEL: @test_vabdl_high_s8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> +// NYI: ret <8 x i16> [[VMOVL_I_I_I]] +// int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) { +// return vabdl_high_s8(a, b); +// } + +// NYI-LABEL: @test_vabdl_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = 
bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> +// NYI: ret <4 x i32> [[VMOVL_I_I_I]] +// int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) { +// return vabdl_high_s16(a, b); +// } + +// NYI-LABEL: @test_vabdl_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> +// NYI: ret <2 x i64> [[VMOVL_I_I_I]] +// int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { +// return vabdl_high_s32(a, b); +// } + +// NYI-LABEL: @test_vabdl_high_u8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> +// NYI: ret <8 x i16> [[VMOVL_I_I_I]] +// uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { +// return vabdl_high_u8(a, b); +// } + +// NYI-LABEL: @test_vabdl_high_u16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> +// NYI: ret <4 x i32> [[VMOVL_I_I_I]] +// uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) { +// return vabdl_high_u16(a, b); +// } + +// NYI-LABEL: @test_vabdl_high_u32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64> +// NYI: ret <2 x i64> [[VMOVL_I_I_I]] +// uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { +// return vabdl_high_u32(a, b); +// } + +// NYI-LABEL: @test_vabal_high_s8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, 
<16 x i8> %c, <8 x i32> +// NYI: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16> +// NYI: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]] +// NYI: ret <8 x i16> [[ADD_I_I]] +// int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { +// return vabal_high_s8(a, b, c); +// } + +// NYI-LABEL: @test_vabal_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32> +// NYI: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] +// NYI: ret <4 x i32> [[ADD_I_I]] +// int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { +// return vabal_high_s16(a, b, c); +// } + +// NYI-LABEL: @test_vabal_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64> +// NYI: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] +// NYI: ret <2 x i64> [[ADD_I_I]] +// int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { +// return vabal_high_s32(a, b, c); +// } + +// NYI-LABEL: @test_vabal_high_u8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> +// NYI: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16> +// NYI: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]] +// NYI: ret <8 x i16> [[ADD_I_I]] +// uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { +// return vabal_high_u8(a, b, c); +// } + +// NYI-LABEL: @test_vabal_high_u16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32> +// NYI: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] +// NYI: ret <4 x i32> [[ADD_I_I]] +// uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, 
uint16x8_t c) { +// return vabal_high_u16(a, b, c); +// } + +// NYI-LABEL: @test_vabal_high_u32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> +// NYI: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64> +// NYI: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] +// NYI: ret <2 x i64> [[ADD_I_I]] +// uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { +// return vabal_high_u32(a, b, c); +// } + +// NYI-LABEL: @test_vmull_s8( +// NYI: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i16> [[VMULL_I]] +// int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { +// return vmull_s8(a, b); +// } + +// NYI-LABEL: @test_vmull_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i32> [[VMULL2_I]] +// int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { +// return vmull_s16(a, b); +// } + +// NYI-LABEL: @test_vmull_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i64> [[VMULL2_I]] +// int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { +// return vmull_s32(a, b); +// } + +// NYI-LABEL: @test_vmull_u8( +// NYI: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i16> [[VMULL_I]] +// uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { +// return vmull_u8(a, b); +// } + +// NYI-LABEL: @test_vmull_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i32> [[VMULL2_I]] +// uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { +// return vmull_u16(a, b); +// } + +// NYI-LABEL: @test_vmull_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i64> [[VMULL2_I]] +// uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { +// return vmull_u32(a, b); +// } + +// NYI-LABEL: @test_vmull_high_s8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: ret <8 x i16> [[VMULL_I_I]] +// int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { +// return vmull_high_s8(a, b); +// } + +// NYI-LABEL: @test_vmull_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] 
= shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: ret <4 x i32> [[VMULL2_I_I]] +// int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { +// return vmull_high_s16(a, b); +// } + +// NYI-LABEL: @test_vmull_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: ret <2 x i64> [[VMULL2_I_I]] +// int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { +// return vmull_high_s32(a, b); +// } + +// NYI-LABEL: @test_vmull_high_u8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: ret <8 x i16> [[VMULL_I_I]] +// uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { +// return vmull_high_u8(a, b); +// } + +// NYI-LABEL: @test_vmull_high_u16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: ret <4 x i32> [[VMULL2_I_I]] +// uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { +// return vmull_high_u16(a, b); +// } + +// NYI-LABEL: @test_vmull_high_u32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: ret <2 x i64> [[VMULL2_I_I]] +// uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) { +// return vmull_high_u32(a, b); +// } + +// NYI-LABEL: @test_vmlal_s8( +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { +// return vmlal_s8(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { +// return 
vmlal_s16(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { +// return vmlal_s32(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_u8( +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) +// NYI: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] +// NYI: ret <8 x i16> [[ADD_I]] +// uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { +// return vmlal_u8(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) +// NYI: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] +// NYI: ret <4 x i32> [[ADD_I]] +// uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { +// return vmlal_u16(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) +// NYI: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] +// NYI: ret <2 x i64> [[ADD_I]] +// uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { +// return vmlal_u32(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_high_s8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> +// NYI: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]] +// NYI: ret <8 x i16> [[ADD_I_I]] +// int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { +// return vmlal_high_s8(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] +// NYI: ret <4 x i32> [[ADD_I_I]] +// int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { +// return vmlal_high_s16(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] +// NYI: ret <2 x i64> [[ADD_I_I]] +// int64x2_t 
test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { +// return vmlal_high_s32(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_high_u8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> +// NYI: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]] +// NYI: ret <8 x i16> [[ADD_I_I]] +// uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { +// return vmlal_high_u8(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_high_u16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] +// NYI: ret <4 x i32> [[ADD_I_I]] +// uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { +// return vmlal_high_u16(a, b, c); +// } + +// NYI-LABEL: @test_vmlal_high_u32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] +// NYI: ret <2 x i64> [[ADD_I_I]] +// uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { +// return vmlal_high_u32(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_s8( +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { +// return vmlsl_s8(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c) +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { +// return vmlsl_s16(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c) +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] +// NYI: ret <2 x i64> [[SUB_I]] +// int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { +// return vmlsl_s32(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_u8( +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) +// NYI: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] +// NYI: ret <8 x i16> [[SUB_I]] +// uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t 
b, uint8x8_t c) { +// return vmlsl_u8(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c) +// NYI: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] +// NYI: ret <4 x i32> [[SUB_I]] +// uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { +// return vmlsl_u16(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) +// NYI: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] +// NYI: ret <2 x i64> [[SUB_I]] +// uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { +// return vmlsl_u32(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_high_s8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> +// NYI: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]] +// NYI: ret <8 x i16> [[SUB_I_I]] +// int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { +// return vmlsl_high_s8(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] +// NYI: ret <4 x i32> [[SUB_I_I]] +// int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { +// return vmlsl_high_s16(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] +// NYI: ret <2 x i64> [[SUB_I_I]] +// int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { +// return vmlsl_high_s32(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_high_u8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> +// NYI: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]] +// NYI: ret <8 x i16> [[SUB_I_I]] +// uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { +// return vmlsl_high_u8(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_high_u16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> 
%b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] +// NYI: ret <4 x i32> [[SUB_I_I]] +// uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { +// return vmlsl_high_u16(a, b, c); +// } + +// NYI-LABEL: @test_vmlsl_high_u32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] +// NYI: ret <2 x i64> [[SUB_I_I]] +// uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { +// return vmlsl_high_u32(a, b, c); +// } + +// NYI-LABEL: @test_vqdmull_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) +// NYI: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQDMULL_V2_I]] +// int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { +// return vqdmull_s16(a, b); +// } + +// NYI-LABEL: @test_vqdmull_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) +// NYI: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQDMULL_V2_I]] +// int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { +// return vqdmull_s32(a, b); +// } + +// NYI-LABEL: @test_vqdmlal_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// NYI: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// NYI: ret <4 x i32> [[VQDMLAL_V3_I]] +// int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { +// return vqdmlal_s16(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlal_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// NYI: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// NYI: ret <2 x i64> [[VQDMLAL_V3_I]] +// int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { +// return vqdmlal_s32(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlsl_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// 
NYI: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> +// NYI: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) +// NYI: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// NYI: ret <4 x i32> [[VQDMLSL_V3_I]] +// int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { +// return vqdmlsl_s16(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlsl_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> +// NYI: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) +// NYI: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// NYI: ret <2 x i64> [[VQDMLSL_V3_I]] +// int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { +// return vqdmlsl_s32(a, b, c); +// } + +// NYI-LABEL: @test_vqdmull_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8> +// NYI: ret <4 x i32> [[VQDMULL_V2_I_I]] +// int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) { +// return vqdmull_high_s16(a, b); +// } + +// NYI-LABEL: @test_vqdmull_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VQDMULL_V2_I_I]] +// int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) { +// return vqdmull_high_s32(a, b); +// } + +// NYI-LABEL: @test_vqdmlal_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) +// NYI: ret <4 x i32> [[VQDMLAL_V3_I_I]] +// int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { +// return vqdmlal_high_s16(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlal_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: 
[[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) +// NYI: ret <2 x i64> [[VQDMLAL_V3_I_I]] +// int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { +// return vqdmlal_high_s32(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlsl_high_s16( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) +// NYI: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) +// NYI: ret <4 x i32> [[VQDMLSL_V3_I_I]] +// int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { +// return vqdmlsl_high_s16(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlsl_high_s32( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> +// NYI: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) +// NYI: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) +// NYI: ret <2 x i64> [[VQDMLSL_V3_I_I]] +// int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { +// return vqdmlsl_high_s32(a, b, c); +// } + +// NYI-LABEL: @test_vmull_p8( +// NYI: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i16> [[VMULL_I]] +// poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { +// return vmull_p8(a, b); +// } + +// NYI-LABEL: @test_vmull_high_p8( +// NYI: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> +// NYI: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> +// NYI: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) +// NYI: ret <8 x i16> [[VMULL_I_I]] +// poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) { +// return vmull_high_p8(a, b); +// } + +// NYI-LABEL: @test_vaddd_s64( +// NYI: [[VADDD_I:%.*]] = add i64 %a, %b +// NYI: ret i64 [[VADDD_I]] +// int64_t test_vaddd_s64(int64_t a, int64_t b) { +// return vaddd_s64(a, b); +// } + +// NYI-LABEL: @test_vaddd_u64( +// NYI: [[VADDD_I:%.*]] = add i64 %a, %b +// NYI: ret i64 [[VADDD_I]] +// uint64_t test_vaddd_u64(uint64_t a, uint64_t b) { +// return vaddd_u64(a, b); +// } + +// NYI-LABEL: @test_vsubd_s64( +// NYI: [[VSUBD_I:%.*]] = sub i64 %a, %b +// NYI: ret i64 [[VSUBD_I]] +// int64_t test_vsubd_s64(int64_t a, int64_t b) { +// return vsubd_s64(a, b); +// } + +// NYI-LABEL: @test_vsubd_u64( +// NYI: 
[[VSUBD_I:%.*]] = sub i64 %a, %b +// NYI: ret i64 [[VSUBD_I]] +// uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { +// return vsubd_u64(a, b); +// } + +// NYI-LABEL: @test_vqaddb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// int8_t test_vqaddb_s8(int8_t a, int8_t b) { +// return vqaddb_s8(a, b); +// } + +// NYI-LABEL: @test_vqaddh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vqaddh_s16(int16_t a, int16_t b) { +// return vqaddh_s16(a, b); +// } + +// NYI-LABEL: @test_vqadds_s32( +// NYI: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQADDS_S32_I]] +// int32_t test_vqadds_s32(int32_t a, int32_t b) { +// return vqadds_s32(a, b); +// } + +// NYI-LABEL: @test_vqaddd_s64( +// NYI: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQADDD_S64_I]] +// int64_t test_vqaddd_s64(int64_t a, int64_t b) { +// return vqaddd_s64(a, b); +// } + +// NYI-LABEL: @test_vqaddb_u8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) { +// return vqaddb_u8(a, b); +// } + +// NYI-LABEL: @test_vqaddh_u16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) { +// return vqaddh_u16(a, b); +// } + +// NYI-LABEL: @test_vqadds_u32( +// NYI: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQADDS_U32_I]] +// uint32_t test_vqadds_u32(uint32_t a, uint32_t b) { +// return vqadds_u32(a, b); +// } + +// NYI-LABEL: @test_vqaddd_u64( +// NYI: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQADDD_U64_I]] +// uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) { +// return vqaddd_u64(a, b); +// } + +// NYI-LABEL: @test_vqsubb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// int8_t test_vqsubb_s8(int8_t a, int8_t b) { +// return vqsubb_s8(a, b); +// } + +// NYI-LABEL: @test_vqsubh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> 
poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vqsubh_s16(int16_t a, int16_t b) { +// return vqsubh_s16(a, b); +// } + +// NYI-LABEL: @test_vqsubs_s32( +// NYI: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQSUBS_S32_I]] +// int32_t test_vqsubs_s32(int32_t a, int32_t b) { +// return vqsubs_s32(a, b); +// } + +// NYI-LABEL: @test_vqsubd_s64( +// NYI: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQSUBD_S64_I]] +// int64_t test_vqsubd_s64(int64_t a, int64_t b) { +// return vqsubd_s64(a, b); +// } + +// NYI-LABEL: @test_vqsubb_u8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) { +// return vqsubb_u8(a, b); +// } + +// NYI-LABEL: @test_vqsubh_u16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) { +// return vqsubh_u16(a, b); +// } + +// NYI-LABEL: @test_vqsubs_u32( +// NYI: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQSUBS_U32_I]] +// uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) { +// return vqsubs_u32(a, b); +// } + +// NYI-LABEL: @test_vqsubd_u64( +// NYI: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQSUBD_U64_I]] +// uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) { +// return vqsubd_u64(a, b); +// } + +// NYI-LABEL: @test_vshld_s64( +// NYI: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VSHLD_S64_I]] +// int64_t test_vshld_s64(int64_t a, int64_t b) { +// return vshld_s64(a, b); +// } + +// NYI-LABEL: @test_vshld_u64( +// NYI: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VSHLD_U64_I]] +// uint64_t test_vshld_u64(uint64_t a, int64_t b) { +// return vshld_u64(a, b); +// } + +// NYI-LABEL: @test_vqshlb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// int8_t test_vqshlb_s8(int8_t a, int8_t b) { +// return vqshlb_s8(a, b); +// } + +// NYI-LABEL: @test_vqshlh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: 
[[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vqshlh_s16(int16_t a, int16_t b) { +// return vqshlh_s16(a, b); +// } + +// NYI-LABEL: @test_vqshls_s32( +// NYI: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQSHLS_S32_I]] +// int32_t test_vqshls_s32(int32_t a, int32_t b) { +// return vqshls_s32(a, b); +// } + +// NYI-LABEL: @test_vqshld_s64( +// NYI: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQSHLD_S64_I]] +// int64_t test_vqshld_s64(int64_t a, int64_t b) { +// return vqshld_s64(a, b); +// } + +// NYI-LABEL: @test_vqshlb_u8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// uint8_t test_vqshlb_u8(uint8_t a, int8_t b) { +// return vqshlb_u8(a, b); +// } + +// NYI-LABEL: @test_vqshlh_u16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vqshlh_u16(uint16_t a, int16_t b) { +// return vqshlh_u16(a, b); +// } + +// NYI-LABEL: @test_vqshls_u32( +// NYI: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQSHLS_U32_I]] +// uint32_t test_vqshls_u32(uint32_t a, int32_t b) { +// return vqshls_u32(a, b); +// } + +// NYI-LABEL: @test_vqshld_u64( +// NYI: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQSHLD_U64_I]] +// uint64_t test_vqshld_u64(uint64_t a, int64_t b) { +// return vqshld_u64(a, b); +// } + +// NYI-LABEL: @test_vrshld_s64( +// NYI: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VRSHLD_S64_I]] +// int64_t test_vrshld_s64(int64_t a, int64_t b) { +// return vrshld_s64(a, b); +// } + +// NYI-LABEL: @test_vrshld_u64( +// NYI: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VRSHLD_U64_I]] +// uint64_t test_vrshld_u64(uint64_t a, int64_t b) { +// return vrshld_u64(a, b); +// } + +// NYI-LABEL: @test_vqrshlb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// int8_t test_vqrshlb_s8(int8_t a, int8_t b) { +// return vqrshlb_s8(a, b); +// } + +// NYI-LABEL: @test_vqrshlh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vqrshlh_s16(int16_t a, int16_t b) { +// return vqrshlh_s16(a, b); +// } + +// NYI-LABEL: 
@test_vqrshls_s32( +// NYI: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQRSHLS_S32_I]] +// int32_t test_vqrshls_s32(int32_t a, int32_t b) { +// return vqrshls_s32(a, b); +// } + +// NYI-LABEL: @test_vqrshld_s64( +// NYI: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQRSHLD_S64_I]] +// int64_t test_vqrshld_s64(int64_t a, int64_t b) { +// return vqrshld_s64(a, b); +// } + +// NYI-LABEL: @test_vqrshlb_u8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) { +// return vqrshlb_u8(a, b); +// } + +// NYI-LABEL: @test_vqrshlh_u16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) { +// return vqrshlh_u16(a, b); +// } + +// NYI-LABEL: @test_vqrshls_u32( +// NYI: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQRSHLS_U32_I]] +// uint32_t test_vqrshls_u32(uint32_t a, int32_t b) { +// return vqrshls_u32(a, b); +// } + +// NYI-LABEL: @test_vqrshld_u64( +// NYI: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VQRSHLD_U64_I]] +// uint64_t test_vqrshld_u64(uint64_t a, int64_t b) { +// return vqrshld_u64(a, b); +// } + +// NYI-LABEL: @test_vpaddd_s64( +// NYI: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) +// NYI: ret i64 [[VPADDD_S64_I]] +// int64_t test_vpaddd_s64(int64x2_t a) { +// return vpaddd_s64(a); +// } + +// NYI-LABEL: @test_vpadds_f32( +// NYI: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0 +// NYI: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1 +// NYI: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]] +// NYI: ret float [[VPADDD_I]] +// float32_t test_vpadds_f32(float32x2_t a) { +// return vpadds_f32(a); +// } + +// NYI-LABEL: @test_vpaddd_f64( +// NYI: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0 +// NYI: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1 +// NYI: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]] +// NYI: ret double [[VPADDD_I]] +// float64_t test_vpaddd_f64(float64x2_t a) { +// return vpaddd_f64(a); +// } + +// NYI-LABEL: @test_vpmaxnms_f32( +// NYI: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VPMAXNMS_F32_I]] +// float32_t test_vpmaxnms_f32(float32x2_t a) { +// return vpmaxnms_f32(a); +// } + +// NYI-LABEL: @test_vpmaxnmqd_f64( +// NYI: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VPMAXNMQD_F64_I]] +// float64_t test_vpmaxnmqd_f64(float64x2_t a) { +// return vpmaxnmqd_f64(a); +// } + +// NYI-LABEL: @test_vpmaxs_f32( +// NYI: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VPMAXS_F32_I]] +// float32_t 
test_vpmaxs_f32(float32x2_t a) { +// return vpmaxs_f32(a); +// } + +// NYI-LABEL: @test_vpmaxqd_f64( +// NYI: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VPMAXQD_F64_I]] +// float64_t test_vpmaxqd_f64(float64x2_t a) { +// return vpmaxqd_f64(a); +// } + +// NYI-LABEL: @test_vpminnms_f32( +// NYI: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VPMINNMS_F32_I]] +// float32_t test_vpminnms_f32(float32x2_t a) { +// return vpminnms_f32(a); +// } + +// NYI-LABEL: @test_vpminnmqd_f64( +// NYI: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VPMINNMQD_F64_I]] +// float64_t test_vpminnmqd_f64(float64x2_t a) { +// return vpminnmqd_f64(a); +// } + +// NYI-LABEL: @test_vpmins_f32( +// NYI: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VPMINS_F32_I]] +// float32_t test_vpmins_f32(float32x2_t a) { +// return vpmins_f32(a); +// } + +// NYI-LABEL: @test_vpminqd_f64( +// NYI: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VPMINQD_F64_I]] +// float64_t test_vpminqd_f64(float64x2_t a) { +// return vpminqd_f64(a); +// } + +// NYI-LABEL: @test_vqdmulhh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vqdmulhh_s16(int16_t a, int16_t b) { +// return vqdmulhh_s16(a, b); +// } + +// NYI-LABEL: @test_vqdmulhs_s32( +// NYI: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQDMULHS_S32_I]] +// int32_t test_vqdmulhs_s32(int32_t a, int32_t b) { +// return vqdmulhs_s32(a, b); +// } + +// NYI-LABEL: @test_vqrdmulhh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) { +// return vqrdmulhh_s16(a, b); +// } + +// NYI-LABEL: @test_vqrdmulhs_s32( +// NYI: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VQRDMULHS_S32_I]] +// int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) { +// return vqrdmulhs_s32(a, b); +// } + +// NYI-LABEL: @test_vmulxs_f32( +// NYI: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) +// NYI: ret float [[VMULXS_F32_I]] +// float32_t test_vmulxs_f32(float32_t a, float32_t b) { +// return vmulxs_f32(a, b); +// } + +// NYI-LABEL: @test_vmulxd_f64( +// NYI: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) +// NYI: ret double [[VMULXD_F64_I]] +// float64_t test_vmulxd_f64(float64_t a, float64_t b) { +// return vmulxd_f64(a, b); +// } + +// NYI-LABEL: @test_vmulx_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VMULX2_I:%.*]] = call <1 x double> 
@llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VMULX2_I]] +// float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) { +// return vmulx_f64(a, b); +// } + +// NYI-LABEL: @test_vrecpss_f32( +// NYI: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) +// NYI: ret float [[VRECPS_I]] +// float32_t test_vrecpss_f32(float32_t a, float32_t b) { +// return vrecpss_f32(a, b); +// } + +// NYI-LABEL: @test_vrecpsd_f64( +// NYI: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) +// NYI: ret double [[VRECPS_I]] +// float64_t test_vrecpsd_f64(float64_t a, float64_t b) { +// return vrecpsd_f64(a, b); +// } + +// NYI-LABEL: @test_vrsqrtss_f32( +// NYI: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) +// NYI: ret float [[VRSQRTSS_F32_I]] +// float32_t test_vrsqrtss_f32(float32_t a, float32_t b) { +// return vrsqrtss_f32(a, b); +// } + +// NYI-LABEL: @test_vrsqrtsd_f64( +// NYI: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) +// NYI: ret double [[VRSQRTSD_F64_I]] +// float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) { +// return vrsqrtsd_f64(a, b); +// } + +// NYI-LABEL: @test_vcvts_f32_s32( +// NYI: [[TMP0:%.*]] = sitofp i32 %a to float +// NYI: ret float [[TMP0]] +// float32_t test_vcvts_f32_s32(int32_t a) { +// return vcvts_f32_s32(a); +// } + +// NYI-LABEL: @test_vcvtd_f64_s64( +// NYI: [[TMP0:%.*]] = sitofp i64 %a to double +// NYI: ret double [[TMP0]] +// float64_t test_vcvtd_f64_s64(int64_t a) { +// return vcvtd_f64_s64(a); +// } + +// NYI-LABEL: @test_vcvts_f32_u32( +// NYI: [[TMP0:%.*]] = uitofp i32 %a to float +// NYI: ret float [[TMP0]] +// float32_t test_vcvts_f32_u32(uint32_t a) { +// return vcvts_f32_u32(a); +// } + +// NYI-LABEL: @test_vcvtd_f64_u64( +// NYI: [[TMP0:%.*]] = uitofp i64 %a to double +// NYI: ret double [[TMP0]] +// float64_t test_vcvtd_f64_u64(uint64_t a) { +// return vcvtd_f64_u64(a); +// } + +// NYI-LABEL: @test_vrecpes_f32( +// NYI: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) +// NYI: ret float [[VRECPES_F32_I]] +// float32_t test_vrecpes_f32(float32_t a) { +// return vrecpes_f32(a); +// } + +// NYI-LABEL: @test_vrecped_f64( +// NYI: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) +// NYI: ret double [[VRECPED_F64_I]] +// float64_t test_vrecped_f64(float64_t a) { +// return vrecped_f64(a); +// } + +// NYI-LABEL: @test_vrecpxs_f32( +// NYI: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) +// NYI: ret float [[VRECPXS_F32_I]] +// float32_t test_vrecpxs_f32(float32_t a) { +// return vrecpxs_f32(a); +// } + +// NYI-LABEL: @test_vrecpxd_f64( +// NYI: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) +// NYI: ret double [[VRECPXD_F64_I]] +// float64_t test_vrecpxd_f64(float64_t a) { +// return vrecpxd_f64(a); +// } + +// NYI-LABEL: @test_vrsqrte_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a) +// NYI: ret <2 x i32> [[VRSQRTE_V1_I]] +// uint32x2_t test_vrsqrte_u32(uint32x2_t a) { +// return vrsqrte_u32(a); +// } + +// NYI-LABEL: @test_vrsqrteq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a) +// NYI: ret <4 x i32> [[VRSQRTEQ_V1_I]] +// uint32x4_t 
test_vrsqrteq_u32(uint32x4_t a) { +// return vrsqrteq_u32(a); +// } + +// NYI-LABEL: @test_vrsqrtes_f32( +// NYI: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) +// NYI: ret float [[VRSQRTES_F32_I]] +// float32_t test_vrsqrtes_f32(float32_t a) { +// return vrsqrtes_f32(a); +// } + +// NYI-LABEL: @test_vrsqrted_f64( +// NYI: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) +// NYI: ret double [[VRSQRTED_F64_I]] +// float64_t test_vrsqrted_f64(float64_t a) { +// return vrsqrted_f64(a); +// } + +uint8x16_t test_vld1q_u8(uint8_t const *a) { + return vld1q_u8(a); + // CIR-LABEL: @test_vld1q_u8 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u8i>), !cir.ptr<!cir.vector<!u8i x 16>> + // CIR: cir.load align(1) %[[CAST]] : !cir.ptr<!cir.vector<!u8i x 16>>, !cir.vector<!u8i x 16> + + // LLVM-LABEL: @test_vld1q_u8 + // LLVM: [[TMP1:%.*]] = load <16 x i8>, ptr %0, align 1, +} + +uint16x8_t test_vld1q_u16(uint16_t const *a) { + return vld1q_u16(a); + // CIR-LABEL: @test_vld1q_u16 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u16i>), !cir.ptr<!cir.vector<!u16i x 8>> + // CIR: cir.load align(2) %[[CAST]] : !cir.ptr<!cir.vector<!u16i x 8>>, !cir.vector<!u16i x 8> + + // LLVM-LABEL: @test_vld1q_u16 + // LLVM: [[TMP1:%.*]] = load <8 x i16>, ptr %0, align 2, +} + +uint32x4_t test_vld1q_u32(uint32_t const *a) { + return vld1q_u32(a); + // CIR-LABEL: @test_vld1q_u32 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u32i>), !cir.ptr<!cir.vector<!u32i x 4>> + // CIR: cir.load align(4) %[[CAST]] : !cir.ptr<!cir.vector<!u32i x 4>>, !cir.vector<!u32i x 4> + + // LLVM-LABEL: @test_vld1q_u32 + // LLVM: [[TMP1:%.*]] = load <4 x i32>, ptr %0, align 4, +} + +uint64x2_t test_vld1q_u64(uint64_t const *a) { + return vld1q_u64(a); + // CIR-LABEL: @test_vld1q_u64 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u64i>), !cir.ptr<!cir.vector<!u64i x 2>> + // CIR: cir.load align(8) %[[CAST]] : !cir.ptr<!cir.vector<!u64i x 2>>, !cir.vector<!u64i x 2> + + // LLVM-LABEL: @test_vld1q_u64 + // LLVM: [[TMP1:%.*]] = load <2 x i64>, ptr %0, align 8, +} + +int8x16_t test_vld1q_s8(int8_t const *a) { + return vld1q_s8(a); + // CIR-LABEL: @test_vld1q_s8 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s8i>), !cir.ptr<!cir.vector<!s8i x 16>> + // CIR: cir.load align(1) %[[CAST]] : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16> + + // LLVM-LABEL: @test_vld1q_s8 + // LLVM: [[TMP1:%.*]] = load <16 x i8>, ptr %0, align 1, +} + +int16x8_t test_vld1q_s16(int16_t const *a) { + return vld1q_s16(a); + // CIR-LABEL: @test_vld1q_s16 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s16i>), !cir.ptr<!cir.vector<!s16i x 8>> + // CIR: cir.load align(2) %[[CAST]] : !cir.ptr<!cir.vector<!s16i x 8>>, !cir.vector<!s16i x 8> + + // LLVM-LABEL: @test_vld1q_s16 + // LLVM: [[TMP1:%.*]] = load <8 x i16>, ptr %0, align 2, +} + +int32x4_t test_vld1q_s32(int32_t const *a) { + return vld1q_s32(a); + // CIR-LABEL: @test_vld1q_s32 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s32i>), !cir.ptr<!cir.vector<!s32i x 4>> + // CIR: cir.load align(4) %[[CAST]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4> + + // LLVM-LABEL: @test_vld1q_s32 + // LLVM: [[TMP1:%.*]] = load <4 x i32>, ptr %0, align 4, +} + +int64x2_t test_vld1q_s64(int64_t const *a) { + return vld1q_s64(a); + // CIR-LABEL: @test_vld1q_s64 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s64i>), !cir.ptr<!cir.vector<!s64i x 2>> + // CIR: cir.load align(8) %[[CAST]] : !cir.ptr<!cir.vector<!s64i x 2>>, !cir.vector<!s64i x 2> + + // LLVM-LABEL: @test_vld1q_s64 + // LLVM: [[TMP1:%.*]] = load <2 x i64>, ptr %0, align 8, +} + +// NYI-LABEL: @test_vld1q_f16( +// NYI: [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2 +// NYI: ret <8 x half> [[TMP2]] +// float16x8_t test_vld1q_f16(float16_t const *a) { +// return vld1q_f16(a); +// } + +// NYI-LABEL: @test_vld1q_f32( +// NYI: [[TMP2:%.*]] = load <4 x float>, ptr %a, align 4 +// NYI: ret 
<4 x float> [[TMP2]] +// float32x4_t test_vld1q_f32(float32_t const *a) { +// return vld1q_f32(a); +// } + +// NYI-LABEL: @test_vld1q_f64( +// NYI: [[TMP2:%.*]] = load <2 x double>, ptr %a, align 8 +// NYI: ret <2 x double> [[TMP2]] +// float64x2_t test_vld1q_f64(float64_t const *a) { +// return vld1q_f64(a); +// } + +// NYI-LABEL: @test_vld1q_p8( +// NYI: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1 +// NYI: ret <16 x i8> [[TMP1]] +// poly8x16_t test_vld1q_p8(poly8_t const *a) { +// return vld1q_p8(a); +// } + +// NYI-LABEL: @test_vld1q_p16( +// NYI: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2 +// NYI: ret <8 x i16> [[TMP2]] +// poly16x8_t test_vld1q_p16(poly16_t const *a) { +// return vld1q_p16(a); +// } + +// NYI-LABEL: @test_vld1_u8( +// NYI: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 +// NYI: ret <8 x i8> [[TMP1]] +// uint8x8_t test_vld1_u8(uint8_t const *a) { +// return vld1_u8(a); +// } + +// NYI-LABEL: @test_vld1_u16( +// NYI: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2 +// NYI: ret <4 x i16> [[TMP2]] +// uint16x4_t test_vld1_u16(uint16_t const *a) { +// return vld1_u16(a); +// } + +// NYI-LABEL: @test_vld1_u32( +// NYI: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4 +// NYI: ret <2 x i32> [[TMP2]] +// uint32x2_t test_vld1_u32(uint32_t const *a) { +// return vld1_u32(a); +// } + +// NYI-LABEL: @test_vld1_u64( +// NYI: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8 +// NYI: ret <1 x i64> [[TMP2]] +// uint64x1_t test_vld1_u64(uint64_t const *a) { +// return vld1_u64(a); +// } + +// NYI-LABEL: @test_vld1_s8( +// NYI: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 +// NYI: ret <8 x i8> [[TMP1]] +// int8x8_t test_vld1_s8(int8_t const *a) { +// return vld1_s8(a); +// } + +// NYI-LABEL: @test_vld1_s16( +// NYI: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2 +// NYI: ret <4 x i16> [[TMP2]] +// int16x4_t test_vld1_s16(int16_t const *a) { +// return vld1_s16(a); +// } + +// NYI-LABEL: @test_vld1_s32( +// NYI: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4 +// NYI: ret <2 x i32> [[TMP2]] +// int32x2_t test_vld1_s32(int32_t const *a) { +// return vld1_s32(a); +// } + +// NYI-LABEL: @test_vld1_s64( +// NYI: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8 +// NYI: ret <1 x i64> [[TMP2]] +// int64x1_t test_vld1_s64(int64_t const *a) { +// return vld1_s64(a); +// } + +// NYI-LABEL: @test_vld1_f16( +// NYI: [[TMP2:%.*]] = load <4 x half>, ptr %a, align 2 +// NYI: ret <4 x half> [[TMP2]] +// float16x4_t test_vld1_f16(float16_t const *a) { +// return vld1_f16(a); +// } + +// NYI-LABEL: @test_vld1_f32( +// NYI: [[TMP2:%.*]] = load <2 x float>, ptr %a, align 4 +// NYI: ret <2 x float> [[TMP2]] +// float32x2_t test_vld1_f32(float32_t const *a) { +// return vld1_f32(a); +// } + +// NYI-LABEL: @test_vld1_f64( +// NYI: [[TMP2:%.*]] = load <1 x double>, ptr %a, align 8 +// NYI: ret <1 x double> [[TMP2]] +// float64x1_t test_vld1_f64(float64_t const *a) { +// return vld1_f64(a); +// } + +// NYI-LABEL: @test_vld1_p8( +// NYI: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 +// NYI: ret <8 x i8> [[TMP1]] +// poly8x8_t test_vld1_p8(poly8_t const *a) { +// return vld1_p8(a); +// } + +// NYI-LABEL: @test_vld1_p16( +// NYI: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2 +// NYI: ret <4 x i16> [[TMP2]] +// poly16x4_t test_vld1_p16(poly16_t const *a) { +// return vld1_p16(a); +// } + +// NYI-LABEL: @test_vld1_u8_void( +// NYI: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 +// NYI: ret <8 x i8> [[TMP1]] +// uint8x8_t test_vld1_u8_void(void *a) { +// return vld1_u8(a); +// } + +// NYI-LABEL: 
@test_vld1_u16_void( +// NYI: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1 +// NYI: ret <4 x i16> [[TMP1]] +// uint16x4_t test_vld1_u16_void(void *a) { +// return vld1_u16(a); +// } + +// NYI-LABEL: @test_vld1_u32_void( +// NYI: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1 +// NYI: ret <2 x i32> [[TMP1]] +// uint32x2_t test_vld1_u32_void(void *a) { +// return vld1_u32(a); +// } + +// NYI-LABEL: @test_vld1_u64_void( +// NYI: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1 +// NYI: ret <1 x i64> [[TMP1]] +// uint64x1_t test_vld1_u64_void(void *a) { +// return vld1_u64(a); +// } + +// NYI-LABEL: @test_vld1_s8_void( +// NYI: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 +// NYI: ret <8 x i8> [[TMP1]] +// int8x8_t test_vld1_s8_void(void *a) { +// return vld1_s8(a); +// } + +// NYI-LABEL: @test_vld1_s16_void( +// NYI: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1 +// NYI: ret <4 x i16> [[TMP1]] +// int16x4_t test_vld1_s16_void(void *a) { +// return vld1_s16(a); +// } + +// NYI-LABEL: @test_vld1_s32_void( +// NYI: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1 +// NYI: ret <2 x i32> [[TMP1]] +// int32x2_t test_vld1_s32_void(void *a) { +// return vld1_s32(a); +// } + +// NYI-LABEL: @test_vld1_s64_void( +// NYI: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1 +// NYI: ret <1 x i64> [[TMP1]] +// int64x1_t test_vld1_s64_void(void *a) { +// return vld1_s64(a); +// } + +// NYI-LABEL: @test_vld1_f16_void( +// NYI: [[TMP1:%.*]] = load <4 x half>, ptr %a, align 1 +// NYI: ret <4 x half> [[TMP1]] +// float16x4_t test_vld1_f16_void(void *a) { +// return vld1_f16(a); +// } + +// NYI-LABEL: @test_vld1_f32_void( +// NYI: [[TMP1:%.*]] = load <2 x float>, ptr %a, align 1 +// NYI: ret <2 x float> [[TMP1]] +// float32x2_t test_vld1_f32_void(void *a) { +// return vld1_f32(a); +// } + +// NYI-LABEL: @test_vld1_f64_void( +// NYI: [[TMP1:%.*]] = load <1 x double>, ptr %a, align 1 +// NYI: ret <1 x double> [[TMP1]] +// float64x1_t test_vld1_f64_void(void *a) { +// return vld1_f64(a); +// } + +// NYI-LABEL: @test_vld1_p8_void( +// NYI: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 +// NYI: ret <8 x i8> [[TMP1]] +// poly8x8_t test_vld1_p8_void(void *a) { +// return vld1_p8(a); +// } + +// NYI-LABEL: @test_vld1_p16_void( +// NYI: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1 +// NYI: ret <4 x i16> [[TMP1]] +// poly16x4_t test_vld1_p16_void(void *a) { +// return vld1_p16(a); +// } + +// NYI-LABEL: @test_vld2q_u8( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint8x16x2_t [[TMP5]] +// uint8x16x2_t test_vld2q_u8(uint8_t const *a) { +// return vld2q_u8(a); +// } + +// NYI-LABEL: @test_vld2q_u16( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16 +// NYI: ret 
%struct.uint16x8x2_t [[TMP6]] +// uint16x8x2_t test_vld2q_u16(uint16_t const *a) { +// return vld2q_u16(a); +// } + +// NYI-LABEL: @test_vld2q_u32( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a) +// NYI: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint32x4x2_t [[TMP6]] +// uint32x4x2_t test_vld2q_u32(uint32_t const *a) { +// return vld2q_u32(a); +// } + +// NYI-LABEL: @test_vld2q_u64( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint64x2x2_t [[TMP6]] +// uint64x2x2_t test_vld2q_u64(uint64_t const *a) { +// return vld2q_u64(a); +// } + +// NYI-LABEL: @test_vld2q_s8( +// NYI: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int8x16x2_t [[TMP5]] +// int8x16x2_t test_vld2q_s8(int8_t const *a) { +// return vld2q_s8(a); +// } + +// NYI-LABEL: @test_vld2q_s16( +// NYI: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int16x8x2_t [[TMP6]] +// int16x8x2_t test_vld2q_s16(int16_t const *a) { +// return vld2q_s16(a); +// } + +// NYI-LABEL: @test_vld2q_s32( +// NYI: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a) +// NYI: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int32x4x2_t [[TMP6]] +// int32x4x2_t test_vld2q_s32(int32_t const *a) { +// return vld2q_s32(a); +// } + +// NYI-LABEL: @test_vld2q_s64( +// NYI: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr 
[[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int64x2x2_t [[TMP6]] +// int64x2x2_t test_vld2q_s64(int64_t const *a) { +// return vld2q_s64(a); +// } + +// NYI-LABEL: @test_vld2q_f16( +// NYI: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr %a) +// NYI: store { <8 x half>, <8 x half> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float16x8x2_t [[TMP6]] +// float16x8x2_t test_vld2q_f16(float16_t const *a) { +// return vld2q_f16(a); +// } + +// NYI-LABEL: @test_vld2q_f32( +// NYI: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %a) +// NYI: store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float32x4x2_t [[TMP6]] +// float32x4x2_t test_vld2q_f32(float32_t const *a) { +// return vld2q_f32(a); +// } + +// NYI-LABEL: @test_vld2q_f64( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %a) +// NYI: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float64x2x2_t [[TMP6]] +// float64x2x2_t test_vld2q_f64(float64_t const *a) { +// return vld2q_f64(a); +// } + +// NYI-LABEL: @test_vld2q_p8( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly8x16x2_t [[TMP5]] +// poly8x16x2_t test_vld2q_p8(poly8_t const *a) { +// return vld2q_p8(a); +// } + +// NYI-LABEL: @test_vld2q_p16( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// NYI: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly16x8x2_t [[TMP6]] +// poly16x8x2_t test_vld2q_p16(poly16_t const *a) { +// return vld2q_p16(a); +// } + +// NYI-LABEL: @test_vld2_u8( +// NYI: 
[[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint8x8x2_t [[TMP5]] +// uint8x8x2_t test_vld2_u8(uint8_t const *a) { +// return vld2_u8(a); +// } + +// NYI-LABEL: @test_vld2_u16( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint16x4x2_t [[TMP6]] +// uint16x4x2_t test_vld2_u16(uint16_t const *a) { +// return vld2_u16(a); +// } + +// NYI-LABEL: @test_vld2_u32( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a) +// NYI: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint32x2x2_t [[TMP6]] +// uint32x2x2_t test_vld2_u32(uint32_t const *a) { +// return vld2_u32(a); +// } + +// NYI-LABEL: @test_vld2_u64( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint64x1x2_t [[TMP6]] +// uint64x1x2_t test_vld2_u64(uint64_t const *a) { +// return vld2_u64(a); +// } + +// NYI-LABEL: @test_vld2_s8( +// NYI: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int8x8x2_t [[TMP5]] +// int8x8x2_t test_vld2_s8(int8_t const *a) { +// return vld2_s8(a); +// } + +// NYI-LABEL: @test_vld2_s16( +// NYI: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8 +// NYI: ret 
%struct.int16x4x2_t [[TMP6]] +// int16x4x2_t test_vld2_s16(int16_t const *a) { +// return vld2_s16(a); +// } + +// NYI-LABEL: @test_vld2_s32( +// NYI: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a) +// NYI: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int32x2x2_t [[TMP6]] +// int32x2x2_t test_vld2_s32(int32_t const *a) { +// return vld2_s32(a); +// } + +// NYI-LABEL: @test_vld2_s64( +// NYI: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int64x1x2_t [[TMP6]] +// int64x1x2_t test_vld2_s64(int64_t const *a) { +// return vld2_s64(a); +// } + +// NYI-LABEL: @test_vld2_f16( +// NYI: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr %a) +// NYI: store { <4 x half>, <4 x half> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float16x4x2_t [[TMP6]] +// float16x4x2_t test_vld2_f16(float16_t const *a) { +// return vld2_f16(a); +// } + +// NYI-LABEL: @test_vld2_f32( +// NYI: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %a) +// NYI: store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float32x2x2_t [[TMP6]] +// float32x2x2_t test_vld2_f32(float32_t const *a) { +// return vld2_f32(a); +// } + +// NYI-LABEL: @test_vld2_f64( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %a) +// NYI: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float64x1x2_t [[TMP6]] +// float64x1x2_t test_vld2_f64(float64_t const *a) { +// return vld2_f64(a); +// } + +// NYI-LABEL: @test_vld2_p8( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr 
[[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly8x8x2_t [[TMP5]] +// poly8x8x2_t test_vld2_p8(poly8_t const *a) { +// return vld2_p8(a); +// } + +// NYI-LABEL: @test_vld2_p16( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 +// NYI: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly16x4x2_t [[TMP6]] +// poly16x4x2_t test_vld2_p16(poly16_t const *a) { +// return vld2_p16(a); +// } + +// NYI-LABEL: @test_vld3q_u8( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint8x16x3_t [[TMP5]] +// uint8x16x3_t test_vld3q_u8(uint8_t const *a) { +// return vld3q_u8(a); +// } + +// NYI-LABEL: @test_vld3q_u16( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint16x8x3_t [[TMP6]] +// uint16x8x3_t test_vld3q_u16(uint16_t const *a) { +// return vld3q_u16(a); +// } + +// NYI-LABEL: @test_vld3q_u32( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a) +// NYI: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint32x4x3_t [[TMP6]] +// uint32x4x3_t test_vld3q_u32(uint32_t const *a) { +// return vld3q_u32(a); +// } + +// NYI-LABEL: @test_vld3q_u64( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint64x2x3_t [[TMP6]] +// uint64x2x3_t test_vld3q_u64(uint64_t const *a) { +// return vld3q_u64(a); +// } + +// 
NYI-LABEL: @test_vld3q_s8( +// NYI: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int8x16x3_t [[TMP5]] +// int8x16x3_t test_vld3q_s8(int8_t const *a) { +// return vld3q_s8(a); +// } + +// NYI-LABEL: @test_vld3q_s16( +// NYI: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int16x8x3_t [[TMP6]] +// int16x8x3_t test_vld3q_s16(int16_t const *a) { +// return vld3q_s16(a); +// } + +// NYI-LABEL: @test_vld3q_s32( +// NYI: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a) +// NYI: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int32x4x3_t [[TMP6]] +// int32x4x3_t test_vld3q_s32(int32_t const *a) { +// return vld3q_s32(a); +// } + +// NYI-LABEL: @test_vld3q_s64( +// NYI: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int64x2x3_t [[TMP6]] +// int64x2x3_t test_vld3q_s64(int64_t const *a) { +// return vld3q_s64(a); +// } + +// NYI-LABEL: @test_vld3q_f16( +// NYI: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr %a) +// NYI: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float16x8x3_t [[TMP6]] +// float16x8x3_t test_vld3q_f16(float16_t const *a) { +// return vld3q_f16(a); +// } + +// NYI-LABEL: @test_vld3q_f32( +// NYI: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %a) +// NYI: store { <4 x float>, <4 x 
float>, <4 x float> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float32x4x3_t [[TMP6]] +// float32x4x3_t test_vld3q_f32(float32_t const *a) { +// return vld3q_f32(a); +// } + +// NYI-LABEL: @test_vld3q_f64( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %a) +// NYI: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float64x2x3_t [[TMP6]] +// float64x2x3_t test_vld3q_f64(float64_t const *a) { +// return vld3q_f64(a); +// } + +// NYI-LABEL: @test_vld3q_p8( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly8x16x3_t [[TMP5]] +// poly8x16x3_t test_vld3q_p8(poly8_t const *a) { +// return vld3q_p8(a); +// } + +// NYI-LABEL: @test_vld3q_p16( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// NYI: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly16x8x3_t [[TMP6]] +// poly16x8x3_t test_vld3q_p16(poly16_t const *a) { +// return vld3q_p16(a); +// } + +// NYI-LABEL: @test_vld3_u8( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint8x8x3_t [[TMP5]] +// uint8x8x3_t test_vld3_u8(uint8_t const *a) { +// return vld3_u8(a); +// } + +// NYI-LABEL: @test_vld3_u16( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint16x4x3_t [[TMP6]] +// uint16x4x3_t 
test_vld3_u16(uint16_t const *a) { +// return vld3_u16(a); +// } + +// NYI-LABEL: @test_vld3_u32( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a) +// NYI: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint32x2x3_t [[TMP6]] +// uint32x2x3_t test_vld3_u32(uint32_t const *a) { +// return vld3_u32(a); +// } + +// NYI-LABEL: @test_vld3_u64( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint64x1x3_t [[TMP6]] +// uint64x1x3_t test_vld3_u64(uint64_t const *a) { +// return vld3_u64(a); +// } + +// NYI-LABEL: @test_vld3_s8( +// NYI: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int8x8x3_t [[TMP5]] +// int8x8x3_t test_vld3_s8(int8_t const *a) { +// return vld3_s8(a); +// } + +// NYI-LABEL: @test_vld3_s16( +// NYI: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int16x4x3_t [[TMP6]] +// int16x4x3_t test_vld3_s16(int16_t const *a) { +// return vld3_s16(a); +// } + +// NYI-LABEL: @test_vld3_s32( +// NYI: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a) +// NYI: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int32x2x3_t [[TMP6]] +// int32x2x3_t test_vld3_s32(int32_t const *a) { +// return vld3_s32(a); +// } + +// NYI-LABEL: @test_vld3_s64( +// NYI: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x 
i64>, <1 x i64> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int64x1x3_t [[TMP6]] +// int64x1x3_t test_vld3_s64(int64_t const *a) { +// return vld3_s64(a); +// } + +// NYI-LABEL: @test_vld3_f16( +// NYI: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr %a) +// NYI: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float16x4x3_t [[TMP6]] +// float16x4x3_t test_vld3_f16(float16_t const *a) { +// return vld3_f16(a); +// } + +// NYI-LABEL: @test_vld3_f32( +// NYI: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %a) +// NYI: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float32x2x3_t [[TMP6]] +// float32x2x3_t test_vld3_f32(float32_t const *a) { +// return vld3_f32(a); +// } + +// NYI-LABEL: @test_vld3_f64( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %a) +// NYI: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float64x1x3_t [[TMP6]] +// float64x1x3_t test_vld3_f64(float64_t const *a) { +// return vld3_f64(a); +// } + +// NYI-LABEL: @test_vld3_p8( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly8x8x3_t [[TMP5]] +// poly8x8x3_t test_vld3_p8(poly8_t const *a) { +// return vld3_p8(a); +// } + +// NYI-LABEL: @test_vld3_p16( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 +// NYI: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly16x4x3_t [[TMP6]] +// poly16x4x3_t 
test_vld3_p16(poly16_t const *a) { +// return vld3_p16(a); +// } + +// NYI-LABEL: @test_vld4q_u8( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint8x16x4_t [[TMP5]] +// uint8x16x4_t test_vld4q_u8(uint8_t const *a) { +// return vld4q_u8(a); +// } + +// NYI-LABEL: @test_vld4q_u16( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint16x8x4_t [[TMP6]] +// uint16x8x4_t test_vld4q_u16(uint16_t const *a) { +// return vld4q_u16(a); +// } + +// NYI-LABEL: @test_vld4q_u32( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a) +// NYI: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint32x4x4_t [[TMP6]] +// uint32x4x4_t test_vld4q_u32(uint32_t const *a) { +// return vld4q_u32(a); +// } + +// NYI-LABEL: @test_vld4q_u64( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.uint64x2x4_t [[TMP6]] +// uint64x2x4_t test_vld4q_u64(uint64_t const *a) { +// return vld4q_u64(a); +// } + +// NYI-LABEL: @test_vld4q_s8( +// NYI: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int8x16x4_t [[TMP5]] +// int8x16x4_t test_vld4q_s8(int8_t const *a) { +// return vld4q_s8(a); +// } + +// NYI-LABEL: @test_vld4q_s16( +// NYI: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca 
%struct.int16x8x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int16x8x4_t [[TMP6]] +// int16x8x4_t test_vld4q_s16(int16_t const *a) { +// return vld4q_s16(a); +// } + +// NYI-LABEL: @test_vld4q_s32( +// NYI: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a) +// NYI: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int32x4x4_t [[TMP6]] +// int32x4x4_t test_vld4q_s32(int32_t const *a) { +// return vld4q_s32(a); +// } + +// NYI-LABEL: @test_vld4q_s64( +// NYI: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.int64x2x4_t [[TMP6]] +// int64x2x4_t test_vld4q_s64(int64_t const *a) { +// return vld4q_s64(a); +// } + +// NYI-LABEL: @test_vld4q_f16( +// NYI: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr %a) +// NYI: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float16x8x4_t [[TMP6]] +// float16x8x4_t test_vld4q_f16(float16_t const *a) { +// return vld4q_f16(a); +// } + +// NYI-LABEL: @test_vld4q_f32( +// NYI: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %a) +// NYI: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float32x4x4_t [[TMP6]] +// float32x4x4_t test_vld4q_f32(float32_t const *a) { +// return vld4q_f32(a); +// } + +// NYI-LABEL: @test_vld4q_f64( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %a) 
+// NYI: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float64x2x4_t [[TMP6]] +// float64x2x4_t test_vld4q_f64(float64_t const *a) { +// return vld4q_f64(a); +// } + +// NYI-LABEL: @test_vld4q_p8( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a) +// NYI: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly8x16x4_t [[TMP5]] +// poly8x16x4_t test_vld4q_p8(poly8_t const *a) { +// return vld4q_p8(a); +// } + +// NYI-LABEL: @test_vld4q_p16( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// NYI: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a) +// NYI: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly16x8x4_t [[TMP6]] +// poly16x8x4_t test_vld4q_p16(poly16_t const *a) { +// return vld4q_p16(a); +// } + +// NYI-LABEL: @test_vld4_u8( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint8x8x4_t [[TMP5]] +// uint8x8x4_t test_vld4_u8(uint8_t const *a) { +// return vld4_u8(a); +// } + +// NYI-LABEL: @test_vld4_u16( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint16x4x4_t [[TMP6]] +// uint16x4x4_t test_vld4_u16(uint16_t const *a) { +// return vld4_u16(a); +// } + +// NYI-LABEL: @test_vld4_u32( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a) +// NYI: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: 
[[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint32x2x4_t [[TMP6]] +// uint32x2x4_t test_vld4_u32(uint32_t const *a) { +// return vld4_u32(a); +// } + +// NYI-LABEL: @test_vld4_u64( +// NYI: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.uint64x1x4_t [[TMP6]] +// uint64x1x4_t test_vld4_u64(uint64_t const *a) { +// return vld4_u64(a); +// } + +// NYI-LABEL: @test_vld4_s8( +// NYI: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int8x8x4_t [[TMP5]] +// int8x8x4_t test_vld4_s8(int8_t const *a) { +// return vld4_s8(a); +// } + +// NYI-LABEL: @test_vld4_s16( +// NYI: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int16x4x4_t [[TMP6]] +// int16x4x4_t test_vld4_s16(int16_t const *a) { +// return vld4_s16(a); +// } + +// NYI-LABEL: @test_vld4_s32( +// NYI: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a) +// NYI: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int32x2x4_t [[TMP6]] +// int32x2x4_t test_vld4_s32(int32_t const *a) { +// return vld4_s32(a); +// } + +// NYI-LABEL: @test_vld4_s64( +// NYI: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.int64x1x4_t [[TMP6]] +// int64x1x4_t test_vld4_s64(int64_t const *a) { +// return vld4_s64(a); +// } + +// NYI-LABEL: @test_vld4_f16( +// NYI: [[RETVAL:%.*]] = alloca 
%struct.float16x4x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr %a) +// NYI: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float16x4x4_t [[TMP6]] +// float16x4x4_t test_vld4_f16(float16_t const *a) { +// return vld4_f16(a); +// } + +// NYI-LABEL: @test_vld4_f32( +// NYI: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %a) +// NYI: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float32x2x4_t [[TMP6]] +// float32x2x4_t test_vld4_f32(float32_t const *a) { +// return vld4_f32(a); +// } + +// NYI-LABEL: @test_vld4_f64( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %a) +// NYI: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float64x1x4_t [[TMP6]] +// float64x1x4_t test_vld4_f64(float64_t const *a) { +// return vld4_f64(a); +// } + +// NYI-LABEL: @test_vld4_p8( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a) +// NYI: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP5:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly8x8x4_t [[TMP5]] +// poly8x8x4_t test_vld4_p8(poly8_t const *a) { +// return vld4_p8(a); +// } + +// NYI-LABEL: @test_vld4_p16( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 +// NYI: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a) +// NYI: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly16x4x4_t [[TMP6]] +// poly16x4x4_t test_vld4_p16(poly16_t const *a) { +// return vld4_p16(a); +// } + +void test_vst1q_u8(uint8_t *a, uint8x16_t b) { + vst1q_u8(a, b); + // CIR-LABEL: @test_vst1q_u8 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u8i>), !cir.ptr<!cir.vector<!u8i x 16>> + // CIR: cir.store align(1) %{{.*}}, %[[CAST]] : !cir.vector<!u8i x 16>,
!cir.ptr<!cir.vector<!u8i x 16>> + + // LLVM-LABEL: @test_vst1q_u8 + // LLVM: store <16 x i8> %{{.*}}, ptr %0, align 1, +} + +void test_vst1q_u16(uint16_t *a, uint16x8_t b) { + vst1q_u16(a, b); + // CIR-LABEL: @test_vst1q_u16 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u16i>), !cir.ptr<!cir.vector<!u16i x 8>> + // CIR: cir.store align(2) %{{.*}}, %[[CAST]] : !cir.vector<!u16i x 8>, !cir.ptr<!cir.vector<!u16i x 8>> + + // LLVM-LABEL: @test_vst1q_u16 + // LLVM: store <8 x i16> %{{.*}}, ptr %0, align 2, +} + +void test_vst1q_u32(uint32_t *a, uint32x4_t b) { + vst1q_u32(a, b); + // CIR-LABEL: @test_vst1q_u32 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u32i>), !cir.ptr<!cir.vector<!u32i x 4>> + // CIR: cir.store align(4) %{{.*}}, %[[CAST]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>> + + // LLVM-LABEL: @test_vst1q_u32 + // LLVM: store <4 x i32> %{{.*}}, ptr %0, align 4, +} + +void test_vst1q_u64(uint64_t *a, uint64x2_t b) { + vst1q_u64(a, b); + // CIR-LABEL: @test_vst1q_u64 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!u64i>), !cir.ptr<!cir.vector<!u64i x 2>> + // CIR: cir.store align(8) %{{.*}}, %[[CAST]] : !cir.vector<!u64i x 2>, !cir.ptr<!cir.vector<!u64i x 2>> + + // LLVM-LABEL: @test_vst1q_u64 + // LLVM: store <2 x i64> %{{.*}}, ptr %0, align 8, +} + +void test_vst1q_s8(int8_t *a, int8x16_t b) { + vst1q_s8(a, b); + // CIR-LABEL: @test_vst1q_s8 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s8i>), !cir.ptr<!cir.vector<!s8i x 16>> + // CIR: cir.store align(1) %{{.*}}, %[[CAST]] : !cir.vector<!s8i x 16>, !cir.ptr<!cir.vector<!s8i x 16>> + + // LLVM-LABEL: @test_vst1q_s8 + // LLVM: store <16 x i8> %{{.*}}, ptr %0, align 1, +} + +void test_vst1q_s16(int16_t *a, int16x8_t b) { + vst1q_s16(a, b); + // CIR-LABEL: @test_vst1q_s16 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s16i>), !cir.ptr<!cir.vector<!s16i x 8>> + // CIR: cir.store align(2) %{{.*}}, %[[CAST]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>> + + // LLVM-LABEL: @test_vst1q_s16 + // LLVM: store <8 x i16> %{{.*}}, ptr %0, align 2, +} + +void test_vst1q_s32(int32_t *a, int32x4_t b) { + vst1q_s32(a, b); + // CIR-LABEL: @test_vst1q_s32 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s32i>), !cir.ptr<!cir.vector<!s32i x 4>> + // CIR: cir.store align(4) %{{.*}}, %[[CAST]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>> + + // LLVM-LABEL: @test_vst1q_s32 + // LLVM: store <4 x i32> %{{.*}}, ptr %0, align 4, +} + +void test_vst1q_s64(int64_t *a, int64x2_t b) { + vst1q_s64(a, b); + // CIR-LABEL: @test_vst1q_s64 + // CIR: %[[CAST:.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!s64i>), !cir.ptr<!cir.vector<!s64i x 2>> + // CIR: cir.store align(8) %{{.*}}, %[[CAST]] : !cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>> + + // LLVM-LABEL: @test_vst1q_s64 + // LLVM: store <2 x i64> %{{.*}}, ptr %0, align 8, +} + +// NYI-LABEL: @test_vst1q_f16( +// NYI: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// NYI: store <8 x half> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1q_f16(float16_t *a, float16x8_t b) { +// vst1q_f16(a, b); +// } + +// NYI-LABEL: @test_vst1q_f32( +// NYI: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> +// NYI: store <4 x float> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1q_f32(float32_t *a, float32x4_t b) { +// vst1q_f32(a, b); +// } + +// NYI-LABEL: @test_vst1q_f64( +// NYI: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> +// NYI: store <2 x double> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1q_f64(float64_t *a, float64x2_t b) { +// vst1q_f64(a, b); +// } + +// NYI-LABEL: @test_vst1q_p8( +// NYI: store <16 x i8> %b, ptr %a +// NYI: ret void +// void test_vst1q_p8(poly8_t *a, poly8x16_t b) { +// vst1q_p8(a, b); +// } + +//
NYI-LABEL: @test_vst1q_p16( +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// NYI: store <8 x i16> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1q_p16(poly16_t *a, poly16x8_t b) { +// vst1q_p16(a, b); +// } + +// NYI-LABEL: @test_vst1_u8( +// NYI: store <8 x i8> %b, ptr %a +// NYI: ret void +// void test_vst1_u8(uint8_t *a, uint8x8_t b) { +// vst1_u8(a, b); +// } + +// NYI-LABEL: @test_vst1_u16( +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: store <4 x i16> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_u16(uint16_t *a, uint16x4_t b) { +// vst1_u16(a, b); +// } + +// NYI-LABEL: @test_vst1_u32( +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: store <2 x i32> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_u32(uint32_t *a, uint32x2_t b) { +// vst1_u32(a, b); +// } + +// NYI-LABEL: @test_vst1_u64( +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: store <1 x i64> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_u64(uint64_t *a, uint64x1_t b) { +// vst1_u64(a, b); +// } + +// NYI-LABEL: @test_vst1_s8( +// NYI: store <8 x i8> %b, ptr %a +// NYI: ret void +// void test_vst1_s8(int8_t *a, int8x8_t b) { +// vst1_s8(a, b); +// } + +// NYI-LABEL: @test_vst1_s16( +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// NYI: store <4 x i16> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_s16(int16_t *a, int16x4_t b) { +// vst1_s16(a, b); +// } + +// NYI-LABEL: @test_vst1_s32( +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> +// NYI: store <2 x i32> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_s32(int32_t *a, int32x2_t b) { +// vst1_s32(a, b); +// } + +// NYI-LABEL: @test_vst1_s64( +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: store <1 x i64> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_s64(int64_t *a, int64x1_t b) { +// vst1_s64(a, b); +// } + +// NYI-LABEL: @test_vst1_f16( +// NYI: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// NYI: store <4 x half> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_f16(float16_t *a, float16x4_t b) { +// vst1_f16(a, b); +// } + +// NYI-LABEL: @test_vst1_f32( +// NYI: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// NYI: store <2 x float> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_f32(float32_t *a, float32x2_t b) { +// vst1_f32(a, b); +// } + +// NYI-LABEL: @test_vst1_f64( +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// NYI: store <1 x double> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_f64(float64_t *a, float64x1_t b) { +// vst1_f64(a, b); +// } + +// NYI-LABEL: @test_vst1_p8( +// NYI: store <8 x i8> %b, ptr %a +// NYI: ret void +// void test_vst1_p8(poly8_t *a, poly8x8_t b) { +// vst1_p8(a, b); +// } + +// NYI-LABEL: @test_vst1_p16( +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 
+// NYI: store <4 x i16> [[TMP3]], ptr %a +// NYI: ret void +// void test_vst1_p16(poly16_t *a, poly16x4_t b) { +// vst1_p16(a, b); +// } + +// NYI-LABEL: @test_vst2q_u8( +// NYI: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a) +// NYI: ret void +// void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { +// vst2q_u8(a, b); +// } + +// NYI-LABEL: @test_vst2q_u16( +// NYI: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { +// vst2q_u16(a, b); +// } + +// NYI-LABEL: @test_vst2q_u32( +// NYI: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +// NYI: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) { +// vst2q_u32(a, b); +// } + +// NYI-LABEL: @test_vst2q_u64( +// NYI: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) { +// vst2q_u64(a, b); +// } + +// NYI-LABEL: @test_vst2q_s8( +// NYI: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a) +// NYI: ret void +// void test_vst2q_s8(int8_t *a, int8x16x2_t b) { +// vst2q_s8(a, b); +// } + +// NYI-LABEL: @test_vst2q_s16( +// NYI: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr 
inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_s16(int16_t *a, int16x8x2_t b) { +// vst2q_s16(a, b); +// } + +// NYI-LABEL: @test_vst2q_s32( +// NYI: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +// NYI: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_s32(int32_t *a, int32x4x2_t b) { +// vst2q_s32(a, b); +// } + +// NYI-LABEL: @test_vst2q_s64( +// NYI: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: call void 
@llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_s64(int64_t *a, int64x2x2_t b) { +// vst2q_s64(a, b); +// } + +// NYI-LABEL: @test_vst2q_f16( +// NYI: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> +// NYI: call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_f16(float16_t *a, float16x8x2_t b) { +// vst2q_f16(a, b); +// } + +// NYI-LABEL: @test_vst2q_f32( +// NYI: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> +// NYI: call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_f32(float32_t *a, float32x4x2_t b) { +// vst2q_f32(a, b); +// } + +// NYI-LABEL: @test_vst2q_f64( +// NYI: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], 
i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> +// NYI: call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_f64(float64_t *a, float64x2x2_t b) { +// vst2q_f64(a, b); +// } + +// NYI-LABEL: @test_vst2q_p8( +// NYI: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a) +// NYI: ret void +// void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { +// vst2q_p8(a, b); +// } + +// NYI-LABEL: @test_vst2q_p16( +// NYI: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) { +// vst2q_p16(a, b); +// } + +// NYI-LABEL: @test_vst2_u8( +// NYI: [[B:%.*]] = alloca %struct.uint8x8x2_t, 
align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a) +// NYI: ret void +// void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { +// vst2_u8(a, b); +// } + +// NYI-LABEL: @test_vst2_u16( +// NYI: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_u16(uint16_t *a, uint16x4x2_t b) { +// vst2_u16(a, b); +// } + +// NYI-LABEL: @test_vst2_u32( +// NYI: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x 
i32> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> +// NYI: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_u32(uint32_t *a, uint32x2x2_t b) { +// vst2_u32(a, b); +// } + +// NYI-LABEL: @test_vst2_u64( +// NYI: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_u64(uint64_t *a, uint64x1x2_t b) { +// vst2_u64(a, b); +// } + +// NYI-LABEL: @test_vst2_s8( +// NYI: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a) +// NYI: ret void +// void test_vst2_s8(int8_t *a, int8x8x2_t b) { +// vst2_s8(a, b); +// } + +// NYI-LABEL: @test_vst2_s16( +// NYI: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr 
inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_s16(int16_t *a, int16x4x2_t b) { +// vst2_s16(a, b); +// } + +// NYI-LABEL: @test_vst2_s32( +// NYI: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> +// NYI: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_s32(int32_t *a, int32x2x2_t b) { +// vst2_s32(a, b); +// } + +// NYI-LABEL: @test_vst2_s64( +// NYI: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_s64(int64_t *a, int64x1x2_t b) { +// vst2_s64(a, b); +// } + +// NYI-LABEL: @test_vst2_f16( +// NYI: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = 
getelementptr inbounds %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> +// NYI: call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_f16(float16_t *a, float16x4x2_t b) { +// vst2_f16(a, b); +// } + +// NYI-LABEL: @test_vst2_f32( +// NYI: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> +// NYI: call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_f32(float32_t *a, float32x2x2_t b) { +// vst2_f32(a, b); +// } + +// NYI-LABEL: @test_vst2_f64( +// NYI: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 
x <1 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> +// NYI: call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_f64(float64_t *a, float64x1x2_t b) { +// vst2_f64(a, b); +// } + +// NYI-LABEL: @test_vst2_p8( +// NYI: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a) +// NYI: ret void +// void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { +// vst2_p8(a, b); +// } + +// NYI-LABEL: @test_vst2_p16( +// NYI: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst2_p16(poly16_t *a, poly16x4x2_t b) { +// vst2_p16(a, b); +// } + +// NYI-LABEL: @test_vst3q_u8( +// NYI: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 
+// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 +// NYI: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a) +// NYI: ret void +// void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { +// vst3q_u8(a, b); +// } + +// NYI-LABEL: @test_vst3q_u16( +// NYI: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { +// vst3q_u16(a, b); +// } + +// NYI-LABEL: @test_vst3q_u32( +// NYI: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 
0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> +// NYI: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { +// vst3q_u32(a, b); +// } + +// NYI-LABEL: @test_vst3q_u64( +// NYI: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { +// vst3q_u64(a, b); +// } + +// NYI-LABEL: @test_vst3q_s8( +// NYI: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr 
[[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 +// NYI: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a) +// NYI: ret void +// void test_vst3q_s8(int8_t *a, int8x16x3_t b) { +// vst3q_s8(a, b); +// } + +// NYI-LABEL: @test_vst3q_s16( +// NYI: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_s16(int16_t *a, int16x8x3_t b) { +// vst3q_s16(a, b); +// } + +// NYI-LABEL: @test_vst3q_s32( +// NYI: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> +// NYI: 
[[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> +// NYI: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_s32(int32_t *a, int32x4x3_t b) { +// vst3q_s32(a, b); +// } + +// NYI-LABEL: @test_vst3q_s64( +// NYI: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_s64(int64_t *a, int64x2x3_t b) { +// vst3q_s64(a, b); +// } + +// NYI-LABEL: @test_vst3q_f16( +// NYI: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], 
align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half> +// NYI: call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_f16(float16_t *a, float16x8x3_t b) { +// vst3q_f16(a, b); +// } + +// NYI-LABEL: @test_vst3q_f32( +// NYI: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> +// NYI: call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_f32(float32_t *a, float32x4x3_t b) { +// vst3q_f32(a, b); +// } + +// NYI-LABEL: @test_vst3q_f64( +// NYI: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// 
NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> +// NYI: call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_f64(float64_t *a, float64x2x3_t b) { +// vst3q_f64(a, b); +// } + +// NYI-LABEL: @test_vst3q_p8( +// NYI: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 +// NYI: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a) +// NYI: ret void +// void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) { +// vst3q_p8(a, b); +// } + +// NYI-LABEL: @test_vst3q_p16( +// NYI: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds 
%struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) { +// vst3q_p16(a, b); +// } + +// NYI-LABEL: @test_vst3_u8( +// NYI: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 +// NYI: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a) +// NYI: ret void +// void test_vst3_u8(uint8_t *a, uint8x8x3_t b) { +// vst3_u8(a, b); +// } + +// NYI-LABEL: @test_vst3_u16( +// NYI: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: 
[[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_u16(uint16_t *a, uint16x4x3_t b) { +// vst3_u16(a, b); +// } + +// NYI-LABEL: @test_vst3_u32( +// NYI: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> +// NYI: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_u32(uint32_t *a, uint32x2x3_t b) { +// vst3_u32(a, b); +// } + +// NYI-LABEL: @test_vst3_u64( +// NYI: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = 
bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_u64(uint64_t *a, uint64x1x3_t b) { +// vst3_u64(a, b); +// } + +// NYI-LABEL: @test_vst3_s8( +// NYI: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 +// NYI: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a) +// NYI: ret void +// void test_vst3_s8(int8_t *a, int8x8x3_t b) { +// vst3_s8(a, b); +// } + +// NYI-LABEL: @test_vst3_s16( +// NYI: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_s16(int16_t *a, int16x4x3_t b) { +// vst3_s16(a, 
b); +// } + +// NYI-LABEL: @test_vst3_s32( +// NYI: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> +// NYI: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_s32(int32_t *a, int32x2x3_t b) { +// vst3_s32(a, b); +// } + +// NYI-LABEL: @test_vst3_s64( +// NYI: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_s64(int64_t *a, 
int64x1x3_t b) { +// vst3_s64(a, b); +// } + +// NYI-LABEL: @test_vst3_f16( +// NYI: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half> +// NYI: call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_f16(float16_t *a, float16x4x3_t b) { +// vst3_f16(a, b); +// } + +// NYI-LABEL: @test_vst3_f32( +// NYI: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> +// NYI: call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP9]], <2 
x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_f32(float32_t *a, float32x2x3_t b) { +// vst3_f32(a, b); +// } + +// NYI-LABEL: @test_vst3_f64( +// NYI: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> +// NYI: call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_f64(float64_t *a, float64x1x3_t b) { +// vst3_f64(a, b); +// } + +// NYI-LABEL: @test_vst3_p8( +// NYI: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 +// NYI: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a) +// NYI: ret void +// void test_vst3_p8(poly8_t *a, poly8x8x3_t b) { +// vst3_p8(a, b); +// } + +// NYI-LABEL: @test_vst3_p16( +// NYI: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 
+// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst3_p16(poly16_t *a, poly16x4x3_t b) { +// vst3_p16(a, b); +// } + +// NYI-LABEL: @test_vst4q_u8( +// NYI: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 +// NYI: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a) +// NYI: ret void +// void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) { +// vst4q_u8(a, b); +// } + +// NYI-LABEL: @test_vst4q_u16( +// NYI: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr 
inbounds %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) { +// vst4q_u16(a, b); +// } + +// NYI-LABEL: @test_vst4q_u32( +// NYI: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] 
= getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> +// NYI: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) { +// vst4q_u32(a, b); +// } + +// NYI-LABEL: @test_vst4q_u64( +// NYI: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) { +// vst4q_u64(a, b); +// } + +// NYI-LABEL: @test_vst4q_s8( +// NYI: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 
+// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 +// NYI: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a) +// NYI: ret void +// void test_vst4q_s8(int8_t *a, int8x16x4_t b) { +// vst4q_s8(a, b); +// } + +// NYI-LABEL: @test_vst4q_s16( +// NYI: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_s16(int16_t *a, int16x8x4_t b) { +// vst4q_s16(a, b); +// } + +// NYI-LABEL: @test_vst4q_s32( +// NYI: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0 +// 
NYI: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> +// NYI: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_s32(int32_t *a, int32x4x4_t b) { +// vst4q_s32(a, b); +// } + +// NYI-LABEL: @test_vst4q_s64( +// NYI: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 +// 
NYI: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_s64(int64_t *a, int64x2x4_t b) { +// vst4q_s64(a, b); +// } + +// NYI-LABEL: @test_vst4q_f16( +// NYI: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half> +// NYI: call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_f16(float16_t *a, float16x8x4_t b) { +// vst4q_f16(a, b); +// } + +// NYI-LABEL: @test_vst4q_f32( +// NYI: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x 
float>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> +// NYI: call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_f32(float32_t *a, float32x4x4_t b) { +// vst4q_f32(a, b); +// } + +// NYI-LABEL: @test_vst4q_f64( +// NYI: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> +// NYI: [[TMP13:%.*]] = 
bitcast <16 x i8> [[TMP8]] to <2 x double> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> +// NYI: call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_f64(float64_t *a, float64x2x4_t b) { +// vst4q_f64(a, b); +// } + +// NYI-LABEL: @test_vst4q_p8( +// NYI: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 +// NYI: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a) +// NYI: ret void +// void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) { +// vst4q_p8(a, b); +// } + +// NYI-LABEL: @test_vst4q_p16( +// NYI: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr 
[[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> +// NYI: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) { +// vst4q_p16(a, b); +// } + +// NYI-LABEL: @test_vst4_u8( +// NYI: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 +// NYI: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a) +// NYI: ret void +// void test_vst4_u8(uint8_t *a, uint8x8x4_t b) { +// vst4_u8(a, b); +// } + +// NYI-LABEL: @test_vst4_u16( +// NYI: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds 
%struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_u16(uint16_t *a, uint16x4x4_t b) { +// vst4_u16(a, b); +// } + +// NYI-LABEL: @test_vst4_u32( +// NYI: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> +// NYI: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_u32(uint32_t *a, uint32x2x4_t b) { +// vst4_u32(a, b); +// } + +// NYI-LABEL: @test_vst4_u64( +// NYI: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[B]], i32 0, 
i32 0 +// NYI: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_u64(uint64_t *a, uint64x1x4_t b) { +// vst4_u64(a, b); +// } + +// NYI-LABEL: @test_vst4_s8( +// NYI: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 +// NYI: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a) +// NYI: ret void +// 
void test_vst4_s8(int8_t *a, int8x8x4_t b) { +// vst4_s8(a, b); +// } + +// NYI-LABEL: @test_vst4_s16( +// NYI: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_s16(int16_t *a, int16x4x4_t b) { +// vst4_s16(a, b); +// } + +// NYI-LABEL: @test_vst4_s32( +// NYI: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load 
<2 x i32>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> +// NYI: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_s32(int32_t *a, int32x2x4_t b) { +// vst4_s32(a, b); +// } + +// NYI-LABEL: @test_vst4_s64( +// NYI: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_s64(int64_t *a, int64x1x4_t b) { +// vst4_s64(a, b); +// } + +// NYI-LABEL: @test_vst4_f16( +// NYI: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// 
NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half> +// NYI: call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_f16(float16_t *a, float16x4x4_t b) { +// vst4_f16(a, b); +// } + +// NYI-LABEL: @test_vst4_f32( +// NYI: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> +// 
NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> +// NYI: call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_f32(float32_t *a, float32x2x4_t b) { +// vst4_f32(a, b); +// } + +// NYI-LABEL: @test_vst4_f64( +// NYI: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> +// NYI: call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_f64(float64_t *a, float64x1x4_t b) { +// vst4_f64(a, b); +// } + +// NYI-LABEL: @test_vst4_p8( +// NYI: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 
0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 +// NYI: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a) +// NYI: ret void +// void test_vst4_p8(poly8_t *a, poly8x8x4_t b) { +// vst4_p8(a, b); +// } + +// NYI-LABEL: @test_vst4_p16( +// NYI: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> +// NYI: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst4_p16(poly16_t *a, poly16x4x4_t b) { +// vst4_p16(a, b); +// } + +// NYI-LABEL: @test_vld1q_f64_x2( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %a) +// NYI: store { <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], 
i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float64x2x2_t [[TMP6]] +// float64x2x2_t test_vld1q_f64_x2(float64_t const *a) { +// return vld1q_f64_x2(a); +// } + +// NYI-LABEL: @test_vld1q_p64_x2( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 +// NYI: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly64x2x2_t [[TMP6]] +// poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) { +// return vld1q_p64_x2(a); +// } + +// NYI-LABEL: @test_vld1_f64_x2( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %a) +// NYI: store { <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float64x1x2_t [[TMP6]] +// float64x1x2_t test_vld1_f64_x2(float64_t const *a) { +// return vld1_f64_x2(a); +// } + +// NYI-LABEL: @test_vld1_p64_x2( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 +// NYI: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly64x1x2_t [[TMP6]] +// poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) { +// return vld1_p64_x2(a); +// } + +// NYI-LABEL: @test_vld1q_f64_x3( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %a) +// NYI: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float64x2x3_t [[TMP6]] +// float64x2x3_t test_vld1q_f64_x3(float64_t const *a) { +// return vld1q_f64_x3(a); +// } + +// NYI-LABEL: @test_vld1q_p64_x3( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 +// NYI: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly64x2x3_t [[TMP6]] +// poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) { +// return vld1q_p64_x3(a); +// } + +// NYI-LABEL: 
@test_vld1_f64_x3( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %a) +// NYI: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float64x1x3_t [[TMP6]] +// float64x1x3_t test_vld1_f64_x3(float64_t const *a) { +// return vld1_f64_x3(a); +// } + +// NYI-LABEL: @test_vld1_p64_x3( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 +// NYI: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly64x1x3_t [[TMP6]] +// poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { +// return vld1_p64_x3(a); +// } + +// NYI-LABEL: @test_vld1q_f64_x4( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %a) +// NYI: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.float64x2x4_t [[TMP6]] +// float64x2x4_t test_vld1q_f64_x4(float64_t const *a) { +// return vld1q_f64_x4(a); +// } + +// NYI-LABEL: @test_vld1q_p64_x4( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 +// NYI: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 +// NYI: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %a) +// NYI: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16 +// NYI: ret %struct.poly64x2x4_t [[TMP6]] +// poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) { +// return vld1q_p64_x4(a); +// } + +// NYI-LABEL: @test_vld1_f64_x4( +// NYI: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %a) +// NYI: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.float64x1x4_t [[TMP6]] +// float64x1x4_t test_vld1_f64_x4(float64_t const *a) { +// return vld1_f64_x4(a); +// } + +// NYI-LABEL: @test_vld1_p64_x4( +// NYI: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 +// 
NYI: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 +// NYI: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %a) +// NYI: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]] +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) +// NYI: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8 +// NYI: ret %struct.poly64x1x4_t [[TMP6]] +// poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { +// return vld1_p64_x4(a); +// } + +// NYI-LABEL: @test_vst1q_f64_x2( +// NYI: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> +// NYI: call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) { +// vst1q_f64_x2(a, b); +// } + +// NYI-LABEL: @test_vst1q_p64_x2( +// NYI: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) { +// vst1q_p64_x2(a, b); +// } + +// NYI-LABEL: @test_vst1_f64_x2( +// NYI: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: 
[[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> +// NYI: call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) { +// vst1_f64_x2(a, b); +// } + +// NYI-LABEL: @test_vst1_p64_x2( +// NYI: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0 +// NYI: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a) +// NYI: ret void +// void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) { +// vst1_p64_x2(a, b); +// } + +// NYI-LABEL: @test_vst1q_f64_x3( +// NYI: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr 
inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> +// NYI: call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { +// vst1q_f64_x3(a, b); +// } + +// NYI-LABEL: @test_vst1q_p64_x3( +// NYI: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> +// NYI: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> +// NYI: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { +// vst1q_p64_x3(a, b); +// } + +// NYI-LABEL: @test_vst1_f64_x3( +// NYI: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = 
load <1 x double>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> +// NYI: call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { +// vst1_f64_x3(a, b); +// } + +// NYI-LABEL: @test_vst1_p64_x3( +// NYI: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0 +// NYI: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +// NYI: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a) +// NYI: ret void +// void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { +// vst1_p64_x3(a, b); +// } + +// NYI-LABEL: @test_vst1q_f64_x4( +// NYI: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, 
i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> +// NYI: call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { +// vst1q_f64_x4(a, b); +// } + +// NYI-LABEL: @test_vst1q_p64_x4( +// NYI: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 +// NYI: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 +// NYI: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 +// NYI: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 +// NYI: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 +// NYI: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> +// NYI: [[TMP11:%.*]] = bitcast <16 x i8> 
[[TMP4]] to <2 x i64> +// NYI: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> +// NYI: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> +// NYI: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> +// NYI: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { +// vst1q_p64_x4(a, b); +// } + +// NYI-LABEL: @test_vst1_f64_x4( +// NYI: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> +// NYI: call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { +// vst1_f64_x4(a, b); +// } + +// NYI-LABEL: @test_vst1_p64_x4( +// NYI: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 +// NYI: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 +// NYI: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0 +// NYI: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 +// NYI: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) +// NYI: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 +// NYI: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 +// NYI: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> +// NYI: [[VAL1:%.*]] = getelementptr 
inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 +// NYI: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 +// NYI: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +// NYI: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 +// NYI: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 +// NYI: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +// NYI: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 +// NYI: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 +// NYI: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 +// NYI: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +// NYI: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +// NYI: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +// NYI: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +// NYI: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> +// NYI: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a) +// NYI: ret void +// void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { +// vst1_p64_x4(a, b); +// } + +// NYI-LABEL: @test_vceqd_s64( +// NYI: [[TMP0:%.*]] = icmp eq i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vceqd_s64(int64_t a, int64_t b) { +// return (uint64_t)vceqd_s64(a, b); +// } + +// NYI-LABEL: @test_vceqd_u64( +// NYI: [[TMP0:%.*]] = icmp eq i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vceqd_u64(uint64_t a, uint64_t b) { +// return (int64_t)vceqd_u64(a, b); +// } + +// NYI-LABEL: @test_vceqzd_s64( +// NYI: [[TMP0:%.*]] = icmp eq i64 %a, 0 +// NYI: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQZ_I]] +// uint64_t test_vceqzd_s64(int64_t a) { +// return (uint64_t)vceqzd_s64(a); +// } + +// NYI-LABEL: @test_vceqzd_u64( +// NYI: [[TMP0:%.*]] = icmp eq i64 %a, 0 +// NYI: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQZD_I]] +// int64_t test_vceqzd_u64(int64_t a) { +// return (int64_t)vceqzd_u64(a); +// } + +// NYI-LABEL: @test_vcged_s64( +// NYI: [[TMP0:%.*]] = icmp sge i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcged_s64(int64_t a, int64_t b) { +// return (uint64_t)vcged_s64(a, b); +// } + +// NYI-LABEL: @test_vcged_u64( +// NYI: [[TMP0:%.*]] = icmp uge i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcged_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vcged_u64(a, b); +// } + +// NYI-LABEL: @test_vcgezd_s64( +// NYI: [[TMP0:%.*]] = icmp sge i64 %a, 0 +// NYI: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCGEZ_I]] +// uint64_t test_vcgezd_s64(int64_t a) { +// return (uint64_t)vcgezd_s64(a); +// } + +// NYI-LABEL: @test_vcgtd_s64( +// NYI: [[TMP0:%.*]] = icmp sgt i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcgtd_s64(int64_t a, int64_t b) { +// return (uint64_t)vcgtd_s64(a, b); +// } + +// NYI-LABEL: @test_vcgtd_u64( +// NYI: [[TMP0:%.*]] = icmp ugt i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to 
i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vcgtd_u64(a, b); +// } + +// NYI-LABEL: @test_vcgtzd_s64( +// NYI: [[TMP0:%.*]] = icmp sgt i64 %a, 0 +// NYI: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCGTZ_I]] +// uint64_t test_vcgtzd_s64(int64_t a) { +// return (uint64_t)vcgtzd_s64(a); +// } + +// NYI-LABEL: @test_vcled_s64( +// NYI: [[TMP0:%.*]] = icmp sle i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcled_s64(int64_t a, int64_t b) { +// return (uint64_t)vcled_s64(a, b); +// } + +// NYI-LABEL: @test_vcled_u64( +// NYI: [[TMP0:%.*]] = icmp ule i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcled_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vcled_u64(a, b); +// } + +// NYI-LABEL: @test_vclezd_s64( +// NYI: [[TMP0:%.*]] = icmp sle i64 %a, 0 +// NYI: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCLEZ_I]] +// uint64_t test_vclezd_s64(int64_t a) { +// return (uint64_t)vclezd_s64(a); +// } + +// NYI-LABEL: @test_vcltd_s64( +// NYI: [[TMP0:%.*]] = icmp slt i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcltd_s64(int64_t a, int64_t b) { +// return (uint64_t)vcltd_s64(a, b); +// } + +// NYI-LABEL: @test_vcltd_u64( +// NYI: [[TMP0:%.*]] = icmp ult i64 %a, %b +// NYI: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQD_I]] +// uint64_t test_vcltd_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vcltd_u64(a, b); +// } + +// NYI-LABEL: @test_vcltzd_s64( +// NYI: [[TMP0:%.*]] = icmp slt i64 %a, 0 +// NYI: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCLTZ_I]] +// uint64_t test_vcltzd_s64(int64_t a) { +// return (uint64_t)vcltzd_s64(a); +// } + +// NYI-LABEL: @test_vtstd_s64( +// NYI: [[TMP0:%.*]] = and i64 %a, %b +// NYI: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 +// NYI: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64 +// NYI: ret i64 [[VTSTD_I]] +// uint64_t test_vtstd_s64(int64_t a, int64_t b) { +// return (uint64_t)vtstd_s64(a, b); +// } + +// NYI-LABEL: @test_vtstd_u64( +// NYI: [[TMP0:%.*]] = and i64 %a, %b +// NYI: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 +// NYI: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64 +// NYI: ret i64 [[VTSTD_I]] +// uint64_t test_vtstd_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vtstd_u64(a, b); +// } + +// NYI-LABEL: @test_vabsd_s64( +// NYI: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) +// NYI: ret i64 [[VABSD_S64_I]] +// int64_t test_vabsd_s64(int64_t a) { +// return (int64_t)vabsd_s64(a); +// } + +// NYI-LABEL: @test_vqabsb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqabsb_s8(int8_t a) { +// return (int8_t)vqabsb_s8(a); +// } + +// NYI-LABEL: @test_vqabsh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqabsh_s16(int16_t a) { +// return (int16_t)vqabsh_s16(a); +// } + +// NYI-LABEL: @test_vqabss_s32( +// NYI: [[VQABSS_S32_I:%.*]] = call i32 
@llvm.aarch64.neon.sqabs.i32(i32 %a) +// NYI: ret i32 [[VQABSS_S32_I]] +// int32_t test_vqabss_s32(int32_t a) { +// return (int32_t)vqabss_s32(a); +// } + +// NYI-LABEL: @test_vqabsd_s64( +// NYI: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) +// NYI: ret i64 [[VQABSD_S64_I]] +// int64_t test_vqabsd_s64(int64_t a) { +// return (int64_t)vqabsd_s64(a); +// } + +// NYI-LABEL: @test_vnegd_s64( +// NYI: [[VNEGD_I:%.*]] = sub i64 0, %a +// NYI: ret i64 [[VNEGD_I]] +// int64_t test_vnegd_s64(int64_t a) { +// return (int64_t)vnegd_s64(a); +// } + +// NYI-LABEL: @test_vqnegb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqnegb_s8(int8_t a) { +// return (int8_t)vqnegb_s8(a); +// } + +// NYI-LABEL: @test_vqnegh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqnegh_s16(int16_t a) { +// return (int16_t)vqnegh_s16(a); +// } + +// NYI-LABEL: @test_vqnegs_s32( +// NYI: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) +// NYI: ret i32 [[VQNEGS_S32_I]] +// int32_t test_vqnegs_s32(int32_t a) { +// return (int32_t)vqnegs_s32(a); +// } + +// NYI-LABEL: @test_vqnegd_s64( +// NYI: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) +// NYI: ret i64 [[VQNEGD_S64_I]] +// int64_t test_vqnegd_s64(int64_t a) { +// return (int64_t)vqnegd_s64(a); +// } + +// NYI-LABEL: @test_vuqaddb_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// int8_t test_vuqaddb_s8(int8_t a, uint8_t b) { +// return (int8_t)vuqaddb_s8(a, b); +// } + +// NYI-LABEL: @test_vuqaddh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// int16_t test_vuqaddh_s16(int16_t a, uint16_t b) { +// return (int16_t)vuqaddh_s16(a, b); +// } + +// NYI-LABEL: @test_vuqadds_s32( +// NYI: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VUQADDS_S32_I]] +// int32_t test_vuqadds_s32(int32_t a, uint32_t b) { +// return (int32_t)vuqadds_s32(a, b); +// } + +// NYI-LABEL: @test_vuqaddd_s64( +// NYI: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VUQADDD_S64_I]] +// int64_t test_vuqaddd_s64(int64_t a, uint64_t b) { +// return (int64_t)vuqaddd_s64(a, b); +// } + +// NYI-LABEL: @test_vsqaddb_u8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 +// NYI: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) +// 
NYI: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0 +// NYI: ret i8 [[TMP2]] +// uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) { +// return (uint8_t)vsqaddb_u8(a, b); +// } + +// NYI-LABEL: @test_vsqaddh_u16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0 +// NYI: ret i16 [[TMP2]] +// uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) { +// return (uint16_t)vsqaddh_u16(a, b); +// } + +// NYI-LABEL: @test_vsqadds_u32( +// NYI: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) +// NYI: ret i32 [[VSQADDS_U32_I]] +// uint32_t test_vsqadds_u32(uint32_t a, int32_t b) { +// return (uint32_t)vsqadds_u32(a, b); +// } + +// NYI-LABEL: @test_vsqaddd_u64( +// NYI: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) +// NYI: ret i64 [[VSQADDD_U64_I]] +// uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) { +// return (uint64_t)vsqaddd_u64(a, b); +// } + +// NYI-LABEL: @test_vqdmlalh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0 +// NYI: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 +// NYI: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) +// NYI: ret i32 [[VQDMLXL1_I]] +// int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { +// return (int32_t)vqdmlalh_s16(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlals_s32( +// NYI: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) +// NYI: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) +// NYI: ret i64 [[VQDMLXL1_I]] +// int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { +// return (int64_t)vqdmlals_s32(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlslh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0 +// NYI: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 +// NYI: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) +// NYI: ret i32 [[VQDMLXL1_I]] +// int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { +// return (int32_t)vqdmlslh_s16(a, b, c); +// } + +// NYI-LABEL: @test_vqdmlsls_s32( +// NYI: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) +// NYI: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) +// NYI: ret i64 [[VQDMLXL1_I]] +// int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { +// return (int64_t)vqdmlsls_s32(a, b, c); +// } + +// NYI-LABEL: @test_vqdmullh_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 +// NYI: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// NYI: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0 +// NYI: ret i32 [[TMP2]] +// 
int32_t test_vqdmullh_s16(int16_t a, int16_t b) { +// return (int32_t)vqdmullh_s16(a, b); +// } + +// NYI-LABEL: @test_vqdmulls_s32( +// NYI: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) +// NYI: ret i64 [[VQDMULLS_S32_I]] +// int64_t test_vqdmulls_s32(int32_t a, int32_t b) { +// return (int64_t)vqdmulls_s32(a, b); +// } + +// NYI-LABEL: @test_vqmovunh_s16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0 +// NYI: ret i8 [[TMP1]] +// uint8_t test_vqmovunh_s16(int16_t a) { +// return (uint8_t)vqmovunh_s16(a); +// } + +// NYI-LABEL: @test_vqmovuns_s32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0 +// NYI: ret i16 [[TMP1]] +// uint16_t test_vqmovuns_s32(int32_t a) { +// return (uint16_t)vqmovuns_s32(a); +// } + +// NYI-LABEL: @test_vqmovund_s64( +// NYI: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) +// NYI: ret i32 [[VQMOVUND_S64_I]] +// uint32_t test_vqmovund_s64(int64_t a) { +// return (uint32_t)vqmovund_s64(a); +// } + +// NYI-LABEL: @test_vqmovnh_s16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqmovnh_s16(int16_t a) { +// return (int8_t)vqmovnh_s16(a); +// } + +// NYI-LABEL: @test_vqmovns_s32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqmovns_s32(int32_t a) { +// return (int16_t)vqmovns_s32(a); +// } + +// NYI-LABEL: @test_vqmovnd_s64( +// NYI: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) +// NYI: ret i32 [[VQMOVND_S64_I]] +// int32_t test_vqmovnd_s64(int64_t a) { +// return (int32_t)vqmovnd_s64(a); +// } + +// NYI-LABEL: @test_vqmovnh_u16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqmovnh_u16(int16_t a) { +// return (int8_t)vqmovnh_u16(a); +// } + +// NYI-LABEL: @test_vqmovns_u32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqmovns_u32(int32_t a) { +// return (int16_t)vqmovns_u32(a); +// } + +// NYI-LABEL: @test_vqmovnd_u64( +// NYI: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) +// NYI: ret i32 [[VQMOVND_U64_I]] +// int32_t test_vqmovnd_u64(int64_t a) { +// return (int32_t)vqmovnd_u64(a); +// } + +// NYI-LABEL: @test_vceqs_f32( +// NYI: [[TMP0:%.*]] = fcmp oeq float %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 
[[TMP0]] to i32 +// NYI: ret i32 [[VCMPD_I]] +// uint32_t test_vceqs_f32(float32_t a, float32_t b) { +// return (uint32_t)vceqs_f32(a, b); +// } + +// NYI-LABEL: @test_vceqd_f64( +// NYI: [[TMP0:%.*]] = fcmp oeq double %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCMPD_I]] +// uint64_t test_vceqd_f64(float64_t a, float64_t b) { +// return (uint64_t)vceqd_f64(a, b); +// } + +// NYI-LABEL: @test_vceqzs_f32( +// NYI: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00 +// NYI: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCEQZ_I]] +// uint32_t test_vceqzs_f32(float32_t a) { +// return (uint32_t)vceqzs_f32(a); +// } + +// NYI-LABEL: @test_vceqzd_f64( +// NYI: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00 +// NYI: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCEQZ_I]] +// uint64_t test_vceqzd_f64(float64_t a) { +// return (uint64_t)vceqzd_f64(a); +// } + +// NYI-LABEL: @test_vcges_f32( +// NYI: [[TMP0:%.*]] = fcmp oge float %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCMPD_I]] +// uint32_t test_vcges_f32(float32_t a, float32_t b) { +// return (uint32_t)vcges_f32(a, b); +// } + +// NYI-LABEL: @test_vcged_f64( +// NYI: [[TMP0:%.*]] = fcmp oge double %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCMPD_I]] +// uint64_t test_vcged_f64(float64_t a, float64_t b) { +// return (uint64_t)vcged_f64(a, b); +// } + +// NYI-LABEL: @test_vcgezs_f32( +// NYI: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00 +// NYI: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCGEZ_I]] +// uint32_t test_vcgezs_f32(float32_t a) { +// return (uint32_t)vcgezs_f32(a); +// } + +// NYI-LABEL: @test_vcgezd_f64( +// NYI: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00 +// NYI: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCGEZ_I]] +// uint64_t test_vcgezd_f64(float64_t a) { +// return (uint64_t)vcgezd_f64(a); +// } + +// NYI-LABEL: @test_vcgts_f32( +// NYI: [[TMP0:%.*]] = fcmp ogt float %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCMPD_I]] +// uint32_t test_vcgts_f32(float32_t a, float32_t b) { +// return (uint32_t)vcgts_f32(a, b); +// } + +// NYI-LABEL: @test_vcgtd_f64( +// NYI: [[TMP0:%.*]] = fcmp ogt double %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCMPD_I]] +// uint64_t test_vcgtd_f64(float64_t a, float64_t b) { +// return (uint64_t)vcgtd_f64(a, b); +// } + +// NYI-LABEL: @test_vcgtzs_f32( +// NYI: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00 +// NYI: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCGTZ_I]] +// uint32_t test_vcgtzs_f32(float32_t a) { +// return (uint32_t)vcgtzs_f32(a); +// } + +// NYI-LABEL: @test_vcgtzd_f64( +// NYI: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00 +// NYI: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCGTZ_I]] +// uint64_t test_vcgtzd_f64(float64_t a) { +// return (uint64_t)vcgtzd_f64(a); +// } + +// NYI-LABEL: @test_vcles_f32( +// NYI: [[TMP0:%.*]] = fcmp ole float %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCMPD_I]] +// uint32_t test_vcles_f32(float32_t a, float32_t b) { +// return (uint32_t)vcles_f32(a, b); +// } + +// NYI-LABEL: @test_vcled_f64( +// NYI: [[TMP0:%.*]] = fcmp ole double %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCMPD_I]] +// uint64_t test_vcled_f64(float64_t a, float64_t b) { +// return (uint64_t)vcled_f64(a, b); +// } + +// NYI-LABEL: @test_vclezs_f32( +// 
NYI: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00 +// NYI: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCLEZ_I]] +// uint32_t test_vclezs_f32(float32_t a) { +// return (uint32_t)vclezs_f32(a); +// } + +// NYI-LABEL: @test_vclezd_f64( +// NYI: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00 +// NYI: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCLEZ_I]] +// uint64_t test_vclezd_f64(float64_t a) { +// return (uint64_t)vclezd_f64(a); +// } + +// NYI-LABEL: @test_vclts_f32( +// NYI: [[TMP0:%.*]] = fcmp olt float %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCMPD_I]] +// uint32_t test_vclts_f32(float32_t a, float32_t b) { +// return (uint32_t)vclts_f32(a, b); +// } + +// NYI-LABEL: @test_vcltd_f64( +// NYI: [[TMP0:%.*]] = fcmp olt double %a, %b +// NYI: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCMPD_I]] +// uint64_t test_vcltd_f64(float64_t a, float64_t b) { +// return (uint64_t)vcltd_f64(a, b); +// } + +// NYI-LABEL: @test_vcltzs_f32( +// NYI: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00 +// NYI: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32 +// NYI: ret i32 [[VCLTZ_I]] +// uint32_t test_vcltzs_f32(float32_t a) { +// return (uint32_t)vcltzs_f32(a); +// } + +// NYI-LABEL: @test_vcltzd_f64( +// NYI: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00 +// NYI: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 +// NYI: ret i64 [[VCLTZ_I]] +// uint64_t test_vcltzd_f64(float64_t a) { +// return (uint64_t)vcltzd_f64(a); +// } + +// NYI-LABEL: @test_vcages_f32( +// NYI: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) +// NYI: ret i32 [[VCAGES_F32_I]] +// uint32_t test_vcages_f32(float32_t a, float32_t b) { +// return (uint32_t)vcages_f32(a, b); +// } + +// NYI-LABEL: @test_vcaged_f64( +// NYI: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) +// NYI: ret i64 [[VCAGED_F64_I]] +// uint64_t test_vcaged_f64(float64_t a, float64_t b) { +// return (uint64_t)vcaged_f64(a, b); +// } + +// NYI-LABEL: @test_vcagts_f32( +// NYI: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) +// NYI: ret i32 [[VCAGTS_F32_I]] +// uint32_t test_vcagts_f32(float32_t a, float32_t b) { +// return (uint32_t)vcagts_f32(a, b); +// } + +// NYI-LABEL: @test_vcagtd_f64( +// NYI: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) +// NYI: ret i64 [[VCAGTD_F64_I]] +// uint64_t test_vcagtd_f64(float64_t a, float64_t b) { +// return (uint64_t)vcagtd_f64(a, b); +// } + +// NYI-LABEL: @test_vcales_f32( +// NYI: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) +// NYI: ret i32 [[VCALES_F32_I]] +// uint32_t test_vcales_f32(float32_t a, float32_t b) { +// return (uint32_t)vcales_f32(a, b); +// } + +// NYI-LABEL: @test_vcaled_f64( +// NYI: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) +// NYI: ret i64 [[VCALED_F64_I]] +// uint64_t test_vcaled_f64(float64_t a, float64_t b) { +// return (uint64_t)vcaled_f64(a, b); +// } + +// NYI-LABEL: @test_vcalts_f32( +// NYI: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) +// NYI: ret i32 [[VCALTS_F32_I]] +// uint32_t test_vcalts_f32(float32_t a, float32_t b) { +// return (uint32_t)vcalts_f32(a, b); +// } + +// NYI-LABEL: @test_vcaltd_f64( +// NYI: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) +// NYI: ret i64 [[VCALTD_F64_I]] +// uint64_t 
test_vcaltd_f64(float64_t a, float64_t b) { +// return (uint64_t)vcaltd_f64(a, b); +// } + +// NYI-LABEL: @test_vshrd_n_s64( +// NYI: [[SHRD_N:%.*]] = ashr i64 %a, 1 +// NYI: ret i64 [[SHRD_N]] +// int64_t test_vshrd_n_s64(int64_t a) { +// return (int64_t)vshrd_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vshr_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], +// NYI: ret <1 x i64> [[VSHR_N]] +// int64x1_t test_vshr_n_s64(int64x1_t a) { +// return vshr_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vshrd_n_u64( +// NYI: ret i64 0 +// uint64_t test_vshrd_n_u64(uint64_t a) { +// return (uint64_t)vshrd_n_u64(a, 64); +// } + +// NYI-LABEL: @test_vshrd_n_u64_2( +// NYI: ret i64 0 +// uint64_t test_vshrd_n_u64_2() { +// uint64_t a = UINT64_C(0xf000000000000000); +// return vshrd_n_u64(a, 64); +// } + +// NYI-LABEL: @test_vshr_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], +// NYI: ret <1 x i64> [[VSHR_N]] +// uint64x1_t test_vshr_n_u64(uint64x1_t a) { +// return vshr_n_u64(a, 1); +// } + +// NYI-LABEL: @test_vrshrd_n_s64( +// NYI: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63) +// NYI: ret i64 [[VRSHR_N]] +// int64_t test_vrshrd_n_s64(int64_t a) { +// return (int64_t)vrshrd_n_s64(a, 63); +// } + +// NYI-LABEL: @test_vrshr_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// NYI: ret <1 x i64> [[VRSHR_N1]] +// int64x1_t test_vrshr_n_s64(int64x1_t a) { +// return vrshr_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vrshrd_n_u64( +// NYI: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63) +// NYI: ret i64 [[VRSHR_N]] +// uint64_t test_vrshrd_n_u64(uint64_t a) { +// return (uint64_t)vrshrd_n_u64(a, 63); +// } + +// NYI-LABEL: @test_vrshr_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// NYI: ret <1 x i64> [[VRSHR_N1]] +// uint64x1_t test_vrshr_n_u64(uint64x1_t a) { +// return vrshr_n_u64(a, 1); +// } + +// NYI-LABEL: @test_vsrad_n_s64( +// NYI: [[SHRD_N:%.*]] = ashr i64 %b, 63 +// NYI: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]] +// NYI: ret i64 [[TMP0]] +// int64_t test_vsrad_n_s64(int64_t a, int64_t b) { +// return (int64_t)vsrad_n_s64(a, b, 63); +// } + +// NYI-LABEL: @test_vsra_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] +// NYI: ret <1 x i64> [[TMP4]] +// int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { +// return vsra_n_s64(a, b, 1); +// } + +// NYI-LABEL: @test_vsrad_n_u64( +// NYI: [[SHRD_N:%.*]] = lshr i64 %b, 63 +// NYI: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]] +// NYI: ret i64 [[TMP0]] +// uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vsrad_n_u64(a, b, 63); +// } + +// NYI-LABEL: 
@test_vsrad_n_u64_2( +// NYI: ret i64 %a +// uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) { +// return (uint64_t)vsrad_n_u64(a, b, 64); +// } + +// NYI-LABEL: @test_vsra_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], +// NYI: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] +// NYI: ret <1 x i64> [[TMP4]] +// uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { +// return vsra_n_u64(a, b, 1); +// } + +// NYI-LABEL: @test_vrsrad_n_s64( +// NYI: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63) +// NYI: [[TMP1:%.*]] = add i64 %a, [[TMP0]] +// NYI: ret i64 [[TMP1]] +// int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { +// return (int64_t)vrsrad_n_s64(a, b, 63); +// } + +// NYI-LABEL: @test_vrsra_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <1 x i64> [[TMP3]] +// int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { +// return vrsra_n_s64(a, b, 1); +// } + +// NYI-LABEL: @test_vrsrad_n_u64( +// NYI: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63) +// NYI: [[TMP1:%.*]] = add i64 %a, [[TMP0]] +// NYI: ret i64 [[TMP1]] +// uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vrsrad_n_u64(a, b, 63); +// } + +// NYI-LABEL: @test_vrsra_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) +// NYI: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] +// NYI: ret <1 x i64> [[TMP3]] +// uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { +// return vrsra_n_u64(a, b, 1); +// } + +// NYI-LABEL: @test_vshld_n_s64( +// NYI: [[SHLD_N:%.*]] = shl i64 %a, 1 +// NYI: ret i64 [[SHLD_N]] +// int64_t test_vshld_n_s64(int64_t a) { +// return (int64_t)vshld_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vshl_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// NYI: ret <1 x i64> [[VSHL_N]] +// int64x1_t test_vshl_n_s64(int64x1_t a) { +// return vshl_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vshld_n_u64( +// NYI: [[SHLD_N:%.*]] = shl i64 %a, 63 +// NYI: ret i64 [[SHLD_N]] +// uint64_t test_vshld_n_u64(uint64_t a) { +// return (uint64_t)vshld_n_u64(a, 63); +// } + +// NYI-LABEL: @test_vshl_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], +// NYI: ret <1 x i64> [[VSHL_N]] +// uint64x1_t test_vshl_n_u64(uint64x1_t a) { +// return vshl_n_u64(a, 1); +// } + +// NYI-LABEL: @test_vqshlb_n_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: 
[[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> ) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqshlb_n_s8(int8_t a) { +// return (int8_t)vqshlb_n_s8(a, 7); +// } + +// NYI-LABEL: @test_vqshlh_n_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> ) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqshlh_n_s16(int16_t a) { +// return (int16_t)vqshlh_n_s16(a, 15); +// } + +// NYI-LABEL: @test_vqshls_n_s32( +// NYI: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31) +// NYI: ret i32 [[VQSHLS_N_S32]] +// int32_t test_vqshls_n_s32(int32_t a) { +// return (int32_t)vqshls_n_s32(a, 31); +// } + +// NYI-LABEL: @test_vqshld_n_s64( +// NYI: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63) +// NYI: ret i64 [[VQSHL_N]] +// int64_t test_vqshld_n_s64(int64_t a) { +// return (int64_t)vqshld_n_s64(a, 63); +// } + +// NYI-LABEL: @test_vqshl_n_s8( +// NYI: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) +// NYI: ret <8 x i8> [[VQSHL_N]] +// int8x8_t test_vqshl_n_s8(int8x8_t a) { +// return vqshl_n_s8(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_s8( +// NYI: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) +// NYI: ret <16 x i8> [[VQSHL_N]] +// int8x16_t test_vqshlq_n_s8(int8x16_t a) { +// return vqshlq_n_s8(a, 0); +// } + +// NYI-LABEL: @test_vqshl_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) +// NYI: ret <4 x i16> [[VQSHL_N1]] +// int16x4_t test_vqshl_n_s16(int16x4_t a) { +// return vqshl_n_s16(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) +// NYI: ret <8 x i16> [[VQSHL_N1]] +// int16x8_t test_vqshlq_n_s16(int16x8_t a) { +// return vqshlq_n_s16(a, 0); +// } + +// NYI-LABEL: @test_vqshl_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) +// NYI: ret <2 x i32> [[VQSHL_N1]] +// int32x2_t test_vqshl_n_s32(int32x2_t a) { +// return vqshl_n_s32(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) +// NYI: ret <4 x i32> [[VQSHL_N1]] +// int32x4_t test_vqshlq_n_s32(int32x4_t a) { +// return vqshlq_n_s32(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> 
[[VQSHL_N]], <2 x i64> zeroinitializer) +// NYI: ret <2 x i64> [[VQSHL_N1]] +// int64x2_t test_vqshlq_n_s64(int64x2_t a) { +// return vqshlq_n_s64(a, 0); +// } + +// NYI-LABEL: @test_vqshl_n_u8( +// NYI: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) +// NYI: ret <8 x i8> [[VQSHL_N]] +// uint8x8_t test_vqshl_n_u8(uint8x8_t a) { +// return vqshl_n_u8(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_u8( +// NYI: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) +// NYI: ret <16 x i8> [[VQSHL_N]] +// uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { +// return vqshlq_n_u8(a, 0); +// } + +// NYI-LABEL: @test_vqshl_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// NYI: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) +// NYI: ret <4 x i16> [[VQSHL_N1]] +// uint16x4_t test_vqshl_n_u16(uint16x4_t a) { +// return vqshl_n_u16(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// NYI: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) +// NYI: ret <8 x i16> [[VQSHL_N1]] +// uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { +// return vqshlq_n_u16(a, 0); +// } + +// NYI-LABEL: @test_vqshl_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// NYI: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) +// NYI: ret <2 x i32> [[VQSHL_N1]] +// uint32x2_t test_vqshl_n_u32(uint32x2_t a) { +// return vqshl_n_u32(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// NYI: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) +// NYI: ret <4 x i32> [[VQSHL_N1]] +// uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { +// return vqshlq_n_u32(a, 0); +// } + +// NYI-LABEL: @test_vqshlq_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> +// NYI: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) +// NYI: ret <2 x i64> [[VQSHL_N1]] +// uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { +// return vqshlq_n_u64(a, 0); +// } + +// NYI-LABEL: @test_vqshl_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// NYI: ret <1 x i64> [[VQSHL_N1]] +// int64x1_t test_vqshl_n_s64(int64x1_t a) { +// return vqshl_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vqshlb_n_u8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> ) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0 +// NYI: ret i8 [[TMP1]] +// uint8_t test_vqshlb_n_u8(uint8_t a) { +// return (uint8_t)vqshlb_n_u8(a, 7); +// } + +// NYI-LABEL: @test_vqshlh_n_u16( +// NYI: [[TMP0:%.*]] 
= insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> ) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0 +// NYI: ret i16 [[TMP1]] +// uint16_t test_vqshlh_n_u16(uint16_t a) { +// return (uint16_t)vqshlh_n_u16(a, 15); +// } + +// NYI-LABEL: @test_vqshls_n_u32( +// NYI: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31) +// NYI: ret i32 [[VQSHLS_N_U32]] +// uint32_t test_vqshls_n_u32(uint32_t a) { +// return (uint32_t)vqshls_n_u32(a, 31); +// } + +// NYI-LABEL: @test_vqshld_n_u64( +// NYI: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63) +// NYI: ret i64 [[VQSHL_N]] +// uint64_t test_vqshld_n_u64(uint64_t a) { +// return (uint64_t)vqshld_n_u64(a, 63); +// } + +// NYI-LABEL: @test_vqshl_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) +// NYI: ret <1 x i64> [[VQSHL_N1]] +// uint64x1_t test_vqshl_n_u64(uint64x1_t a) { +// return vqshl_n_u64(a, 1); +// } + +// NYI-LABEL: @test_vqshlub_n_s8( +// NYI: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 +// NYI: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> ) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqshlub_n_s8(int8_t a) { +// return (int8_t)vqshlub_n_s8(a, 7); +// } + +// NYI-LABEL: @test_vqshluh_n_s16( +// NYI: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 +// NYI: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> ) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqshluh_n_s16(int16_t a) { +// return (int16_t)vqshluh_n_s16(a, 15); +// } + +// NYI-LABEL: @test_vqshlus_n_s32( +// NYI: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31) +// NYI: ret i32 [[VQSHLUS_N_S32]] +// int32_t test_vqshlus_n_s32(int32_t a) { +// return (int32_t)vqshlus_n_s32(a, 31); +// } + +// NYI-LABEL: @test_vqshlud_n_s64( +// NYI: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63) +// NYI: ret i64 [[VQSHLU_N]] +// int64_t test_vqshlud_n_s64(int64_t a) { +// return (int64_t)vqshlud_n_s64(a, 63); +// } + +// NYI-LABEL: @test_vqshlu_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> ) +// NYI: ret <1 x i64> [[VQSHLU_N1]] +// uint64x1_t test_vqshlu_n_s64(int64x1_t a) { +// return vqshlu_n_s64(a, 1); +// } + +// NYI-LABEL: @test_vsrid_n_s64( +// NYI: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> +// NYI: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> +// NYI: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63) +// NYI: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64 +// NYI: ret i64 [[VSRID_N_S643]] +// int64_t test_vsrid_n_s64(int64_t a, int64_t b) { +// return (int64_t)vsrid_n_s64(a, b, 63); +// } + +// NYI-LABEL: @test_vsri_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast 
<1 x i64> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1) +// NYI: ret <1 x i64> [[VSRI_N2]] +// int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { +// return vsri_n_s64(a, b, 1); +// } + +// NYI-LABEL: @test_vsrid_n_u64( +// NYI: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> +// NYI: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> +// NYI: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63) +// NYI: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64 +// NYI: ret i64 [[VSRID_N_U643]] +// uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vsrid_n_u64(a, b, 63); +// } + +// NYI-LABEL: @test_vsri_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1) +// NYI: ret <1 x i64> [[VSRI_N2]] +// uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { +// return vsri_n_u64(a, b, 1); +// } + +// NYI-LABEL: @test_vslid_n_s64( +// NYI: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> +// NYI: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> +// NYI: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63) +// NYI: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64 +// NYI: ret i64 [[VSLID_N_S643]] +// int64_t test_vslid_n_s64(int64_t a, int64_t b) { +// return (int64_t)vslid_n_s64(a, b, 63); +// } + +// NYI-LABEL: @test_vsli_n_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1) +// NYI: ret <1 x i64> [[VSLI_N2]] +// int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { +// return vsli_n_s64(a, b, 1); +// } + +// NYI-LABEL: @test_vslid_n_u64( +// NYI: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> +// NYI: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> +// NYI: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63) +// NYI: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64 +// NYI: ret i64 [[VSLID_N_U643]] +// uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) { +// return (uint64_t)vslid_n_u64(a, b, 63); +// } + +// NYI-LABEL: @test_vsli_n_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +// NYI: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1) +// NYI: ret <1 x i64> [[VSLI_N2]] +// uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { +// return vsli_n_u64(a, b, 1); +// } + +// 
NYI-LABEL: @test_vqshrnh_n_s16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqshrnh_n_s16(int16_t a) { +// return (int8_t)vqshrnh_n_s16(a, 8); +// } + +// NYI-LABEL: @test_vqshrns_n_s32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqshrns_n_s32(int32_t a) { +// return (int16_t)vqshrns_n_s32(a, 16); +// } + +// NYI-LABEL: @test_vqshrnd_n_s64( +// NYI: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32) +// NYI: ret i32 [[VQSHRND_N_S64]] +// int32_t test_vqshrnd_n_s64(int64_t a) { +// return (int32_t)vqshrnd_n_s64(a, 32); +// } + +// NYI-LABEL: @test_vqshrnh_n_u16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0 +// NYI: ret i8 [[TMP1]] +// uint8_t test_vqshrnh_n_u16(uint16_t a) { +// return (uint8_t)vqshrnh_n_u16(a, 8); +// } + +// NYI-LABEL: @test_vqshrns_n_u32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0 +// NYI: ret i16 [[TMP1]] +// uint16_t test_vqshrns_n_u32(uint32_t a) { +// return (uint16_t)vqshrns_n_u32(a, 16); +// } + +// NYI-LABEL: @test_vqshrnd_n_u64( +// NYI: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32) +// NYI: ret i32 [[VQSHRND_N_U64]] +// uint32_t test_vqshrnd_n_u64(uint64_t a) { +// return (uint32_t)vqshrnd_n_u64(a, 32); +// } + +// NYI-LABEL: @test_vqrshrnh_n_s16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqrshrnh_n_s16(int16_t a) { +// return (int8_t)vqrshrnh_n_s16(a, 8); +// } + +// NYI-LABEL: @test_vqrshrns_n_s32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqrshrns_n_s32(int32_t a) { +// return (int16_t)vqrshrns_n_s32(a, 16); +// } + +// NYI-LABEL: @test_vqrshrnd_n_s64( +// NYI: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32) +// NYI: ret i32 [[VQRSHRND_N_S64]] +// int32_t test_vqrshrnd_n_s64(int64_t a) { +// return (int32_t)vqrshrnd_n_s64(a, 32); +// } + +// NYI-LABEL: @test_vqrshrnh_n_u16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0 +// NYI: ret i8 [[TMP1]] +// uint8_t 
test_vqrshrnh_n_u16(uint16_t a) { +// return (uint8_t)vqrshrnh_n_u16(a, 8); +// } + +// NYI-LABEL: @test_vqrshrns_n_u32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0 +// NYI: ret i16 [[TMP1]] +// uint16_t test_vqrshrns_n_u32(uint32_t a) { +// return (uint16_t)vqrshrns_n_u32(a, 16); +// } + +// NYI-LABEL: @test_vqrshrnd_n_u64( +// NYI: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32) +// NYI: ret i32 [[VQRSHRND_N_U64]] +// uint32_t test_vqrshrnd_n_u64(uint64_t a) { +// return (uint32_t)vqrshrnd_n_u64(a, 32); +// } + +// NYI-LABEL: @test_vqshrunh_n_s16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0 +// NYI: ret i8 [[TMP1]] +// int8_t test_vqshrunh_n_s16(int16_t a) { +// return (int8_t)vqshrunh_n_s16(a, 8); +// } + +// NYI-LABEL: @test_vqshruns_n_s32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0 +// NYI: ret i16 [[TMP1]] +// int16_t test_vqshruns_n_s32(int32_t a) { +// return (int16_t)vqshruns_n_s32(a, 16); +// } + +// NYI-LABEL: @test_vqshrund_n_s64( +// NYI: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32) +// NYI: ret i32 [[VQSHRUND_N_S64]] +// int32_t test_vqshrund_n_s64(int64_t a) { +// return (int32_t)vqshrund_n_s64(a, 32); +// } + +// NYI-LABEL: @test_vqrshrunh_n_s16( +// NYI: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 +// NYI: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8) +// NYI: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0 +// NYI: ret i8 [[TMP1]] +// uint8_t test_vqrshrunh_n_s16(int16_t a) { +// return (uint8_t)vqrshrunh_n_s16(a, 8); +// } + +// NYI-LABEL: @test_vqrshruns_n_s32( +// NYI: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +// NYI: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16) +// NYI: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0 +// NYI: ret i16 [[TMP1]] +// uint16_t test_vqrshruns_n_s32(int32_t a) { +// return (uint16_t)vqrshruns_n_s32(a, 16); +// } + +// NYI-LABEL: @test_vqrshrund_n_s64( +// NYI: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32) +// NYI: ret i32 [[VQRSHRUND_N_S64]] +// uint32_t test_vqrshrund_n_s64(int64_t a) { +// return (uint32_t)vqrshrund_n_s64(a, 32); +// } + +// NYI-LABEL: @test_vcvts_n_f32_s32( +// NYI: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1) +// NYI: ret float [[VCVTS_N_F32_S32]] +// float32_t test_vcvts_n_f32_s32(int32_t a) { +// return vcvts_n_f32_s32(a, 1); +// } + +// NYI-LABEL: @test_vcvtd_n_f64_s64( +// NYI: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1) +// NYI: ret double [[VCVTD_N_F64_S64]] +// float64_t test_vcvtd_n_f64_s64(int64_t a) { +// return vcvtd_n_f64_s64(a, 1); +// } + +// NYI-LABEL: @test_vcvts_n_f32_u32( +// NYI: [[VCVTS_N_F32_U32:%.*]] = 
call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32) +// NYI: ret float [[VCVTS_N_F32_U32]] +// float32_t test_vcvts_n_f32_u32(uint32_t a) { +// return vcvts_n_f32_u32(a, 32); +// } + +// NYI-LABEL: @test_vcvtd_n_f64_u64( +// NYI: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64) +// NYI: ret double [[VCVTD_N_F64_U64]] +// float64_t test_vcvtd_n_f64_u64(uint64_t a) { +// return vcvtd_n_f64_u64(a, 64); +// } + +// NYI-LABEL: @test_vcvts_n_s32_f32( +// NYI: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1) +// NYI: ret i32 [[VCVTS_N_S32_F32]] +// int32_t test_vcvts_n_s32_f32(float32_t a) { +// return (int32_t)vcvts_n_s32_f32(a, 1); +// } + +// NYI-LABEL: @test_vcvtd_n_s64_f64( +// NYI: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1) +// NYI: ret i64 [[VCVTD_N_S64_F64]] +// int64_t test_vcvtd_n_s64_f64(float64_t a) { +// return (int64_t)vcvtd_n_s64_f64(a, 1); +// } + +// NYI-LABEL: @test_vcvts_n_u32_f32( +// NYI: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32) +// NYI: ret i32 [[VCVTS_N_U32_F32]] +// uint32_t test_vcvts_n_u32_f32(float32_t a) { +// return (uint32_t)vcvts_n_u32_f32(a, 32); +// } + +// NYI-LABEL: @test_vcvtd_n_u64_f64( +// NYI: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64) +// NYI: ret i64 [[VCVTD_N_U64_F64]] +// uint64_t test_vcvtd_n_u64_f64(float64_t a) { +// return (uint64_t)vcvtd_n_u64_f64(a, 64); +// } + +// NYI-LABEL: @test_vreinterpret_s8_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { +// return vreinterpret_s8_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { +// return vreinterpret_s8_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { +// return vreinterpret_s8_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_u8( +// NYI: ret <8 x i8> %a +// int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { +// return vreinterpret_s8_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { +// return vreinterpret_s8_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { +// return vreinterpret_s8_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { +// return vreinterpret_s8_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { +// return vreinterpret_s8_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { +// return vreinterpret_s8_f32(a); +// } + +// 
NYI-LABEL: @test_vreinterpret_s8_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_f64(float64x1_t a) { +// return vreinterpret_s8_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_p8( +// NYI: ret <8 x i8> %a +// int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { +// return vreinterpret_s8_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { +// return vreinterpret_s8_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s8_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) { +// return vreinterpret_s8_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { +// return vreinterpret_s16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { +// return vreinterpret_s16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { +// return vreinterpret_s16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { +// return vreinterpret_s16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_u16( +// NYI: ret <4 x i16> %a +// int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { +// return vreinterpret_s16_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { +// return vreinterpret_s16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { +// return vreinterpret_s16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { +// return vreinterpret_s16_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { +// return vreinterpret_s16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_f64(float64x1_t a) { +// return vreinterpret_s16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { +// return vreinterpret_s16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_p16( +// NYI: ret <4 x i16> %a +// int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { +// return vreinterpret_s16_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s16_p64( +// NYI: 
[[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) { +// return vreinterpret_s16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { +// return vreinterpret_s32_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { +// return vreinterpret_s32_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { +// return vreinterpret_s32_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { +// return vreinterpret_s32_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { +// return vreinterpret_s32_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_u32( +// NYI: ret <2 x i32> %a +// int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { +// return vreinterpret_s32_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { +// return vreinterpret_s32_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { +// return vreinterpret_s32_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { +// return vreinterpret_s32_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_f64(float64x1_t a) { +// return vreinterpret_s32_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { +// return vreinterpret_s32_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { +// return vreinterpret_s32_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s32_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) { +// return vreinterpret_s32_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { +// return vreinterpret_s64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { 
+// return vreinterpret_s64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { +// return vreinterpret_s64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { +// return vreinterpret_s64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { +// return vreinterpret_s64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { +// return vreinterpret_s64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_u64( +// NYI: ret <1 x i64> %a +// int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { +// return vreinterpret_s64_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { +// return vreinterpret_s64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { +// return vreinterpret_s64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_f64(float64x1_t a) { +// return vreinterpret_s64_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { +// return vreinterpret_s64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { +// return vreinterpret_s64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_s64_p64( +// NYI: ret <1 x i64> %a +// int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) { +// return vreinterpret_s64_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_s8( +// NYI: ret <8 x i8> %a +// uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { +// return vreinterpret_u8_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { +// return vreinterpret_u8_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { +// return vreinterpret_u8_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { +// return vreinterpret_u8_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { +// return vreinterpret_u8_u16(a); +// } + +// 
NYI-LABEL: @test_vreinterpret_u8_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { +// return vreinterpret_u8_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { +// return vreinterpret_u8_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { +// return vreinterpret_u8_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { +// return vreinterpret_u8_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) { +// return vreinterpret_u8_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_p8( +// NYI: ret <8 x i8> %a +// uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { +// return vreinterpret_u8_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { +// return vreinterpret_u8_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u8_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) { +// return vreinterpret_u8_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { +// return vreinterpret_u16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_s16( +// NYI: ret <4 x i16> %a +// uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { +// return vreinterpret_u16_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { +// return vreinterpret_u16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { +// return vreinterpret_u16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { +// return vreinterpret_u16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { +// return vreinterpret_u16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { +// return vreinterpret_u16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { +// return vreinterpret_u16_f16(a); +// 
} + +// NYI-LABEL: @test_vreinterpret_u16_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { +// return vreinterpret_u16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) { +// return vreinterpret_u16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { +// return vreinterpret_u16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_p16( +// NYI: ret <4 x i16> %a +// uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { +// return vreinterpret_u16_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u16_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) { +// return vreinterpret_u16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { +// return vreinterpret_u32_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { +// return vreinterpret_u32_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_s32( +// NYI: ret <2 x i32> %a +// uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) { +// return vreinterpret_u32_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) { +// return vreinterpret_u32_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) { +// return vreinterpret_u32_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) { +// return vreinterpret_u32_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) { +// return vreinterpret_u32_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) { +// return vreinterpret_u32_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) { +// return vreinterpret_u32_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) { +// return vreinterpret_u32_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t 
test_vreinterpret_u32_p8(poly8x8_t a) { +// return vreinterpret_u32_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) { +// return vreinterpret_u32_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u32_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> +// NYI: ret <2 x i32> [[TMP0]] +// uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) { +// return vreinterpret_u32_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) { +// return vreinterpret_u64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) { +// return vreinterpret_u64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) { +// return vreinterpret_u64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_s64( +// NYI: ret <1 x i64> %a +// uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) { +// return vreinterpret_u64_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) { +// return vreinterpret_u64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) { +// return vreinterpret_u64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) { +// return vreinterpret_u64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) { +// return vreinterpret_u64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) { +// return vreinterpret_u64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) { +// return vreinterpret_u64_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) { +// return vreinterpret_u64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) { +// return vreinterpret_u64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_u64_p64( +// NYI: ret <1 x i64> %a +// uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) { +// return vreinterpret_u64_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> 
+// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_s8(int8x8_t a) { +// return vreinterpret_f16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_s16(int16x4_t a) { +// return vreinterpret_f16_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_s32(int32x2_t a) { +// return vreinterpret_f16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_s64(int64x1_t a) { +// return vreinterpret_f16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) { +// return vreinterpret_f16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) { +// return vreinterpret_f16_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) { +// return vreinterpret_f16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) { +// return vreinterpret_f16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_f32(float32x2_t a) { +// return vreinterpret_f16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_f64(float64x1_t a) { +// return vreinterpret_f16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) { +// return vreinterpret_f16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) { +// return vreinterpret_f16_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f16_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> +// NYI: ret <4 x half> [[TMP0]] +// float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) { +// return vreinterpret_f16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_s8(int8x8_t a) { +// return vreinterpret_f32_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_s16(int16x4_t a) { +// return vreinterpret_f32_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> +// NYI: 
ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_s32(int32x2_t a) { +// return vreinterpret_f32_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_s64(int64x1_t a) { +// return vreinterpret_f32_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) { +// return vreinterpret_f32_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) { +// return vreinterpret_f32_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) { +// return vreinterpret_f32_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) { +// return vreinterpret_f32_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_f16(float16x4_t a) { +// return vreinterpret_f32_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_f64(float64x1_t a) { +// return vreinterpret_f32_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) { +// return vreinterpret_f32_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) { +// return vreinterpret_f32_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f32_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> +// NYI: ret <2 x float> [[TMP0]] +// float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) { +// return vreinterpret_f32_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_s8(int8x8_t a) { +// return vreinterpret_f64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_s16(int16x4_t a) { +// return vreinterpret_f64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_s32(int32x2_t a) { +// return vreinterpret_f64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_s64(int64x1_t a) { +// return vreinterpret_f64_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to 
<1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) { +// return vreinterpret_f64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) { +// return vreinterpret_f64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) { +// return vreinterpret_f64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) { +// return vreinterpret_f64_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_f16(float16x4_t a) { +// return vreinterpret_f64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_f32(float32x2_t a) { +// return vreinterpret_f64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) { +// return vreinterpret_f64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) { +// return vreinterpret_f64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_f64_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> +// NYI: ret <1 x double> [[TMP0]] +// float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) { +// return vreinterpret_f64_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_s8( +// NYI: ret <8 x i8> %a +// poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) { +// return vreinterpret_p8_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) { +// return vreinterpret_p8_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) { +// return vreinterpret_p8_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) { +// return vreinterpret_p8_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_u8( +// NYI: ret <8 x i8> %a +// poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) { +// return vreinterpret_p8_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) { +// return vreinterpret_p8_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) { +// return vreinterpret_p8_u32(a); +// } + +// NYI-LABEL: 
@test_vreinterpret_p8_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) { +// return vreinterpret_p8_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) { +// return vreinterpret_p8_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) { +// return vreinterpret_p8_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) { +// return vreinterpret_p8_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) { +// return vreinterpret_p8_p16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p8_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: ret <8 x i8> [[TMP0]] +// poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) { +// return vreinterpret_p8_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) { +// return vreinterpret_p16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_s16( +// NYI: ret <4 x i16> %a +// poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) { +// return vreinterpret_p16_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) { +// return vreinterpret_p16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) { +// return vreinterpret_p16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) { +// return vreinterpret_p16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_u16( +// NYI: ret <4 x i16> %a +// poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) { +// return vreinterpret_p16_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) { +// return vreinterpret_p16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) { +// return vreinterpret_p16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) { +// return vreinterpret_p16_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) { +// return 
vreinterpret_p16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) { +// return vreinterpret_p16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) { +// return vreinterpret_p16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p16_p64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> +// NYI: ret <4 x i16> [[TMP0]] +// poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) { +// return vreinterpret_p16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_s8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) { +// return vreinterpret_p64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_s16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) { +// return vreinterpret_p64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_s32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) { +// return vreinterpret_p64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_s64( +// NYI: ret <1 x i64> %a +// poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) { +// return vreinterpret_p64_s64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_u8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) { +// return vreinterpret_p64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) { +// return vreinterpret_p64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) { +// return vreinterpret_p64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_u64( +// NYI: ret <1 x i64> %a +// poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) { +// return vreinterpret_p64_u64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_f16( +// NYI: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) { +// return vreinterpret_p64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_f32( +// NYI: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) { +// return vreinterpret_p64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) { +// return vreinterpret_p64_f64(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_p8( +// NYI: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) { +// return vreinterpret_p64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpret_p64_p16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> +// NYI: ret <1 x i64> [[TMP0]] +// poly64x1_t 
test_vreinterpret_p64_p16(poly16x4_t a) { +// return vreinterpret_p64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) { +// return vreinterpretq_s8_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) { +// return vreinterpretq_s8_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) { +// return vreinterpretq_s8_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_u8( +// NYI: ret <16 x i8> %a +// int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) { +// return vreinterpretq_s8_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) { +// return vreinterpretq_s8_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) { +// return vreinterpretq_s8_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) { +// return vreinterpretq_s8_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) { +// return vreinterpretq_s8_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) { +// return vreinterpretq_s8_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) { +// return vreinterpretq_s8_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_p8( +// NYI: ret <16 x i8> %a +// int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) { +// return vreinterpretq_s8_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) { +// return vreinterpretq_s8_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s8_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) { +// return vreinterpretq_s8_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) { +// return vreinterpretq_s16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) { +// return vreinterpretq_s16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x 
i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) { +// return vreinterpretq_s16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) { +// return vreinterpretq_s16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_u16( +// NYI: ret <8 x i16> %a +// int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) { +// return vreinterpretq_s16_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) { +// return vreinterpretq_s16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) { +// return vreinterpretq_s16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) { +// return vreinterpretq_s16_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) { +// return vreinterpretq_s16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) { +// return vreinterpretq_s16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) { +// return vreinterpretq_s16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_p16( +// NYI: ret <8 x i16> %a +// int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) { +// return vreinterpretq_s16_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s16_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) { +// return vreinterpretq_s16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) { +// return vreinterpretq_s32_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) { +// return vreinterpretq_s32_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) { +// return vreinterpretq_s32_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) { +// return vreinterpretq_s32_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) { +// return vreinterpretq_s32_u16(a); +// } + +// 
NYI-LABEL: @test_vreinterpretq_s32_u32( +// NYI: ret <4 x i32> %a +// int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) { +// return vreinterpretq_s32_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) { +// return vreinterpretq_s32_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) { +// return vreinterpretq_s32_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) { +// return vreinterpretq_s32_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) { +// return vreinterpretq_s32_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) { +// return vreinterpretq_s32_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) { +// return vreinterpretq_s32_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s32_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) { +// return vreinterpretq_s32_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) { +// return vreinterpretq_s64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) { +// return vreinterpretq_s64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) { +// return vreinterpretq_s64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) { +// return vreinterpretq_s64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) { +// return vreinterpretq_s64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) { +// return vreinterpretq_s64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_u64( +// NYI: ret <2 x i64> %a +// int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) { +// return vreinterpretq_s64_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t 
test_vreinterpretq_s64_f16(float16x8_t a) { +// return vreinterpretq_s64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) { +// return vreinterpretq_s64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) { +// return vreinterpretq_s64_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) { +// return vreinterpretq_s64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) { +// return vreinterpretq_s64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_s64_p64( +// NYI: ret <2 x i64> %a +// int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) { +// return vreinterpretq_s64_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_s8( +// NYI: ret <16 x i8> %a +// uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) { +// return vreinterpretq_u8_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) { +// return vreinterpretq_u8_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) { +// return vreinterpretq_u8_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) { +// return vreinterpretq_u8_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) { +// return vreinterpretq_u8_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) { +// return vreinterpretq_u8_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) { +// return vreinterpretq_u8_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) { +// return vreinterpretq_u8_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) { +// return vreinterpretq_u8_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) { +// return vreinterpretq_u8_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_p8( +// NYI: ret <16 x i8> %a 
+// uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) { +// return vreinterpretq_u8_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) { +// return vreinterpretq_u8_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u8_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) { +// return vreinterpretq_u8_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) { +// return vreinterpretq_u16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_s16( +// NYI: ret <8 x i16> %a +// uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) { +// return vreinterpretq_u16_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) { +// return vreinterpretq_u16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) { +// return vreinterpretq_u16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) { +// return vreinterpretq_u16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) { +// return vreinterpretq_u16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) { +// return vreinterpretq_u16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) { +// return vreinterpretq_u16_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) { +// return vreinterpretq_u16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) { +// return vreinterpretq_u16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) { +// return vreinterpretq_u16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_p16( +// NYI: ret <8 x i16> %a +// uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) { +// return vreinterpretq_u16_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u16_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) { +// return vreinterpretq_u16_p64(a); +// } + +// NYI-LABEL: 
@test_vreinterpretq_u32_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) { +// return vreinterpretq_u32_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) { +// return vreinterpretq_u32_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_s32( +// NYI: ret <4 x i32> %a +// uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { +// return vreinterpretq_u32_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { +// return vreinterpretq_u32_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) { +// return vreinterpretq_u32_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { +// return vreinterpretq_u32_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { +// return vreinterpretq_u32_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { +// return vreinterpretq_u32_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { +// return vreinterpretq_u32_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) { +// return vreinterpretq_u32_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) { +// return vreinterpretq_u32_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) { +// return vreinterpretq_u32_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u32_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> +// NYI: ret <4 x i32> [[TMP0]] +// uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) { +// return vreinterpretq_u32_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) { +// return vreinterpretq_u64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) { +// return vreinterpretq_u64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> 
%a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) { +// return vreinterpretq_u64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_s64( +// NYI: ret <2 x i64> %a +// uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) { +// return vreinterpretq_u64_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) { +// return vreinterpretq_u64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) { +// return vreinterpretq_u64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) { +// return vreinterpretq_u64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) { +// return vreinterpretq_u64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) { +// return vreinterpretq_u64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) { +// return vreinterpretq_u64_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) { +// return vreinterpretq_u64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) { +// return vreinterpretq_u64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_u64_p64( +// NYI: ret <2 x i64> %a +// uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) { +// return vreinterpretq_u64_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) { +// return vreinterpretq_f16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) { +// return vreinterpretq_f16_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) { +// return vreinterpretq_f16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) { +// return vreinterpretq_f16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) { +// return 
vreinterpretq_f16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) { +// return vreinterpretq_f16_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) { +// return vreinterpretq_f16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) { +// return vreinterpretq_f16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) { +// return vreinterpretq_f16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) { +// return vreinterpretq_f16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) { +// return vreinterpretq_f16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) { +// return vreinterpretq_f16_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f16_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> +// NYI: ret <8 x half> [[TMP0]] +// float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) { +// return vreinterpretq_f16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) { +// return vreinterpretq_f32_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) { +// return vreinterpretq_f32_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) { +// return vreinterpretq_f32_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) { +// return vreinterpretq_f32_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) { +// return vreinterpretq_f32_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) { +// return vreinterpretq_f32_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t 
test_vreinterpretq_f32_u32(uint32x4_t a) { +// return vreinterpretq_f32_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) { +// return vreinterpretq_f32_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) { +// return vreinterpretq_f32_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) { +// return vreinterpretq_f32_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) { +// return vreinterpretq_f32_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) { +// return vreinterpretq_f32_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f32_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> +// NYI: ret <4 x float> [[TMP0]] +// float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) { +// return vreinterpretq_f32_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) { +// return vreinterpretq_f64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) { +// return vreinterpretq_f64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) { +// return vreinterpretq_f64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) { +// return vreinterpretq_f64_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) { +// return vreinterpretq_f64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) { +// return vreinterpretq_f64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) { +// return vreinterpretq_f64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) { +// return vreinterpretq_f64_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_f16( +// NYI: [[TMP0:%.*]] = 
bitcast <8 x half> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) { +// return vreinterpretq_f64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) { +// return vreinterpretq_f64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) { +// return vreinterpretq_f64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) { +// return vreinterpretq_f64_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_f64_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> +// NYI: ret <2 x double> [[TMP0]] +// float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) { +// return vreinterpretq_f64_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_s8( +// NYI: ret <16 x i8> %a +// poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) { +// return vreinterpretq_p8_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) { +// return vreinterpretq_p8_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) { +// return vreinterpretq_p8_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) { +// return vreinterpretq_p8_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_u8( +// NYI: ret <16 x i8> %a +// poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) { +// return vreinterpretq_p8_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) { +// return vreinterpretq_p8_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) { +// return vreinterpretq_p8_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) { +// return vreinterpretq_p8_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) { +// return vreinterpretq_p8_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) { +// return vreinterpretq_p8_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) { 
+// return vreinterpretq_p8_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) { +// return vreinterpretq_p8_p16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p8_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: ret <16 x i8> [[TMP0]] +// poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) { +// return vreinterpretq_p8_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) { +// return vreinterpretq_p16_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_s16( +// NYI: ret <8 x i16> %a +// poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) { +// return vreinterpretq_p16_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) { +// return vreinterpretq_p16_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_s64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) { +// return vreinterpretq_p16_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) { +// return vreinterpretq_p16_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_u16( +// NYI: ret <8 x i16> %a +// poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) { +// return vreinterpretq_p16_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) { +// return vreinterpretq_p16_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) { +// return vreinterpretq_p16_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) { +// return vreinterpretq_p16_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) { +// return vreinterpretq_p16_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) { +// return vreinterpretq_p16_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) { +// return vreinterpretq_p16_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p16_p64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> +// NYI: ret <8 x i16> [[TMP0]] +// poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) { +// return vreinterpretq_p16_p64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_s8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 
x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) { +// return vreinterpretq_p64_s8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_s16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) { +// return vreinterpretq_p64_s16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_s32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) { +// return vreinterpretq_p64_s32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_s64( +// NYI: ret <2 x i64> %a +// poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) { +// return vreinterpretq_p64_s64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_u8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) { +// return vreinterpretq_p64_u8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) { +// return vreinterpretq_p64_u16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) { +// return vreinterpretq_p64_u32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_u64( +// NYI: ret <2 x i64> %a +// poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) { +// return vreinterpretq_p64_u64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_f16( +// NYI: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) { +// return vreinterpretq_p64_f16(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_f32( +// NYI: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) { +// return vreinterpretq_p64_f32(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_f64( +// NYI: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) { +// return vreinterpretq_p64_f64(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_p8( +// NYI: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) { +// return vreinterpretq_p64_p8(a); +// } + +// NYI-LABEL: @test_vreinterpretq_p64_p16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> +// NYI: ret <2 x i64> [[TMP0]] +// poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) { +// return vreinterpretq_p64_p16(a); +// } + +// NYI-LABEL: @test_vabds_f32( +// NYI: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) +// NYI: ret float [[VABDS_F32_I]] +// float32_t test_vabds_f32(float32_t a, float32_t b) { +// return vabds_f32(a, b); +// } + +// NYI-LABEL: @test_vabdd_f64( +// NYI: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) +// NYI: ret double [[VABDD_F64_I]] +// float64_t test_vabdd_f64(float64_t a, float64_t b) { +// return vabdd_f64(a, b); +// } + +// NYI-LABEL: @test_vuqaddq_s8( +// NYI: entry: +// NYI-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI-NEXT: ret <16 x i8> [[V]] +// 
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) { +// return vuqaddq_s8(a, b); +// } + +// NYI-LABEL: @test_vuqaddq_s32( +// NYI: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI-NEXT: ret <4 x i32> [[V]] +// int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) { +// return vuqaddq_s32(a, b); +// } + +// NYI-LABEL: @test_vuqaddq_s64( +// NYI: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI-NEXT: ret <2 x i64> [[V]] +// int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) { +// return vuqaddq_s64(a, b); +// } + +// NYI-LABEL: @test_vuqaddq_s16( +// NYI: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI-NEXT: ret <8 x i16> [[V]] +// int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) { +// return vuqaddq_s16(a, b); +// } + +// NYI-LABEL: @test_vuqadd_s8( +// NYI: entry: +// NYI-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI-NEXT: ret <8 x i8> [[V]] +// int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) { +// return vuqadd_s8(a, b); +// } + +// NYI-LABEL: @test_vuqadd_s32( +// NYI: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI-NEXT: ret <2 x i32> [[V]] +// int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) { +// return vuqadd_s32(a, b); +// } + +// NYI-LABEL: @test_vuqadd_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: ret <1 x i64> [[VUQADD2_I]] +// int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) { +// return vuqadd_s64(a, b); +// } + +// NYI-LABEL: @test_vuqadd_s16( +// NYI: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI-NEXT: ret <4 x i16> [[V]] +// int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) { +// return vuqadd_s16(a, b); +// } + +// NYI-LABEL: @test_vsqadd_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> +// NYI: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) +// NYI: ret <1 x i64> [[VSQADD2_I]] +// uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) { +// return vsqadd_u64(a, b); +// } + +// NYI-LABEL: @test_vsqadd_u8( +// NYI: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) +// NYI: ret <8 x i8> [[VSQADD_I]] +// uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) { +// return vsqadd_u8(a, b); +// } + +// NYI-LABEL: @test_vsqaddq_u8( +// NYI: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) +// NYI: ret <16 x i8> [[VSQADD_I]] +// uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) { +// return vsqaddq_u8(a, b); +// } + +// NYI-LABEL: @test_vsqadd_u16( +// NYI: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// NYI: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b) +// NYI: ret <4 x i16> [[VSQADD2_I]] +// uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) { +// return vsqadd_u16(a, b); +// } + +// NYI-LABEL: @test_vsqaddq_u16( +// NYI: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> +// NYI: [[VSQADD2_I:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b) +// NYI: ret <8 x i16> [[VSQADD2_I]] +// uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) { +// return vsqaddq_u16(a, b); +// } + +// NYI-LABEL: @test_vsqadd_u32( +// NYI: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// NYI: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b) +// NYI: ret <2 x i32> [[VSQADD2_I]] +// uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) { +// return vsqadd_u32(a, b); +// } + +// NYI-LABEL: @test_vsqaddq_u32( +// NYI: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> +// NYI: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b) +// NYI: ret <4 x i32> [[VSQADD2_I]] +// uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) { +// return vsqaddq_u32(a, b); +// } + +// NYI-LABEL: @test_vsqaddq_u64( +// NYI: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> +// NYI: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> +// NYI: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: ret <2 x i64> [[VSQADD2_I]] +// uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) { +// return vsqaddq_u64(a, b); +// } + +// NYI-LABEL: @test_vabs_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a) +// NYI: ret <1 x i64> [[VABS1_I]] +// int64x1_t test_vabs_s64(int64x1_t a) { +// return vabs_s64(a); +// } + +// NYI-LABEL: @test_vqabs_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a) +// NYI: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQABS_V1_I]] +// int64x1_t test_vqabs_s64(int64x1_t a) { +// return vqabs_s64(a); +// } + +// NYI-LABEL: @test_vqneg_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a) +// NYI: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8> +// NYI: ret <1 x i64> [[VQNEG_V1_I]] +// int64x1_t test_vqneg_s64(int64x1_t a) { +// return vqneg_s64(a); +// } + +// NYI-LABEL: @test_vneg_s64( +// NYI: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a +// NYI: ret <1 x i64> [[SUB_I]] +// int64x1_t test_vneg_s64(int64x1_t a) { +// return vneg_s64(a); +// } + +// NYI-LABEL: @test_vaddv_f32( +// NYI: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VADDV_F32_I]] +// float32_t test_vaddv_f32(float32x2_t a) { +// return vaddv_f32(a); +// } + +// NYI-LABEL: @test_vaddvq_f32( +// NYI: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a) +// NYI: ret float [[VADDVQ_F32_I]] +// float32_t test_vaddvq_f32(float32x4_t a) { +// return vaddvq_f32(a); +// } + +// NYI-LABEL: @test_vaddvq_f64( +// NYI: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VADDVQ_F64_I]] +// float64_t test_vaddvq_f64(float64x2_t a) { +// return vaddvq_f64(a); +// } + +// NYI-LABEL: @test_vmaxv_f32( +// NYI: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VMAXV_F32_I]] +// float32_t test_vmaxv_f32(float32x2_t a) { +// return vmaxv_f32(a); +// } + +// 
NYI-LABEL: @test_vmaxvq_f64( +// NYI: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VMAXVQ_F64_I]] +// float64_t test_vmaxvq_f64(float64x2_t a) { +// return vmaxvq_f64(a); +// } + +// NYI-LABEL: @test_vminv_f32( +// NYI: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VMINV_F32_I]] +// float32_t test_vminv_f32(float32x2_t a) { +// return vminv_f32(a); +// } + +// NYI-LABEL: @test_vminvq_f64( +// NYI: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VMINVQ_F64_I]] +// float64_t test_vminvq_f64(float64x2_t a) { +// return vminvq_f64(a); +// } + +// NYI-LABEL: @test_vmaxnmvq_f64( +// NYI: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VMAXNMVQ_F64_I]] +// float64_t test_vmaxnmvq_f64(float64x2_t a) { +// return vmaxnmvq_f64(a); +// } + +// NYI-LABEL: @test_vmaxnmv_f32( +// NYI: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VMAXNMV_F32_I]] +// float32_t test_vmaxnmv_f32(float32x2_t a) { +// return vmaxnmv_f32(a); +// } + +// NYI-LABEL: @test_vminnmvq_f64( +// NYI: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) +// NYI: ret double [[VMINNMVQ_F64_I]] +// float64_t test_vminnmvq_f64(float64x2_t a) { +// return vminnmvq_f64(a); +// } + +// NYI-LABEL: @test_vminnmv_f32( +// NYI: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) +// NYI: ret float [[VMINNMV_F32_I]] +// float32_t test_vminnmv_f32(float32x2_t a) { +// return vminnmv_f32(a); +// } + +// NYI-LABEL: @test_vpaddq_s64( +// NYI: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VPADDQ_V2_I]] +// int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { +// return vpaddq_s64(a, b); +// } + +// NYI-LABEL: @test_vpaddq_u64( +// NYI: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) +// NYI: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> +// NYI: ret <2 x i64> [[VPADDQ_V2_I]] +// uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) { +// return vpaddq_u64(a, b); +// } + +// NYI-LABEL: @test_vpaddd_u64( +// NYI: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) +// NYI: ret i64 [[VPADDD_U64_I]] +// uint64_t test_vpaddd_u64(uint64x2_t a) { +// return vpaddd_u64(a); +// } + +// NYI-LABEL: @test_vaddvq_s64( +// NYI: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a) +// NYI: ret i64 [[VADDVQ_S64_I]] +// int64_t test_vaddvq_s64(int64x2_t a) { +// return vaddvq_s64(a); +// } + +// NYI-LABEL: @test_vaddvq_u64( +// NYI: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) +// NYI: ret i64 [[VADDVQ_U64_I]] +// uint64_t test_vaddvq_u64(uint64x2_t a) { +// return vaddvq_u64(a); +// } + +// NYI-LABEL: @test_vadd_f64( +// NYI: [[ADD_I:%.*]] = fadd <1 x double> %a, %b +// NYI: ret <1 x double> [[ADD_I]] +// float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) { +// return vadd_f64(a, b); +// } + +// NYI-LABEL: @test_vmul_f64( +// NYI: [[MUL_I:%.*]] = fmul <1 x double> %a, %b +// NYI: ret <1 x double> [[MUL_I]] +// float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) { +// 
return vmul_f64(a, b); +// } + +// NYI-LABEL: @test_vdiv_f64( +// NYI: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b +// NYI: ret <1 x double> [[DIV_I]] +// float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) { +// return vdiv_f64(a, b); +// } + +// NYI-LABEL: @test_vmla_f64( +// NYI: [[MUL_I:%.*]] = fmul <1 x double> %b, %c +// NYI: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]] +// NYI: ret <1 x double> [[ADD_I]] +// float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) { +// return vmla_f64(a, b, c); +// } + +// NYI-LABEL: @test_vmls_f64( +// NYI: [[MUL_I:%.*]] = fmul <1 x double> %b, %c +// NYI: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]] +// NYI: ret <1 x double> [[SUB_I]] +// float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) { +// return vmls_f64(a, b, c); +// } + +// NYI-LABEL: @test_vfma_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8> +// NYI: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) +// NYI: ret <1 x double> [[TMP3]] +// float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) { +// return vfma_f64(a, b, c); +// } + +// NYI-LABEL: @test_vfms_f64( +// NYI: [[SUB_I:%.*]] = fneg <1 x double> %b +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> +// NYI: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8> +// NYI: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a) +// NYI: ret <1 x double> [[TMP3]] +// float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) { +// return vfms_f64(a, b, c); +// } + +// NYI-LABEL: @test_vsub_f64( +// NYI: [[SUB_I:%.*]] = fsub <1 x double> %a, %b +// NYI: ret <1 x double> [[SUB_I]] +// float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) { +// return vsub_f64(a, b); +// } + +// NYI-LABEL: @test_vabd_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VABD2_I]] +// float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) { +// return vabd_f64(a, b); +// } + +// NYI-LABEL: @test_vmax_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VMAX2_I]] +// float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) { +// return vmax_f64(a, b); +// } + +// NYI-LABEL: @test_vmin_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VMIN2_I]] +// float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) { +// return vmin_f64(a, b); +// } + +// NYI-LABEL: @test_vmaxnm_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VMAXNM2_I]] +// float64x1_t 
test_vmaxnm_f64(float64x1_t a, float64x1_t b) { +// return vmaxnm_f64(a, b); +// } + +// NYI-LABEL: @test_vminnm_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VMINNM2_I]] +// float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) { +// return vminnm_f64(a, b); +// } + +// NYI-LABEL: @test_vabs_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VABS1_I]] +// float64x1_t test_vabs_f64(float64x1_t a) { +// return vabs_f64(a); +// } + +// NYI-LABEL: @test_vneg_f64( +// NYI: [[SUB_I:%.*]] = fneg <1 x double> %a +// NYI: ret <1 x double> [[SUB_I]] +// float64x1_t test_vneg_f64(float64x1_t a) { +// return vneg_f64(a); +// } + +// NYI-LABEL: @test_vcvt_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[TMP1]] +// int64x1_t test_vcvt_s64_f64(float64x1_t a) { +// return vcvt_s64_f64(a); +// } + +// NYI-LABEL: @test_vcvt_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[TMP1]] +// uint64x1_t test_vcvt_u64_f64(float64x1_t a) { +// return vcvt_u64_f64(a); +// } + +// NYI-LABEL: @test_vcvtn_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTN1_I]] +// int64x1_t test_vcvtn_s64_f64(float64x1_t a) { +// return vcvtn_s64_f64(a); +// } + +// NYI-LABEL: @test_vcvtn_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTN1_I]] +// uint64x1_t test_vcvtn_u64_f64(float64x1_t a) { +// return vcvtn_u64_f64(a); +// } + +// NYI-LABEL: @test_vcvtp_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTP1_I]] +// int64x1_t test_vcvtp_s64_f64(float64x1_t a) { +// return vcvtp_s64_f64(a); +// } + +// NYI-LABEL: @test_vcvtp_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTP1_I]] +// uint64x1_t test_vcvtp_u64_f64(float64x1_t a) { +// return vcvtp_u64_f64(a); +// } + +// NYI-LABEL: @test_vcvtm_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTM1_I]] +// int64x1_t test_vcvtm_s64_f64(float64x1_t a) { +// return vcvtm_s64_f64(a); +// } + +// NYI-LABEL: @test_vcvtm_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTM1_I]] +// uint64x1_t test_vcvtm_u64_f64(float64x1_t a) { +// return vcvtm_u64_f64(a); +// } + +// NYI-LABEL: @test_vcvta_s64_f64( +// NYI: [[TMP0:%.*]] = 
bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTA1_I]] +// int64x1_t test_vcvta_s64_f64(float64x1_t a) { +// return vcvta_s64_f64(a); +// } + +// NYI-LABEL: @test_vcvta_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a) +// NYI: ret <1 x i64> [[VCVTA1_I]] +// uint64x1_t test_vcvta_u64_f64(float64x1_t a) { +// return vcvta_u64_f64(a); +// } + +// NYI-LABEL: @test_vcvt_f64_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double> +// NYI: ret <1 x double> [[VCVT_I]] +// float64x1_t test_vcvt_f64_s64(int64x1_t a) { +// return vcvt_f64_s64(a); +// } + +// NYI-LABEL: @test_vcvt_f64_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double> +// NYI: ret <1 x double> [[VCVT_I]] +// float64x1_t test_vcvt_f64_u64(uint64x1_t a) { +// return vcvt_f64_u64(a); +// } + +// NYI-LABEL: @test_vcvt_n_s64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> +// NYI: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64) +// NYI: ret <1 x i64> [[VCVT_N1]] +// int64x1_t test_vcvt_n_s64_f64(float64x1_t a) { +// return vcvt_n_s64_f64(a, 64); +// } + +// NYI-LABEL: @test_vcvt_n_u64_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> +// NYI: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64) +// NYI: ret <1 x i64> [[VCVT_N1]] +// uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) { +// return vcvt_n_u64_f64(a, 64); +// } + +// NYI-LABEL: @test_vcvt_n_f64_s64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64) +// NYI: ret <1 x double> [[VCVT_N1]] +// float64x1_t test_vcvt_n_f64_s64(int64x1_t a) { +// return vcvt_n_f64_s64(a, 64); +// } + +// NYI-LABEL: @test_vcvt_n_f64_u64( +// NYI: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> +// NYI: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +// NYI: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64) +// NYI: ret <1 x double> [[VCVT_N1]] +// float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) { +// return vcvt_n_f64_u64(a, 64); +// } + +// NYI-LABEL: @test_vrndn_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRNDN1_I]] +// float64x1_t test_vrndn_f64(float64x1_t a) { +// return vrndn_f64(a); +// } + +// NYI-LABEL: @test_vrnda_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRNDA1_I]] +// float64x1_t test_vrnda_f64(float64x1_t a) { +// return vrnda_f64(a); +// } + +// NYI-LABEL: @test_vrndp_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) +// NYI: ret <1 x double> 
[[VRNDP1_I]] +// float64x1_t test_vrndp_f64(float64x1_t a) { +// return vrndp_f64(a); +// } + +// NYI-LABEL: @test_vrndm_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRNDM1_I]] +// float64x1_t test_vrndm_f64(float64x1_t a) { +// return vrndm_f64(a); +// } + +// NYI-LABEL: @test_vrndx_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRNDX1_I]] +// float64x1_t test_vrndx_f64(float64x1_t a) { +// return vrndx_f64(a); +// } + +// NYI-LABEL: @test_vrnd_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRNDZ1_I]] +// float64x1_t test_vrnd_f64(float64x1_t a) { +// return vrnd_f64(a); +// } + +// NYI-LABEL: @test_vrndi_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRNDI1_I]] +// float64x1_t test_vrndi_f64(float64x1_t a) { +// return vrndi_f64(a); +// } + +// NYI-LABEL: @test_vrsqrte_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRSQRTE_V1_I]] +// float64x1_t test_vrsqrte_f64(float64x1_t a) { +// return vrsqrte_f64(a); +// } + +// NYI-LABEL: @test_vrecpe_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VRECPE_V1_I]] +// float64x1_t test_vrecpe_f64(float64x1_t a) { +// return vrecpe_f64(a); +// } + +// NYI-LABEL: @test_vsqrt_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) +// NYI: ret <1 x double> [[VSQRT_I]] +// float64x1_t test_vsqrt_f64(float64x1_t a) { +// return vsqrt_f64(a); +// } + +// NYI-LABEL: @test_vrecps_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: ret <1 x double> [[VRECPS_V2_I]] +// float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) { +// return vrecps_f64(a, b); +// } + +// NYI-LABEL: @test_vrsqrts_f64( +// NYI: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> +// NYI: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// NYI: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b) +// NYI: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8> +// NYI: ret <1 x double> [[VRSQRTS_V2_I]] +// float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) { +// return vrsqrts_f64(a, b); +// } + +// NYI-LABEL: @test_vminv_s32( +// NYI: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a) +// NYI: ret i32 [[VMINV_S32_I]] +// int32_t test_vminv_s32(int32x2_t a) { +// return vminv_s32(a); +// } + +// NYI-LABEL: @test_vminv_u32( +// NYI: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a) +// NYI: ret i32 [[VMINV_U32_I]] +// uint32_t test_vminv_u32(uint32x2_t a) { +// return 
vminv_u32(a); +// } + +// NYI-LABEL: @test_vmaxv_s32( +// NYI: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a) +// NYI: ret i32 [[VMAXV_S32_I]] +// int32_t test_vmaxv_s32(int32x2_t a) { +// return vmaxv_s32(a); +// } + +// NYI-LABEL: @test_vmaxv_u32( +// NYI: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a) +// NYI: ret i32 [[VMAXV_U32_I]] +// uint32_t test_vmaxv_u32(uint32x2_t a) { +// return vmaxv_u32(a); +// } + +// NYI-LABEL: @test_vaddv_s32( +// NYI: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) +// NYI: ret i32 [[VADDV_S32_I]] +// int32_t test_vaddv_s32(int32x2_t a) { +// return vaddv_s32(a); +// } + +// NYI-LABEL: @test_vaddv_u32( +// NYI: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a) +// NYI: ret i32 [[VADDV_U32_I]] +// uint32_t test_vaddv_u32(uint32x2_t a) { +// return vaddv_u32(a); +// } + +// NYI-LABEL: @test_vaddlv_s32( +// NYI: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a) +// NYI: ret i64 [[VADDLV_S32_I]] +// int64_t test_vaddlv_s32(int32x2_t a) { +// return vaddlv_s32(a); +// } + +// NYI-LABEL: @test_vaddlv_u32( +// NYI: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a) +// NYI: ret i64 [[VADDLV_U32_I]] +// uint64_t test_vaddlv_u32(uint32x2_t a) { +// return vaddlv_u32(a); +// } diff --git a/clang/test/CIR/CodeGen/aarch64-neon-vdup-lane.c b/clang/test/CIR/CodeGen/aarch64-neon-vdup-lane.c new file mode 100644 index 000000000000..4799e0931c55 --- /dev/null +++ b/clang/test/CIR/CodeGen/aarch64-neon-vdup-lane.c @@ -0,0 +1,216 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \ +// RUN: -emit-cir -target-feature +neon %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \ +// RUN: -emit-llvm -target-feature +neon %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// Testing the normal use of vdup lane intrinsics.
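+// As the CIR and LLVM checks below illustrate, each scalar vdup{b,h,s,d}_lane
+// / _laneq test is expected to lower the lane read to a cir.vec.extract with a
+// constant lane index at the CIR level, and to a plain extractelement on the
+// corresponding vector type once lowered to LLVM IR (this is a descriptive
+// summary of the check patterns that follow, not an additional test).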
+ +// REQUIRES: aarch64-registered-target || arm-registered-target +#include <arm_neon.h> + +int8_t test_vdupb_lane_s8(int8x8_t src) { + return vdupb_lane_s8(src, 7); +} + +// CIR-LABEL: test_vdupb_lane_s8 +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i8 @test_vdupb_lane_s8(<8 x i8> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8 +// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <8 x i8> [[INTRN_ARG]], i32 7 +// LLVM: ret i8 {{%.*}} + +int8_t test_vdupb_laneq_s8(int8x16_t a) { + return vdupb_laneq_s8(a, 15); +} + +// CIR-LABEL: test_vdupb_laneq_s8 +// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i8 @test_vdupb_laneq_s8(<16 x i8> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16 +// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15 +// LLVM: ret i8 {{%.*}} + +int16_t test_vduph_lane_s16(int16x4_t src) { + return vduph_lane_s16(src, 3); +} + +// CIR-LABEL: test_vduph_lane_s16 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + + +// LLVM: define dso_local i16 @test_vduph_lane_s16(<4 x i16> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8 +// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3 +// LLVM: ret i16 {{%.*}} + +int16_t test_vduph_laneq_s16(int16x8_t a) { + return vduph_laneq_s16(a, 7); +} + +// CIR-LABEL: test_vduph_laneq_s16 +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i16 @test_vduph_laneq_s16(<8 x i16> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16 +// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7 +// LLVM: ret i16 {{%.*}} + +int32_t test_vdups_lane_s32(int32x2_t a) { + return vdups_lane_s32(a, 1); +} + +// CIR-LABEL: test_vdups_lane_s32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i32 @test_vdups_lane_s32(<2 x i32> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8 +// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <2 x i32>
[[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1 +// LLVM: ret i32 {{%.*}} + +int32_t test_vdups_laneq_s32(int32x4_t a) { + return vdups_laneq_s32(a, 3); +} + +// CIR-LABEL: test_vdups_laneq_s32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i32 @test_vdups_laneq_s32(<4 x i32> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3 +// LLVM: ret i32 {{%.*}} + +int64_t test_vdupd_lane_s64(int64x1_t src) { + return vdupd_lane_s64(src, 0); +} + +// CIR-LABEL: test_vdupd_lane_s64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i64 @test_vdupd_lane_s64(<1 x i64> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8 +// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0 +// LLVM: ret i64 {{%.*}} + +int64_t test_vdupd_laneq_s64(int64x2_t a) { + return vdupd_laneq_s64(a, 1); +} + +// CIR-LABEL: test_vdupd_laneq_s64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i64 @test_vdupd_laneq_s64(<2 x i64> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16 +// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1 +// LLVM: ret i64 {{%.*}} + +float32_t test_vdups_lane_f32(float32x2_t src) { + return vdups_lane_f32(src, 1); +} + +// CIR-LABEL: test_vdups_lane_f32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local float @test_vdups_lane_f32(<2 x float> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8 +// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1 +// LLVM: ret float {{%.*}} + +float64_t test_vdupd_lane_f64(float64x1_t src) { + return vdupd_lane_f64(src, 0); +} + +// CIR-LABEL: test_vdupd_lane_f64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local double @test_vdupd_lane_f64(<1 x double> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, 
align 8 +// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0 +// LLVM: ret double {{%.*}} + +float32_t test_vdups_laneq_f32(float32x4_t src) { + return vdups_laneq_f32(src, 3); +} + +// CIR-LABEL: test_vdups_laneq_f32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local float @test_vdups_laneq_f32(<4 x float> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16 +// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3 +// LLVM: ret float {{%.*}} + +float64_t test_vdupd_laneq_f64(float64x2_t src) { + return vdupd_laneq_f64(src, 1); +} + +// CIR-LABEL: test_vdupd_laneq_f64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local double @test_vdupd_laneq_f64(<2 x double> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16 +// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1 +// LLVM: ret double {{%.*}} diff --git a/clang/test/CIR/CodeGen/aarch64-neon-vget.c b/clang/test/CIR/CodeGen/aarch64-neon-vget.c new file mode 100644 index 000000000000..b16648691d1b --- /dev/null +++ b/clang/test/CIR/CodeGen/aarch64-neon-vget.c @@ -0,0 +1,219 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \ +// RUN: -emit-cir -target-feature +neon %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \ +// RUN: -emit-llvm -target-feature +neon %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// This test file contains test cases to those of +// clang/test/CodeGen/aarch64-neon-vget.c +// The difference is that this file only tests uses vget intrinsics, as we feel +// it would be proper to have a separate test file testing vset intrinsics +// with the file name aarch64-neon-vset.c + +// REQUIRES: aarch64-registered-target || arm-registered-target +#include + +uint8_t test_vget_lane_u8(uint8x8_t a) { + return vget_lane_u8(a, 7); +} + +// CIR-LABEL: test_vget_lane_u8 +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i8 @test_vget_lane_u8(<8 x i8> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8 +// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <8 
x i8> [[INTRN_ARG]], i32 7 +// LLVM: ret i8 {{%.*}} + +uint8_t test_vgetq_lane_u8(uint8x16_t a) { + return vgetq_lane_u8(a, 15); +} + +// CIR-LABEL: test_vgetq_lane_u8 +// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i8 @test_vgetq_lane_u8(<16 x i8> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16 +// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15 +// LLVM: ret i8 {{%.*}} + +uint16_t test_vget_lane_u16(uint16x4_t a) { + return vget_lane_u16(a, 3); +} + +// CIR-LABEL: test_vget_lane_u16 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i16 @test_vget_lane_u16(<4 x i16> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8 +// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3 +// LLVM: ret i16 {{%.*}} + +uint16_t test_vgetq_lane_u16(uint16x8_t a) { + return vgetq_lane_u16(a, 7); +} + +// CIR-LABEL: test_vgetq_lane_u16 +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i16 @test_vgetq_lane_u16(<8 x i16> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16 +// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7 +// LLVM: ret i16 {{%.*}} + +uint32_t test_vget_lane_u32(uint32x2_t a) { + return vget_lane_u32(a, 1); +} + +// CIR-LABEL: test_vget_lane_u32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i32 @test_vget_lane_u32(<2 x i32> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8 +// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <2 x i32> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1 +// LLVM: ret i32 {{%.*}} + +uint32_t test_vgetq_lane_u32(uint32x4_t a) { + return vgetq_lane_u32(a, 3); +} + +// CIR-LABEL: test_vgetq_lane_u32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i32 @test_vgetq_lane_u32(<4 x i32> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], 
align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3 +// LLVM: ret i32 {{%.*}} + +uint64_t test_vget_lane_u64(uint64x1_t a) { + return vget_lane_u64(a, 0); +} + +// CIR-LABEL: test_vget_lane_u64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i64 @test_vget_lane_u64(<1 x i64> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8 +// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0 +// LLVM: ret i64 {{%.*}} + +uint64_t test_vgetq_lane_u64(uint64x2_t a) { + return vgetq_lane_u64(a, 1); +} + +// CIR-LABEL: test_vgetq_lane_u64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local i64 @test_vgetq_lane_u64(<2 x i64> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16 +// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16 +// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16 +// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1 +// LLVM: ret i64 {{%.*}} + +float32_t test_vget_lane_f32(float32x2_t a) { + return vget_lane_f32(a, 1); +} + +// CIR-LABEL: test_vget_lane_f32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local float @test_vget_lane_f32(<2 x float> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8 +// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1 +// LLVM: ret float {{%.*}} + +float64_t test_vget_lane_f64(float64x1_t a) { + return vget_lane_f64(a, 0); +} + +// CIR-LABEL: test_vget_lane_f64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local double @test_vget_lane_f64(<1 x double> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8 +// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8 +// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8 +// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8 +// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0 +// LLVM: ret double {{%.*}} + +float32_t test_vgetq_lane_f32(float32x4_t a) { + return vgetq_lane_f32(a, 3); +} + +// CIR-LABEL: test_vgetq_lane_f32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local float @test_vgetq_lane_f32(<4 x float> [[ARG:%.*]]) +// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16 +// LLVM: store <4 
x float> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3
+// LLVM: ret float {{%.*}}
+
+float64_t test_vgetq_lane_f64(float64x2_t a) {
+  return vgetq_lane_f64(a, 1);
+}
+
+// CIR-LABEL: test_vgetq_lane_f64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector
+
+// LLVM: define dso_local double @test_vgetq_lane_f64(<2 x double> [[ARG:%.*]])
+// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
+// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1
+// LLVM: ret double {{%.*}}
diff --git a/clang/test/CIR/CodeGen/aarch64-neon-vset.c b/clang/test/CIR/CodeGen/aarch64-neon-vset.c
new file mode 100644
index 000000000000..5da779ff69eb
--- /dev/null
+++ b/clang/test/CIR/CodeGen/aarch64-neon-vset.c
@@ -0,0 +1,238 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
+// RUN: -emit-cir -target-feature +neon %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
+// RUN: -emit-llvm -target-feature +neon %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// This test file is similar to but not the same as
+// clang/test/CodeGen/aarch64-neon-vget.c
+// The difference is that this file only tests the vset intrinsics, as we feel
+// it would be proper to have a separate test file testing the vget intrinsics
+// with the file name aarch64-neon-vget.c
+// Also, for each integer type, we only test signed or unsigned, not both.
+// This is because integer types of the same size just use the same intrinsic.
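/*
 * Background sketch (illustrative only, not part of the checked test): the
 * vset_lane_* intrinsics write a scalar into one lane and return the updated
 * vector, which is why the LLVM output below is one insertelement and the CIR
 * output one cir.vec.insert. A rough plain-C model follows; the struct and
 * helper names are assumptions made here for illustration.
 */
#include <stdint.h>

typedef struct { uint8_t lane[8]; } u8x8_model; /* stand-in for uint8x8_t */

u8x8_model model_vset_lane_u8(uint8_t a, u8x8_model b, int idx) {
  b.lane[idx] = a; /* one element insert, like vset_lane_u8(a, b, idx) */
  return b;
}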
+ +// REQUIRES: aarch64-registered-target || arm-registered-target +#include + +uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) { + return vset_lane_u8(a, b, 7); +} + +// CIR-LABEL: test_vset_lane_u8 +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i loc(#loc7) +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <8 x i8> @test_vset_lane_u8(i8 [[A:%.*]], <8 x i8> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1 +// LLVM: [[B_ADR:%.*]] = alloca <8 x i8>, i64 1, align 8 +// LLVM: store i8 [[A]], ptr [[A_ADR]], align 1 +// LLVM: store <8 x i8> [[B]], ptr [[B_ADR]], align 8 +// LLVM: [[TMP_A0:%.*]] = load i8, ptr [[A_ADR]], align 1 +// LLVM: store i8 [[TMP_A0]], ptr [[S0:%.*]], align 1 +// LLVM: [[TMP_B0:%.*]] = load <8 x i8>, ptr [[B_ADR]], align 8 +// LLVM: store <8 x i8> [[TMP_B0]], ptr [[S1:%.*]], align 8 +// LLVM: [[INTRN_ARG0:%.*]] = load i8, ptr [[S0]], align 1 +// LLVM: [[INTRN_ARG1:%.*]] = load <8 x i8>, ptr [[S1]], align 8 +// LLVM: [[INTRN_RES:%.*]] = insertelement <8 x i8> [[INTRN_ARG1]], i8 [[INTRN_ARG0]], i32 7 +// LLVM: ret <8 x i8> {{%.*}} + +uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { + return vset_lane_u16(a, b, 3); +} + +// CIR-LABEL: test_vset_lane_u16 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <4 x i16> @test_vset_lane_u16(i16 [[A:%.*]], <4 x i16> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2 +// LLVM: [[B_ADR:%.*]] = alloca <4 x i16>, i64 1, align 8 +// LLVM: store i16 [[A]], ptr [[A_ADR]], align 2 +// LLVM: store <4 x i16> [[B]], ptr [[B_ADR]], align 8 +// LLVM: [[TMP_A0:%.*]] = load i16, ptr [[A_ADR]], align 2 +// LLVM: store i16 [[TMP_A0]], ptr [[S0:%.*]], align 2 +// LLVM: [[TMP_B0:%.*]] = load <4 x i16>, ptr [[B_ADR]], align 8 +// LLVM: store <4 x i16> [[TMP_B0]], ptr [[S1:%.*]], align 8 +// LLVM: [[INTRN_ARG0:%.*]] = load i16, ptr [[S0]], align 2 +// LLVM: [[INTRN_ARG1:%.*]] = load <4 x i16>, ptr [[S1]], align 8 +// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x i16> [[INTRN_ARG1]], i16 [[INTRN_ARG0]], i32 3 +// LLVM: ret <4 x i16> {{%.*}} + +uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { + return vset_lane_u32(a, b, 1); +} + +// CIR-LABEL: test_vset_lane_u32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <2 x i32> @test_vset_lane_u32(i32 [[A:%.*]], <2 x i32> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4 +// LLVM: [[B_ADR:%.*]] = alloca <2 x i32>, i64 1, align 8 +// LLVM: store i32 [[A]], ptr [[A_ADR]], align 4 +// LLVM: store <2 x i32> [[B]], ptr [[B_ADR]], align 8 +// LLVM: [[TMP_A0:%.*]] = load i32, ptr [[A_ADR]], align 4 +// LLVM: store i32 [[TMP_A0]], ptr [[S0:%.*]], align 4 +// LLVM: [[TMP_B0:%.*]] = load <2 x i32>, ptr [[B_ADR]], align 8 +// LLVM: store <2 x i32> [[TMP_B0]], ptr [[S1:%.*]], align 8 +// LLVM: [[INTRN_ARG0:%.*]] = load i32, ptr [[S0]], align 4 +// LLVM: [[INTRN_ARG1:%.*]] = load <2 x i32>, ptr [[S1]], align 8 +// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x i32> [[INTRN_ARG1]], i32 [[INTRN_ARG0]], i32 1 +// LLVM: ret <2 x i32> {{%.*}} + + +int64x1_t test_vset_lane_u64(int64_t a, int64x1_t b) { + return vset_lane_u64(a, b, 0); +} + +// CIR-LABEL: test_vset_lane_u64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : 
!s32i] : !cir.vector + +// LLVM: define dso_local <1 x i64> @test_vset_lane_u64(i64 [[A:%.*]], <1 x i64> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8 +// LLVM: [[B_ADR:%.*]] = alloca <1 x i64>, i64 1, align 8 +// LLVM: store i64 [[A]], ptr [[A_ADR]], align 8 +// LLVM: store <1 x i64> [[B]], ptr [[B_ADR]], align 8 +// LLVM: [[TMP_A0:%.*]] = load i64, ptr [[A_ADR]], align 8 +// LLVM: store i64 [[TMP_A0]], ptr [[S0:%.*]], align 8 +// LLVM: [[TMP_B0:%.*]] = load <1 x i64>, ptr [[B_ADR]], align 8 +// LLVM: store <1 x i64> [[TMP_B0]], ptr [[S1:%.*]], align 8 +// LLVM: [[INTRN_ARG0:%.*]] = load i64, ptr [[S0]], align 8 +// LLVM: [[INTRN_ARG1:%.*]] = load <1 x i64>, ptr [[S1]], align 8 +// LLVM: [[INTRN_RES:%.*]] = insertelement <1 x i64> [[INTRN_ARG1]], i64 [[INTRN_ARG0]], i32 0 +// LLVM: ret <1 x i64> {{%.*}} + +float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { + return vset_lane_f32(a, b, 1); +} + +// CIR-LABEL: test_vset_lane_f32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <2 x float> @test_vset_lane_f32(float [[A:%.*]], <2 x float> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4 +// LLVM: [[B_ADR:%.*]] = alloca <2 x float>, i64 1, align 8 +// LLVM: store float [[A]], ptr [[A_ADR]], align 4 +// LLVM: store <2 x float> [[B]], ptr [[B_ADR]], align 8 +// LLVM: [[TMP_A0:%.*]] = load float, ptr [[A_ADR]], align 4 +// LLVM: store float [[TMP_A0]], ptr [[S0:%.*]], align 4 +// LLVM: [[TMP_B0:%.*]] = load <2 x float>, ptr [[B_ADR]], align 8 +// LLVM: store <2 x float> [[TMP_B0]], ptr [[S1:%.*]], align 8 +// LLVM: [[INTRN_ARG0:%.*]] = load float, ptr [[S0]], align 4 +// LLVM: [[INTRN_ARG1:%.*]] = load <2 x float>, ptr [[S1]], align 8 +// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x float> [[INTRN_ARG1]], float [[INTRN_ARG0]], i32 1 +// LLVM: ret <2 x float> {{%.*}} + +uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { + return vsetq_lane_u8(a, b, 15); +} + +// CIR-LABEL: test_vsetq_lane_u8 +// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <16 x i8> @test_vsetq_lane_u8(i8 [[A:%.*]], <16 x i8> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1 +// LLVM: [[B_ADR:%.*]] = alloca <16 x i8>, i64 1, align 16 +// LLVM: store i8 [[A]], ptr [[A_ADR]], align 1 +// LLVM: store <16 x i8> [[B]], ptr [[B_ADR]], align 16 +// LLVM: [[TMP_A0:%.*]] = load i8, ptr [[A_ADR]], align 1 +// LLVM: store i8 [[TMP_A0]], ptr [[S0:%.*]], align 1 +// LLVM: [[TMP_B0:%.*]] = load <16 x i8>, ptr [[B_ADR]], align 16 +// LLVM: store <16 x i8> [[TMP_B0]], ptr [[S1:%.*]], align 16 +// LLVM: [[INTRN_ARG0:%.*]] = load i8, ptr [[S0]], align 1 +// LLVM: [[INTRN_ARG1:%.*]] = load <16 x i8>, ptr [[S1]], align 16 +// LLVM: [[INTRN_RES:%.*]] = insertelement <16 x i8> [[INTRN_ARG1]], i8 [[INTRN_ARG0]], i32 15 +// LLVM: ret <16 x i8> {{%.*}} + +uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { + return vsetq_lane_u16(a, b, 7); +} + +// CIR-LABEL: test_vsetq_lane_u16 +// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <8 x i16> @test_vsetq_lane_u16(i16 [[A:%.*]], <8 x i16> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2 +// LLVM: [[B_ADR:%.*]] = alloca <8 x i16>, i64 1, align 16 +// LLVM: store i16 [[A]], ptr [[A_ADR]], 
align 2 +// LLVM: store <8 x i16> [[B]], ptr [[B_ADR]], align 16 +// LLVM: [[TMP_A0:%.*]] = load i16, ptr [[A_ADR]], align 2 +// LLVM: store i16 [[TMP_A0]], ptr [[S0:%.*]], align 2 +// LLVM: [[TMP_B0:%.*]] = load <8 x i16>, ptr [[B_ADR]], align 16 +// LLVM: store <8 x i16> [[TMP_B0]], ptr [[S1:%.*]], align 16 +// LLVM: [[INTRN_ARG0:%.*]] = load i16, ptr [[S0]], align 2 +// LLVM: [[INTRN_ARG1:%.*]] = load <8 x i16>, ptr [[S1]], align 16 +// LLVM: [[INTRN_RES:%.*]] = insertelement <8 x i16> [[INTRN_ARG1]], i16 [[INTRN_ARG0]], i32 7 +// LLVM: ret <8 x i16> {{%.*}} + +uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { + return vsetq_lane_u32(a, b, 3); +} + +// CIR-LABEL: test_vsetq_lane_u32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <4 x i32> @test_vsetq_lane_u32(i32 [[A:%.*]], <4 x i32> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4 +// LLVM: [[B_ADR:%.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: store i32 [[A]], ptr [[A_ADR]], align 4 +// LLVM: store <4 x i32> [[B]], ptr [[B_ADR]], align 16 +// LLVM: [[TMP_A0:%.*]] = load i32, ptr [[A_ADR]], align 4 +// LLVM: store i32 [[TMP_A0]], ptr [[S0:%.*]], align 4 +// LLVM: [[TMP_B0:%.*]] = load <4 x i32>, ptr [[B_ADR]], align 16 +// LLVM: store <4 x i32> [[TMP_B0]], ptr [[S1:%.*]], align 16 +// LLVM: [[INTRN_ARG0:%.*]] = load i32, ptr [[S0]], align 4 +// LLVM: [[INTRN_ARG1:%.*]] = load <4 x i32>, ptr [[S1]], align 16 +// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x i32> [[INTRN_ARG1]], i32 [[INTRN_ARG0]], i32 3 +// LLVM: ret <4 x i32> {{%.*}} + +int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { + return vsetq_lane_s64(a, b, 1); +} + +// CIR-LABEL: test_vsetq_lane_s64 +// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <2 x i64> @test_vsetq_lane_s64(i64 [[A:%.*]], <2 x i64> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8 +// LLVM: [[B_ADR:%.*]] = alloca <2 x i64>, i64 1, align 16 +// LLVM: store i64 [[A]], ptr [[A_ADR]], align 8 +// LLVM: store <2 x i64> [[B]], ptr [[B_ADR]], align 16 +// LLVM: [[TMP_A0:%.*]] = load i64, ptr [[A_ADR]], align 8 +// LLVM: store i64 [[TMP_A0]], ptr [[S0:%.*]], align 8 +// LLVM: [[TMP_B0:%.*]] = load <2 x i64>, ptr [[B_ADR]], align 16 +// LLVM: store <2 x i64> [[TMP_B0]], ptr [[S1:%.*]], align 16 +// LLVM: [[INTRN_ARG0:%.*]] = load i64, ptr [[S0]], align 8 +// LLVM: [[INTRN_ARG1:%.*]] = load <2 x i64>, ptr [[S1]], align 16 +// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x i64> [[INTRN_ARG1]], i64 [[INTRN_ARG0]], i32 1 +// LLVM: ret <2 x i64> {{%.*}} + +float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { + return vsetq_lane_f32(a, b, 3); +} + +// CIR-LABEL: test_vsetq_lane_f32 +// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i +// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector + +// LLVM: define dso_local <4 x float> @test_vsetq_lane_f32(float [[A:%.*]], <4 x float> [[B:%.*]]) +// LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4 +// LLVM: [[B_ADR:%.*]] = alloca <4 x float>, i64 1, align 16 +// LLVM: store float [[A]], ptr [[A_ADR]], align 4 +// LLVM: store <4 x float> [[B]], ptr [[B_ADR]], align 16 +// LLVM: [[TMP_A0:%.*]] = load float, ptr [[A_ADR]], align 4 +// LLVM: store float [[TMP_A0]], ptr [[S0:%.*]], align 4 +// LLVM: [[TMP_B0:%.*]] = load <4 x float>, ptr [[B_ADR]], align 16 +// LLVM: store 
<4 x float> [[TMP_B0]], ptr [[S1:%.*]], align 16 +// LLVM: [[INTRN_ARG0:%.*]] = load float, ptr [[S0]], align 4 +// LLVM: [[INTRN_ARG1:%.*]] = load <4 x float>, ptr [[S1]], align 16 +// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x float> [[INTRN_ARG1]], float [[INTRN_ARG0]], i32 3 +// LLVM: ret <4 x float> {{%.*}} diff --git a/clang/test/CIR/CodeGen/abstract-cond.c b/clang/test/CIR/CodeGen/abstract-cond.c new file mode 100644 index 000000000000..d724c8e1ea28 --- /dev/null +++ b/clang/test/CIR/CodeGen/abstract-cond.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// ?: in "lvalue" +struct s6 { int f0; }; +int f6(int a0, struct s6 a1, struct s6 a2) { + return (a0 ? a1 : a2).f0; +} + +// CIR-LABEL: @f6 +// CIR: %[[A0:.*]] = cir.alloca !s32i, !cir.ptr, ["a0" +// CIR: %[[A1:.*]] = cir.alloca !ty_s6_, !cir.ptr, ["a1" +// CIR: %[[A2:.*]] = cir.alloca !ty_s6_, !cir.ptr, ["a2" +// CIR: %[[TMP:.*]] = cir.alloca !ty_s6_, !cir.ptr, ["tmp"] {alignment = 4 : i64} +// CIR: %[[LOAD_A0:.*]] = cir.load %[[A0]] : !cir.ptr, !s32i +// CIR: %[[COND:.*]] = cir.cast(int_to_bool, %[[LOAD_A0]] : !s32i), !cir.bool +// CIR: cir.if %[[COND]] { +// CIR: cir.copy %[[A1]] to %[[TMP]] : !cir.ptr +// CIR: } else { +// CIR: cir.copy %[[A2]] to %[[TMP]] : !cir.ptr +// CIR: } +// CIR: cir.get_member %[[TMP]][0] {name = "f0"} : !cir.ptr -> !cir.ptr + +// LLVM-LABEL: @f6 +// LLVM: %[[LOAD_A0:.*]] = load i32, ptr {{.*}} +// LLVM: %[[COND:.*]] = icmp ne i32 %[[LOAD_A0]], 0 +// LLVM: br i1 %[[COND]], label %[[A1_PATH:.*]], label %[[A2_PATH:.*]], +// LLVM: [[A2_PATH]]: +// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[TMP:.*]], ptr {{.*}}, i32 4, i1 false) +// LLVM: br label %[[EXIT:[a-z0-9]+]] +// LLVM: [[A1_PATH]]: +// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[TMP]], ptr {{.*}}, i32 4, i1 false) +// LLVM: br label %[[EXIT]] +// LLVM: [[EXIT]]: +// LLVM: getelementptr {{.*}}, ptr %[[TMP]], i32 0, i32 0 \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/address-space-conversion.cpp b/clang/test/CIR/CodeGen/address-space-conversion.cpp new file mode 100644 index 000000000000..1f4ee00ca63b --- /dev/null +++ b/clang/test/CIR/CodeGen/address-space-conversion.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +using pi1_t = int __attribute__((address_space(1))) *; +using pi2_t = int __attribute__((address_space(2))) *; + +using ri1_t = int __attribute__((address_space(1))) &; +using ri2_t = int __attribute__((address_space(2))) &; + +// CIR: cir.func @{{.*test_ptr.*}} +// LLVM: define dso_local void @{{.*test_ptr.*}} +void test_ptr() { + pi1_t ptr1; + pi2_t ptr2 = (pi2_t)ptr1; + // CIR: %[[#PTR1:]] = cir.load %{{[0-9]+}} : !cir.ptr)>>, !cir.ptr)> + // CIR-NEXT: %[[#CAST:]] = cir.cast(address_space, %[[#PTR1]] : !cir.ptr)>), !cir.ptr)> + // CIR-NEXT: cir.store %[[#CAST]], %{{[0-9]+}} : !cir.ptr)>, !cir.ptr)>> + + // LLVM: %[[#PTR1:]] = load ptr addrspace(1), ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#PTR1]] to ptr 
addrspace(2) + // LLVM-NEXT: store ptr addrspace(2) %[[#CAST]], ptr %{{[0-9]+}}, align 8 +} + +// CIR: cir.func @{{.*test_ref.*}} +// LLVM: define dso_local void @{{.*test_ref.*}} +void test_ref() { + pi1_t ptr; + ri1_t ref1 = *ptr; + ri2_t ref2 = (ri2_t)ref1; + // CIR: %[[#DEREF:]] = cir.load deref %{{[0-9]+}} : !cir.ptr)>>, !cir.ptr)> + // CIR-NEXT: cir.store %[[#DEREF]], %[[#ALLOCAREF1:]] : !cir.ptr)>, !cir.ptr)>> + // CIR-NEXT: %[[#REF1:]] = cir.load %[[#ALLOCAREF1]] : !cir.ptr)>>, !cir.ptr)> + // CIR-NEXT: %[[#CAST:]] = cir.cast(address_space, %[[#REF1]] : !cir.ptr)>), !cir.ptr)> + // CIR-NEXT: cir.store %[[#CAST]], %{{[0-9]+}} : !cir.ptr)>, !cir.ptr)>> + + // LLVM: %[[#DEREF:]] = load ptr addrspace(1), ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: store ptr addrspace(1) %[[#DEREF]], ptr %[[#ALLOCAREF1:]], align 8 + // LLVM-NEXT: %[[#REF1:]] = load ptr addrspace(1), ptr %[[#ALLOCAREF1]], align 8 + // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#REF1]] to ptr addrspace(2) + // LLVM-NEXT: store ptr addrspace(2) %[[#CAST]], ptr %{{[0-9]+}}, align 8 +} + +// CIR: cir.func @{{.*test_nullptr.*}} +// LLVM: define dso_local void @{{.*test_nullptr.*}} +void test_nullptr() { + constexpr pi1_t null1 = nullptr; + pi2_t ptr = (pi2_t)null1; + // CIR: %[[#NULL1:]] = cir.const #cir.ptr : !cir.ptr)> + // CIR-NEXT: cir.store %[[#NULL1]], %{{[0-9]+}} : !cir.ptr)>, !cir.ptr)>> + // CIR-NEXT: %[[#NULL2:]] = cir.const #cir.ptr : !cir.ptr)> + // CIR-NEXT: cir.store %[[#NULL2]], %{{[0-9]+}} : !cir.ptr)>, !cir.ptr)>> + + // LLVM: store ptr addrspace(1) null, ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: store ptr addrspace(2) null, ptr %{{[0-9]+}}, align 8 +} + +void test_side_effect(pi1_t b) { + pi2_t p = (pi2_t)(*b++, (int*)0); + // CIR: %{{[0-9]+}} = cir.ptr_stride(%{{[0-9]+}} : !cir.ptr)>, %{{[0-9]+}} : !s32i), !cir.ptr)> + // CIR: %[[#CAST:]] = cir.const #cir.ptr : !cir.ptr)> + // CIR-NEXT: cir.store %[[#CAST]], %{{[0-9]+}} : !cir.ptr)>, !cir.ptr)>> + + // LLVM: %{{[0-9]+}} = getelementptr i32, ptr addrspace(1) %{{[0-9]+}}, i64 1 + // LLVM: store ptr addrspace(2) null, ptr %{{[0-9]+}}, align 8 + +} diff --git a/clang/test/CIR/CodeGen/address-space.c b/clang/test/CIR/CodeGen/address-space.c new file mode 100644 index 000000000000..b1b741594257 --- /dev/null +++ b/clang/test/CIR/CodeGen/address-space.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +// CIR: cir.func {{@.*foo.*}}(%arg0: !cir.ptr)> +// LLVM: define dso_local void @foo(ptr addrspace(1) %0) +void foo(int __attribute__((address_space(1))) *arg) { + return; +} + +// CIR: cir.func {{@.*bar.*}}(%arg0: !cir.ptr)> +// LLVM: define dso_local void @bar(ptr %0) +void bar(int __attribute__((address_space(0))) *arg) { + return; +} + +// CIR: cir.func {{@.*baz.*}}(%arg0: !cir.ptr +// LLVM: define dso_local void @baz(ptr %0) +void baz(int *arg) { + return; +} diff --git a/clang/test/CIR/CodeGen/agg-copy.c b/clang/test/CIR/CodeGen/agg-copy.c new file mode 100644 index 000000000000..d29f296d878d --- /dev/null +++ b/clang/test/CIR/CodeGen/agg-copy.c @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef struct {} S; + +typedef struct { + int a; + int b; + S s; +} A; + +// CHECK: cir.func @foo1 
+// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a1", init] +// CHECK: [[TMP1:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a2", init] +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg1, [[TMP1]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP2:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP3:%.*]] = cir.const #cir.int<1> : !s32i +// CHECK: [[TMP4:%.*]] = cir.ptr_stride([[TMP2]] : !cir.ptr, [[TMP3]] : !s32i), !cir.ptr +// CHECK: [[TMP5:%.*]] = cir.load [[TMP1]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP6:%.*]] = cir.const #cir.int<1> : !s32i +// CHECK: [[TMP7:%.*]] = cir.ptr_stride([[TMP5]] : !cir.ptr, [[TMP6]] : !s32i), !cir.ptr +// CHECK: cir.copy [[TMP7]] to [[TMP4]] : !cir.ptr +void foo1(A* a1, A* a2) { + a1[1] = a2[1]; +} + +// CHECK: cir.func @foo2 +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a1", init] +// CHECK: [[TMP1:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a2", init] +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg1, [[TMP1]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP2:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP3:%.*]] = cir.get_member [[TMP2]][2] {name = "s"} : !cir.ptr -> !cir.ptr +// CHECK: [[TMP4:%.*]] = cir.load [[TMP1]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP5:%.*]] = cir.get_member [[TMP4]][2] {name = "s"} : !cir.ptr -> !cir.ptr +// CHECK: cir.copy [[TMP5]] to [[TMP3]] : !cir.ptr +void foo2(A* a1, A* a2) { + a1->s = a2->s; +} + +// CHECK: cir.global external @a = #cir.zero : !ty_A +// CHECK: cir.func @foo3 +// CHECK: [[TMP0]] = cir.alloca !ty_A, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: [[TMP1]] = cir.get_global @a : !cir.ptr +// CHECK: cir.copy [[TMP1]] to [[TMP0]] : !cir.ptr +// CHECK: [[TMP2]] = cir.load [[TMP0]] : !cir.ptr, !ty_A +// CHECK: cir.return [[TMP2]] : !ty_A +A a; +A foo3(void) { + return a; +} + +// CHECK: cir.func @foo4 +// CHECK: [[TMP0]] = cir.alloca !cir.ptr, !cir.ptr>, ["a1", init] +// CHECK: [[TMP1]] = cir.alloca !ty_A, !cir.ptr, ["a2", init] +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP2]] = cir.load deref [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: cir.copy [[TMP2]] to [[TMP1]] : !cir.ptr +void foo4(A* a1) { + A a2 = *a1; +} + +A create() { A a; return a; } + +// CHECK: cir.func {{.*@foo5}} +// CHECK: [[TMP0:%.*]] = cir.alloca !ty_A, !cir.ptr, +// CHECK: [[TMP1:%.*]] = cir.alloca !ty_A, !cir.ptr, ["tmp"] {alignment = 4 : i64} +// CHECK: [[TMP2:%.*]] = cir.call @create() : () -> !ty_A +// CHECK: cir.store [[TMP2]], [[TMP1]] : !ty_A, !cir.ptr +// CHECK: cir.copy [[TMP1]] to [[TMP0]] : !cir.ptr +void foo5() { + A a; + a = create(); +} + +void foo6(A* a1) { + A a2 = (*a1); +// CHECK: cir.func {{.*@foo6}} +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a1", init] {alignment = 8 : i64} +// CHECK: [[TMP1:%.*]] = cir.alloca !ty_A, !cir.ptr, ["a2", init] {alignment = 4 : i64} +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP2:%.*]] = cir.load deref [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: cir.copy [[TMP2]] to [[TMP1]] : !cir.ptr +} + +volatile A vol_a; +A foo7() { + return vol_a; +} +// CHECK: cir.func {{.*@foo7}} +// CHECK: %0 = cir.alloca +// CHECK: %1 = cir.get_global @vol_a +// CHECK: cir.copy %1 to %0 volatile \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/agg-init.cpp b/clang/test/CIR/CodeGen/agg-init.cpp new file mode 100644 index 000000000000..0f99c8574e0f --- /dev/null +++ b/clang/test/CIR/CodeGen/agg-init.cpp @@ -0,0 +1,62 @@ +// RUN: 
%clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: !ty_yep_ = !cir.struct, !cir.int}> + +typedef enum xxy_ { + xxy_Low = 0, + xxy_High = 0x3f800000, + xxy_EnumSize = 0x7fffffff +} xxy; + +typedef struct yep_ { + unsigned int Status; + xxy HC; +} yop; + +void use() { yop{}; } + +// CHECK: cir.func @_Z3usev() +// CHECK: %0 = cir.alloca !ty_yep_, !cir.ptr, ["agg.tmp.ensured"] {alignment = 4 : i64} +// CHECK: %1 = cir.get_member %0[0] {name = "Status"} : !cir.ptr -> !cir.ptr +// CHECK: %2 = cir.const #cir.int<0> : !u32i +// CHECK: cir.store %2, %1 : !u32i, !cir.ptr +// CHECK: %3 = cir.get_member %0[1] {name = "HC"} : !cir.ptr -> !cir.ptr +// CHECK: %4 = cir.const #cir.int<0> : !u32i +// CHECK: cir.store %4, %3 : !u32i, !cir.ptr +// CHECK: cir.return +// CHECK: } + +typedef unsigned long long Flags; + +typedef enum XType { + A = 0, + Y = 1000066001, + X = 1000070000 +} XType; + +typedef struct Yo { + XType type; + const void* __attribute__((__may_alias__)) next; + Flags createFlags; +} Yo; + +void yo() { + Yo ext = {X}; + Yo ext2 = {Y, &ext}; +} + +// CHECK: cir.func @_Z2yov() +// CHECK: %0 = cir.alloca !ty_Yo, !cir.ptr, ["ext"] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !ty_Yo, !cir.ptr, ["ext2", init] {alignment = 8 : i64} +// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<1000070000> : !u32i, #cir.ptr : !cir.ptr, #cir.int<0> : !u64i}> : !ty_Yo +// CHECK: cir.store %2, %0 : !ty_Yo, !cir.ptr +// CHECK: %3 = cir.get_member %1[0] {name = "type"} : !cir.ptr -> !cir.ptr +// CHECK: %4 = cir.const #cir.int<1000066001> : !u32i +// CHECK: cir.store %4, %3 : !u32i, !cir.ptr +// CHECK: %5 = cir.get_member %1[1] {name = "next"} : !cir.ptr -> !cir.ptr> +// CHECK: %6 = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr +// CHECK: cir.store %6, %5 : !cir.ptr, !cir.ptr> +// CHECK: %7 = cir.get_member %1[2] {name = "createFlags"} : !cir.ptr -> !cir.ptr +// CHECK: %8 = cir.const #cir.int<0> : !u64i +// CHECK: cir.store %8, %7 : !u64i, !cir.ptr diff --git a/clang/test/CIR/CodeGen/agg-init2.cpp b/clang/test/CIR/CodeGen/agg-init2.cpp new file mode 100644 index 000000000000..644e09ae7c1b --- /dev/null +++ b/clang/test/CIR/CodeGen/agg-init2.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: !ty_Zero = !cir.struct}> + +struct Zero { + void yolo(); +}; + +void f() { + Zero z0 = Zero(); + // {} no element init. + Zero z1 = Zero{}; +} + +// CHECK: cir.func @_Z1fv() +// CHECK: %0 = cir.alloca !ty_Zero, !cir.ptr, ["z0", init] +// CHECK: %1 = cir.alloca !ty_Zero, !cir.ptr, ["z1"] +// CHECK: cir.call @_ZN4ZeroC1Ev(%0) : (!cir.ptr) -> () +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/analysis-only.cpp b/clang/test/CIR/CodeGen/analysis-only.cpp new file mode 100644 index 000000000000..7f427f0de92f --- /dev/null +++ b/clang/test/CIR/CodeGen/analysis-only.cpp @@ -0,0 +1,8 @@ +// Check `-fclangir-analysis-only` would generate code correctly. 
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir-analysis-only -std=c++20 \ +// RUN: -O2 -emit-llvm %s -o - | FileCheck %s + +extern "C" void foo() {} + +// CHECK: define{{.*}} @foo( + diff --git a/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c b/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c new file mode 100644 index 000000000000..92b4a9298eac --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \ +// RUN: -ffreestanding -emit-cir -target-feature +neon %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \ +// RUN: -ffreestanding -emit-llvm -target-feature +neon %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// REQUIRES: aarch64-registered-target || arm-registered-target +#include + +float32_t test_vrndns_f32(float32_t a) { + return vrndns_f32(a); +} +// CIR: cir.func internal private @vrndns_f32(%arg0: !cir.float {{.*}}) -> !cir.float +// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.float, !cir.ptr +// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr, !cir.float +// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.roundeven.f32" [[INTRIN_ARG]] : (!cir.float) +// CIR: cir.return {{%.*}} : !cir.float + +// CIR-LABEL: test_vrndns_f32 +// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.float, !cir.ptr +// CIR: [[FUNC_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr, !cir.float +// CIR: [[FUNC_RES:%.*]] = cir.call @vrndns_f32([[FUNC_ARG]]) : (!cir.float) -> !cir.float +// CIR: cir.store [[FUNC_RES]], [[RET_P:%.*]] : !cir.float, !cir.ptr +// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr, !cir.float +// CIR: cir.return [[RET_VAL]] : !cir.float loc + +// LLVM: define dso_local float @test_vrndns_f32(float [[ARG:%.*]]) +// LLVM: store float [[ARG]], ptr [[ARG_SAVE:%.*]], align 4 +// LLVM: [[P0:%.*]] = load float, ptr [[ARG_SAVE]], align 4, +// LLVM: store float [[P0]], ptr [[P0_SAVE:%.*]], align 4, +// LLVM: [[INTRIN_ARG:%.*]] = load float, ptr [[P0_SAVE]], align 4, +// LLVM: [[INTRIN_RES:%.*]] = call float @llvm.roundeven.f32(float [[INTRIN_ARG]]) +// LLVM: store float [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 4, +// LLVM: [[RES_COPY0:%.*]] = load float, ptr [[RES_SAVE0]], align 4, +// LLVM: store float [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 4, +// LLVM: [[RES_COPY1:%.*]] = load float, ptr [[RES_SAVE1]], align 4, +// LLVM: store float [[RES_COPY1]], ptr [[RET_P:%.*]], align 4, +// LLVM: [[RET_VAL:%.*]] = load float, ptr [[RET_P]], align 4, +// LLVM: ret float [[RET_VAL]] + +float32x2_t test_vrnda_f32(float32x2_t a) { + return vrnda_f32(a); +} + +// CIR: cir.func internal private @vrnda_f32(%arg0: !cir.vector +// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector, !cir.ptr> +// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr>, !cir.vector +// CIR: [[INTRIN_ARG_CAST:%.*]] = cir.cast(bitcast, [[INTRIN_ARG]] : !cir.vector), !cir.vector +// CIR: [[INTRIN_ARG_BACK:%.*]] = cir.cast(bitcast, [[INTRIN_ARG_CAST]] : !cir.vector), !cir.vector +// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector) -> !cir.vector +// CIR: cir.return {{%.*}} : !cir.vector + +// CIR-LABEL: test_vrnda_f32 +// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.vector, !cir.ptr> +// CIR: [[FUNC_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr>, !cir.vector +// CIR: [[FUNC_RES:%.*]] = cir.call @vrnda_f32([[FUNC_ARG]]) : (!cir.vector) -> 
!cir.vector +// CIR: cir.store [[FUNC_RES]], [[RET_P:%.*]] : !cir.vector, !cir.ptr> +// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr>, !cir.vector +// CIR: cir.return [[RET_VAL]] : !cir.vector + +// LLVM: define dso_local <2 x float> @test_vrnda_f32(<2 x float> [[ARG:%.*]]) +// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 8 +// LLVM: [[P0:%.*]] = load <2 x float>, ptr [[ARG_SAVE]], align 8, +// LLVM: store <2 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 8, +// LLVM: [[INTRIN_ARG:%.*]] = load <2 x float>, ptr [[P0_SAVE]], align 8, +// LLVM: [[INTRIN_RES:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[INTRIN_ARG]]) +// LLVM: store <2 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 8, +// LLVM: [[RES_COPY0:%.*]] = load <2 x float>, ptr [[RES_SAVE0]], align 8, +// LLVM: store <2 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 8, +// LLVM: [[RES_COPY1:%.*]] = load <2 x float>, ptr [[RES_SAVE1]], align 8, +// LLVM: store <2 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 8, +// LLVM: [[RET_VAL:%.*]] = load <2 x float>, ptr [[RET_P]], align 8, +// LLVM: ret <2 x float> [[RET_VAL]] + +float32x4_t test_vrndaq_f32(float32x4_t a) { + return vrndaq_f32(a); +} + +// CIR: cir.func internal private @vrndaq_f32(%arg0: !cir.vector +// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector, !cir.ptr> +// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr>, !cir.vector +// CIR: [[INTRIN_ARG_CAST:%.*]] = cir.cast(bitcast, [[INTRIN_ARG]] : !cir.vector), !cir.vector +// CIR: [[INTRIN_ARG_BACK:%.*]] = cir.cast(bitcast, [[INTRIN_ARG_CAST]] : !cir.vector), !cir.vector +// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.round" [[INTRIN_ARG_BACK]] : (!cir.vector) -> !cir.vector +// CIR: cir.return {{%.*}} : !cir.vector + +// LLVM: define dso_local <4 x float> @test_vrndaq_f32(<4 x float> [[ARG:%.*]]) +// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE:%.*]], align 16 +// LLVM: [[P0:%.*]] = load <4 x float>, ptr [[ARG_SAVE]], align 16, +// LLVM: store <4 x float> [[P0]], ptr [[P0_SAVE:%.*]], align 16, +// LLVM: [[INTRIN_ARG:%.*]] = load <4 x float>, ptr [[P0_SAVE]], align 16, +// LLVM: [[INTRIN_RES:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[INTRIN_ARG]]) +// LLVM: store <4 x float> [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 16, +// LLVM: [[RES_COPY0:%.*]] = load <4 x float>, ptr [[RES_SAVE0]], align 16, +// LLVM: store <4 x float> [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 16, +// LLVM: [[RES_COPY1:%.*]] = load <4 x float>, ptr [[RES_SAVE1]], align 16, +// LLVM: store <4 x float> [[RES_COPY1]], ptr [[RET_P:%.*]], align 16, +// LLVM: [[RET_VAL:%.*]] = load <4 x float>, ptr [[RET_P]], align 16, +// LLVM: ret <4 x float> [[RET_VAL]] diff --git a/clang/test/CIR/CodeGen/array-init-destroy.cpp b/clang/test/CIR/CodeGen/array-init-destroy.cpp new file mode 100644 index 000000000000..9bc39cec84c0 --- /dev/null +++ b/clang/test/CIR/CodeGen/array-init-destroy.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck -check-prefix=BEFORE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t2.cir 2>&1 | FileCheck -check-prefix=AFTER %s + +void foo() noexcept; + +class xpto { +public: + xpto() { + foo(); + } + int i; + float f; + ~xpto() { + foo(); + } +}; + +void x() { + xpto array[2]; +} + +// BEFORE: cir.func @_Z1xv() +// BEFORE: %[[ArrayAddr:.*]] = cir.alloca !cir.array + 
+// BEFORE: cir.array.ctor(%[[ArrayAddr]] : !cir.ptr>) { +// BEFORE: ^bb0(%arg0: !cir.ptr +// BEFORE: cir.call @_ZN4xptoC1Ev(%arg0) : (!cir.ptr) -> () +// BEFORE: cir.yield +// BEFORE: } + +// BEFORE: cir.array.dtor(%[[ArrayAddr]] : !cir.ptr>) { +// BEFORE: ^bb0(%arg0: !cir.ptr +// BEFORE: cir.call @_ZN4xptoD1Ev(%arg0) : (!cir.ptr) -> () +// BEFORE: cir.yield +// BEFORE: } + +// AFTER: cir.func @_Z1xv() +// AFTER: %[[ArrayAddr0:.*]] = cir.alloca !cir.array +// AFTER: %[[ConstTwo:.*]] = cir.const #cir.int<2> : !u64i +// AFTER: %[[ArrayBegin:.*]] = cir.cast(array_to_ptrdecay, %[[ArrayAddr0]] : !cir.ptr>), !cir.ptr +// AFTER: %[[ArrayPastEnd:.*]] = cir.ptr_stride(%[[ArrayBegin]] : !cir.ptr, %[[ConstTwo]] : !u64i), !cir.ptr +// AFTER: %[[TmpIdx:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__array_idx"] {alignment = 1 : i64} +// AFTER: cir.store %[[ArrayBegin]], %[[TmpIdx]] : !cir.ptr, !cir.ptr> +// AFTER: cir.do { +// AFTER: %[[ArrayElt:.*]] = cir.load %[[TmpIdx]] : !cir.ptr>, !cir.ptr +// AFTER: %[[ConstOne:.*]] = cir.const #cir.int<1> : !u64i +// AFTER: cir.call @_ZN4xptoC1Ev(%[[ArrayElt]]) : (!cir.ptr) -> () +// AFTER: %[[NextElt:.*]] = cir.ptr_stride(%[[ArrayElt]] : !cir.ptr, %[[ConstOne]] : !u64i), !cir.ptr +// AFTER: cir.store %[[NextElt]], %[[TmpIdx]] : !cir.ptr, !cir.ptr> +// AFTER: cir.yield +// AFTER: } while { +// AFTER: %[[ArrayElt:.*]] = cir.load %[[TmpIdx]] : !cir.ptr>, !cir.ptr +// AFTER: %[[ExitCond:.*]] = cir.cmp(eq, %[[ArrayElt]], %[[ArrayPastEnd]]) : !cir.ptr, !cir.bool +// AFTER: cir.condition(%[[ExitCond]]) +// AFTER: } + +// AFTER: cir.do { +// AFTER: cir.call @_ZN4xptoD1Ev({{.*}}) : (!cir.ptr) -> () +// AFTER: } while { +// AFTER: } + +// AFTER: cir.return \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/array-init.c b/clang/test/CIR/CodeGen/array-init.c new file mode 100644 index 000000000000..13999f24a45d --- /dev/null +++ b/clang/test/CIR/CodeGen/array-init.c @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +typedef struct { + int a; + long b; +} T; + +void buz(int x) { + T arr[] = { {0, x}, {0, 0} }; +} +// CHECK: cir.func @buz +// CHECK-NEXT: [[X_ALLOCA:%.*]] = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK-NEXT: [[ARR:%.*]] = cir.alloca !cir.array, !cir.ptr>, ["arr", init] {alignment = 16 : i64} +// CHECK-NEXT: cir.store %arg0, [[X_ALLOCA]] : !s32i, !cir.ptr +// CHECK-NEXT: [[ARR_INIT:%.*]] = cir.const #cir.zero : !cir.array +// CHECK-NEXT: cir.store [[ARR_INIT]], [[ARR]] : !cir.array, !cir.ptr> +// CHECK-NEXT: [[FI_EL:%.*]] = cir.cast(array_to_ptrdecay, [[ARR]] : !cir.ptr>), !cir.ptr +// CHECK-NEXT: [[A_STORAGE0:%.*]] = cir.get_member [[FI_EL]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: [[B_STORAGE0:%.*]] = cir.get_member [[FI_EL]][1] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: [[X_VAL:%.*]] = cir.load [[X_ALLOCA]] : !cir.ptr, !s32i +// CHECK-NEXT: [[X_CASTED:%.*]] = cir.cast(integral, [[X_VAL]] : !s32i), !s64i +// CHECK-NEXT: cir.store [[X_CASTED]], [[B_STORAGE0]] : !s64i, !cir.ptr +// CHECK-NEXT: [[ONE:%.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: [[SE_EL:%.*]] = cir.ptr_stride([[FI_EL]] : !cir.ptr, [[ONE]] : !s64i), !cir.ptr +// CHECK-NEXT: [[A_STORAGE1:%.*]] = cir.get_member [[SE_EL]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: [[B_STORAGE1:%.*]] = cir.get_member [[SE_EL]][1] {name = "b"} : !cir.ptr -> !cir.ptr +// CHECK-NEXT: cir.return + +void foo() { + double bar[] = {9,8,7}; +} + +// CHECK: %0 = cir.alloca !cir.array, 
!cir.ptr>, ["bar"] {alignment = 16 : i64} +// CHECK-NEXT: %1 = cir.const #cir.const_array<[#cir.fp<9.000000e+00> : !cir.double, #cir.fp<8.000000e+00> : !cir.double, #cir.fp<7.000000e+00> : !cir.double]> : !cir.array +// CHECK-NEXT: cir.store %1, %0 : !cir.array, !cir.ptr> +void bar(int a, int b, int c) { + int arr[] = {a,b,c}; +} + +// CHECK: cir.func @bar +// CHECK: [[ARR:%.*]] = cir.alloca !cir.array, !cir.ptr>, ["arr", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.store %arg0, [[A:%.*]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.store %arg1, [[B:%.*]] : !s32i, !cir.ptr +// CHECK-NEXT: cir.store %arg2, [[C:%.*]] : !s32i, !cir.ptr +// CHECK-NEXT: [[FI_EL:%.*]] = cir.cast(array_to_ptrdecay, [[ARR]] : !cir.ptr>), !cir.ptr +// CHECK-NEXT: [[LOAD_A:%.*]] = cir.load [[A]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.store [[LOAD_A]], [[FI_EL]] : !s32i, !cir.ptr +// CHECK-NEXT: [[ONE:%.*]] = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: [[SE_EL:%.*]] = cir.ptr_stride(%4 : !cir.ptr, [[ONE]] : !s64i), !cir.ptr +// CHECK-NEXT: [[LOAD_B:%.*]] = cir.load [[B]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.store [[LOAD_B]], [[SE_EL]] : !s32i, !cir.ptr +// CHECK-NEXT: [[TH_EL:%.*]] = cir.ptr_stride(%7 : !cir.ptr, [[ONE]] : !s64i), !cir.ptr +// CHECK-NEXT: [[LOAD_C:%.*]] = cir.load [[C]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.store [[LOAD_C]], [[TH_EL]] : !s32i, !cir.ptr + +void zero_init(int x) { + int arr[3] = {x}; +} + +// CHECK: cir.func @zero_init +// CHECK: [[VAR_ALLOC:%.*]] = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: %1 = cir.alloca !cir.array, !cir.ptr>, ["arr", init] {alignment = 4 : i64} +// CHECK: [[TEMP:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arrayinit.temp", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, [[VAR_ALLOC]] : !s32i, !cir.ptr +// CHECK: [[BEGIN:%.*]] = cir.cast(array_to_ptrdecay, %1 : !cir.ptr>), !cir.ptr +// CHECK: [[VAR:%.*]] = cir.load [[VAR_ALLOC]] : !cir.ptr, !s32i +// CHECK: cir.store [[VAR]], [[BEGIN]] : !s32i, !cir.ptr +// CHECK: [[ONE:%.*]] = cir.const #cir.int<1> : !s64i +// CHECK: [[ZERO_INIT_START:%.*]] = cir.ptr_stride([[BEGIN]] : !cir.ptr, [[ONE]] : !s64i), !cir.ptr +// CHECK: cir.store [[ZERO_INIT_START]], [[TEMP]] : !cir.ptr, !cir.ptr> +// CHECK: [[SIZE:%.*]] = cir.const #cir.int<3> : !s64i +// CHECK: [[END:%.*]] = cir.ptr_stride([[BEGIN]] : !cir.ptr, [[SIZE]] : !s64i), !cir.ptr +// CHECK: cir.do { +// CHECK: [[CUR:%.*]] = cir.load [[TEMP]] : !cir.ptr>, !cir.ptr +// CHECK: [[FILLER:%.*]] = cir.const #cir.int<0> : !s32i +// CHECK: cir.store [[FILLER]], [[CUR]] : !s32i, !cir.ptr +// CHECK: [[ONE:%.*]] = cir.const #cir.int<1> : !s64i +// CHECK: [[NEXT:%.*]] = cir.ptr_stride([[CUR]] : !cir.ptr, [[ONE]] : !s64i), !cir.ptr +// CHECK: cir.store [[NEXT]], [[TEMP]] : !cir.ptr, !cir.ptr> +// CHECK: cir.yield +// CHECK: } while { +// CHECK: [[CUR:%.*]] = cir.load [[TEMP]] : !cir.ptr>, !cir.ptr +// CHECK: [[CMP:%.*]] = cir.cmp(ne, [[CUR]], [[END]]) : !cir.ptr, !cir.bool +// CHECK: cir.condition([[CMP]]) +// CHECK: } +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/array-init.cpp b/clang/test/CIR/CodeGen/array-init.cpp new file mode 100644 index 000000000000..e051c31a9c6c --- /dev/null +++ b/clang/test/CIR/CodeGen/array-init.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef struct { + int a; + int b[2]; +} A; + +int bar() { + return 42; +} + +void foo() { + A a = {bar(), {}}; +} +// CHECK: %[[VAL_0:.*]] = cir.alloca !ty_A, 
!cir.ptr, ["a", init] +// CHECK: %[[VAL_1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arrayinit.temp", init] +// CHECK: %[[VAL_2:.*]] = cir.get_member %[[VAL_0]][0] {name = "a"} : !cir.ptr -> !cir.ptr +// CHECK: %[[VAL_3:.*]] = cir.call @_Z3barv() : () -> !s32i +// CHECK: cir.store %[[VAL_3]], %[[VAL_2]] : !s32i, !cir.ptr +// CHECK: %[[VAL_4:.*]] = cir.get_member %[[VAL_0]][1] {name = "b"} : !cir.ptr -> !cir.ptr> +// CHECK: %[[VAL_5:.*]] = cir.cast(array_to_ptrdecay, %[[VAL_4]] : !cir.ptr>), !cir.ptr +// CHECK: cir.store %[[VAL_5]], %[[VAL_1]] : !cir.ptr, !cir.ptr> +// CHECK: %[[VAL_6:.*]] = cir.const #cir.int<2> : !s64i +// CHECK: %[[VAL_7:.*]] = cir.ptr_stride(%[[VAL_5]] : !cir.ptr, %[[VAL_6]] : !s64i), !cir.ptr +// CHECK: cir.do { +// CHECK: %[[VAL_8:.*]] = cir.load %[[VAL_1]] : !cir.ptr>, !cir.ptr +// CHECK: %[[VAL_9:.*]] = cir.const #cir.int<0> : !s32i +// CHECK: cir.store %[[VAL_9]], %[[VAL_8]] : !s32i, !cir.ptr +// CHECK: %[[VAL_10:.*]] = cir.const #cir.int<1> : !s64i +// CHECK: %[[VAL_11:.*]] = cir.ptr_stride(%[[VAL_8]] : !cir.ptr, %[[VAL_10]] : !s64i), !cir.ptr +// CHECK: cir.store %[[VAL_11]], %[[VAL_1]] : !cir.ptr, !cir.ptr> +// CHECK: cir.yield +// CHECK: } while { +// CHECK: %[[VAL_8:.*]] = cir.load %[[VAL_1]] : !cir.ptr>, !cir.ptr +// CHECK: %[[VAL_9:.*]] = cir.cmp(ne, %[[VAL_8]], %[[VAL_7]]) : !cir.ptr, !cir.bool +// CHECK: cir.condition(%[[VAL_9]]) +// CHECK: } \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/array-unknown-bound.cpp b/clang/test/CIR/CodeGen/array-unknown-bound.cpp new file mode 100644 index 000000000000..805b8c5d5867 --- /dev/null +++ b/clang/test/CIR/CodeGen/array-unknown-bound.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +extern int table[]; +// CHECK: cir.global external @table = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array + +int *table_ptr = table; +// CHECK: cir.global external @table_ptr = #cir.global_view<@table> : !cir.ptr + +int test() { return table[1]; } +// CHECK: cir.func @_Z4testv() +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.get_global @table : !cir.ptr> + +int table[3] {1, 2, 3}; diff --git a/clang/test/CIR/CodeGen/array.c b/clang/test/CIR/CodeGen/array.c new file mode 100644 index 000000000000..ed83c663bd60 --- /dev/null +++ b/clang/test/CIR/CodeGen/array.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Should implicitly zero-initialize global array elements. +struct S { + int i; +} arr[3] = {{1}}; +// CHECK: cir.global external @arr = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S, #cir.zero : !ty_S, #cir.zero : !ty_S]> : !cir.array + +int a[4]; +// CHECK: cir.global external @a = #cir.zero : !cir.array + +// Should create a pointer to a complete array. +int (*complete_ptr_a)[4] = &a; +// CHECK: cir.global external @complete_ptr_a = #cir.global_view<@a> : !cir.ptr> + +// Should create a pointer to an incomplete array. +int (*incomplete_ptr_a)[] = &a; +// CHECK: cir.global external @incomplete_ptr_a = #cir.global_view<@a> : !cir.ptr> + +// Should access incomplete array if external. 
+extern int foo[]; +// CHECK: cir.global "private" external @foo : !cir.array +void useFoo(int i) { + foo[i] = 42; +} +// CHECK: @useFoo +// CHECK: %[[#V2:]] = cir.get_global @foo : !cir.ptr> +// CHECK: %[[#V3:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK: %[[#V4:]] = cir.cast(array_to_ptrdecay, %[[#V2]] : !cir.ptr>), !cir.ptr +// CHECK: %[[#V5:]] = cir.ptr_stride(%[[#V4]] : !cir.ptr, %[[#V3]] : !s32i), !cir.ptr +// CHECK: cir.store %{{.+}}, %[[#V5]] : !s32i, !cir.ptr diff --git a/clang/test/CIR/CodeGen/array.cpp b/clang/test/CIR/CodeGen/array.cpp new file mode 100644 index 000000000000..b0807755cfec --- /dev/null +++ b/clang/test/CIR/CodeGen/array.cpp @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -Wno-return-stack-address -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void a0() { + int a[10]; +} + +// CHECK: cir.func @_Z2a0v() +// CHECK-NEXT: %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} + +void a1() { + int a[10]; + a[0] = 1; +} + +// CHECK: cir.func @_Z2a1v() +// CHECK-NEXT: %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} +// CHECK-NEXT: %1 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %2 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: %3 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr +// CHECK-NEXT: %4 = cir.ptr_stride(%3 : !cir.ptr, %2 : !s32i), !cir.ptr +// CHECK-NEXT: cir.store %1, %4 : !s32i, !cir.ptr + +int *a2() { + int a[4]; + return &a[0]; +} + +// CHECK: cir.func @_Z2a2v() -> !cir.ptr +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} +// CHECK-NEXT: %2 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: %3 = cir.cast(array_to_ptrdecay, %1 : !cir.ptr>), !cir.ptr +// CHECK-NEXT: %4 = cir.ptr_stride(%3 : !cir.ptr, %2 : !s32i), !cir.ptr +// CHECK-NEXT: cir.store %4, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %5 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return %5 : !cir.ptr + +void local_stringlit() { + const char *s = "whatnow"; +} + +// CHECK: cir.global "private" constant internal dsolocal @".str" = #cir.const_array<"whatnow\00" : !cir.array> : !cir.array {alignment = 1 : i64} +// CHECK: cir.func @_Z15local_stringlitv() +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.get_global @".str" : !cir.ptr> +// CHECK-NEXT: %2 = cir.cast(array_to_ptrdecay, %1 : !cir.ptr>), !cir.ptr +// CHECK-NEXT: cir.store %2, %0 : !cir.ptr, !cir.ptr> + +int multidim(int i, int j) { + int arr[2][2]; + return arr[i][j]; +} + +// CHECK: %3 = cir.alloca !cir.array x 2>, !cir.ptr x 2>> +// Stride first dimension (stride = 2) +// CHECK: %4 = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK: %5 = cir.cast(array_to_ptrdecay, %3 : !cir.ptr x 2>>), !cir.ptr> +// CHECK: %6 = cir.ptr_stride(%5 : !cir.ptr>, %4 : !s32i), !cir.ptr> +// Stride second dimension (stride = 1) +// CHECK: %7 = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK: %8 = cir.cast(array_to_ptrdecay, %6 : !cir.ptr>), !cir.ptr +// CHECK: %9 = cir.ptr_stride(%8 : !cir.ptr, %7 : !s32i), !cir.ptr + +// Should globally zero-initialize null arrays. +int globalNullArr[] = {0, 0}; +// CHECK: cir.global external @globalNullArr = #cir.zero : !cir.array + +// Should implicitly zero-initialize global array elements. 
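+// Only arr[0] has an explicit initializer; the two trailing elements are
+// represented as #cir.zero inside the #cir.const_array attribute checked below.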
+struct S { + int i; +} arr[3] = {{1}}; +// CHECK: cir.global external @arr = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S, #cir.zero : !ty_S, #cir.zero : !ty_S]> : !cir.array + +void testPointerDecaySubscriptAccess(int arr[]) { +// CHECK: cir.func @{{.+}}testPointerDecaySubscriptAccess + arr[1]; + // CHECK: %[[#BASE:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr + // CHECK: %[[#DIM1:]] = cir.const #cir.int<1> : !s32i + // CHECK: cir.ptr_stride(%[[#BASE]] : !cir.ptr, %[[#DIM1]] : !s32i), !cir.ptr +} + +void testPointerDecayedArrayMultiDimSubscriptAccess(int arr[][3]) { +// CHECK: cir.func @{{.+}}testPointerDecayedArrayMultiDimSubscriptAccess + arr[1][2]; + // CHECK: %[[#V1:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> + // CHECK: %[[#V2:]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[#V3:]] = cir.ptr_stride(%[[#V1]] : !cir.ptr>, %[[#V2]] : !s32i), !cir.ptr> + // CHECK: %[[#V4:]] = cir.const #cir.int<2> : !s32i + // CHECK: %[[#V5:]] = cir.cast(array_to_ptrdecay, %[[#V3]] : !cir.ptr>), !cir.ptr + // CHECK: cir.ptr_stride(%[[#V5]] : !cir.ptr, %[[#V4]] : !s32i), !cir.ptr +} diff --git a/clang/test/CIR/CodeGen/asm.c b/clang/test/CIR/CodeGen/asm.c new file mode 100644 index 000000000000..19b9c7d18637 --- /dev/null +++ b/clang/test/CIR/CodeGen/asm.c @@ -0,0 +1,349 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + + +// CHECK: cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [], +// CHECK: in_out = [], +// CHECK: {"" "~{dirflag},~{fpsr},~{flags}"}) side_effects +void empty1() { + __asm__ volatile("" : : : ); +} + +// CHECK: cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [], +// CHECK: in_out = [], +// CHECK: {"xyz" "~{dirflag},~{fpsr},~{flags}"}) side_effects +void empty2() { + __asm__ volatile("xyz" : : : ); +} + +// CHECK: cir.asm(x86_att, +// CHECK: out = [%0 : !cir.ptr (maybe_memory)], +// CHECK: in = [], +// CHECK: in_out = [%0 : !cir.ptr (maybe_memory)], +// CHECK: {"" "=*m,*m,~{dirflag},~{fpsr},~{flags}"}) side_effects +void empty3(int x) { + __asm__ volatile("" : "+m"(x)); +} + +// CHECK: cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [%0 : !cir.ptr (maybe_memory)], +// CHECK: in_out = [], +// CHECK: {"" "*m,~{dirflag},~{fpsr},~{flags}"}) side_effects +void empty4(int x) { + __asm__ volatile("" : : "m"(x)); +} + +// CHECK: cir.asm(x86_att, +// CHECK: out = [%0 : !cir.ptr (maybe_memory)], +// CHECK: in = [], +// CHECK: in_out = [], +// CHECK: {"" "=*m,~{dirflag},~{fpsr},~{flags}"}) side_effects +void empty5(int x) { + __asm__ volatile("" : "=m"(x)); +} + +// CHECK: %3 = cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [], +// CHECK: in_out = [%2 : !s32i], +// CHECK: {"" "=&r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !ty_anon2E0_ +void empty6(int x) { + __asm__ volatile("" : "=&r"(x), "+&r"(x)); +} + +// CHECK: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr, ["a"] +// CHECK: [[TMP1:%.*]] = cir.load %0 : !cir.ptr, !u32i +// CHECK: [[TMP2:%.*]] = cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [%3 : !u32i], +// CHECK: in_out = [], +// CHECK: {"addl $$42, $1" "=r,r,~{dirflag},~{fpsr},~{flags}"}) -> !s32i +// CHECK: cir.store [[TMP2]], [[TMP0]] : !s32i, !cir.ptr loc(#loc42) +unsigned add1(unsigned int x) { + int a; + __asm__("addl $42, %[val]" + : "=r" (a) + : [val] "r" (x) + ); + + return a; +} + +// CHECK: [[TMP0:%.*]] = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: cir.store %arg0, [[TMP0]] : !u32i, !cir.ptr +// CHECK: 
[[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr, !u32i +// CHECK: [[TMP2:%.*]] = cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [], +// CHECK: in_out = [%2 : !u32i], +// CHECK: {"addl $$42, $0" "=r,0,~{dirflag},~{fpsr},~{flags}"}) -> !u32i +// CHECK: cir.store [[TMP2]], [[TMP0]] : !u32i, !cir.ptr +unsigned add2(unsigned int x) { + __asm__("addl $42, %[val]" + : [val] "+r" (x) + ); + return x; +} + + +// CHECK: [[TMP0:%.*]] = cir.alloca !u32i, !cir.ptr, ["x", init] +// CHECK: [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr, !u32i +// CHECK: [[TMP2:%.*]] = cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [], +// CHECK: in_out = [%2 : !u32i], +// CHECK: {"addl $$42, $0 \0A\09 subl $$1, $0 \0A\09 imul $$2, $0" "=r,0,~{dirflag},~{fpsr},~{flags}"}) -> !u32i +// CHECK: cir.store [[TMP2]], [[TMP0]] : !u32i, !cir.ptr +unsigned add3(unsigned int x) { // ((42 + x) - 1) * 2 + __asm__("addl $42, %[val] \n\t\ + subl $1, %[val] \n\t\ + imul $2, %[val]" + : [val] "+r" (x) + ); + return x; +} + +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["x", init] +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP1:%.*]] = cir.load deref [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: cir.asm(x86_att, +// CHECK: out = [%1 : !cir.ptr (maybe_memory)], +// CHECK: in = [], +// CHECK: in_out = [], +// CHECK: {"addl $$42, $0" "=*m,~{dirflag},~{fpsr},~{flags}"}) +// CHECK-NEXT: cir.return +void add4(int *x) { + __asm__("addl $42, %[addr]" : [addr] "=m" (*x)); +} + + +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.float, !cir.ptr, ["x", init] +// CHECK: [[TMP1:%.*]] = cir.alloca !cir.float, !cir.ptr, ["y", init] +// CHECK: [[TMP2:%.*]] = cir.alloca !cir.float, !cir.ptr, ["r"] +// CHECK: cir.store %arg0, [[TMP0]] : !cir.float, !cir.ptr +// CHECK: cir.store %arg1, [[TMP1]] : !cir.float, !cir.ptr +// CHECK: [[TMP3:%.*]] = cir.load [[TMP0]] : !cir.ptr, !cir.float +// CHECK: [[TMP4:%.*]] = cir.load [[TMP1]] : !cir.ptr, !cir.float +// CHECK: [[TMP5:%.*]] = cir.asm(x86_att, +// CHECK: out = [], +// CHECK: in = [%4 : !cir.float, %5 : !cir.float], +// CHECK: in_out = [], +// CHECK: {"flds $1; flds $2; faddp" "=&{st},imr,imr,~{dirflag},~{fpsr},~{flags}"}) -> !cir.float +// CHECK: cir.store [[TMP5]], [[TMP2]] : !cir.float, !cir.ptr +float add5(float x, float y) { + float r; + __asm__("flds %[x]; flds %[y]; faddp" + : "=&t" (r) + : [x] "g" (x), [y] "g" (y)); + return r; +} + +/* +There are tests from clang/test/CodeGen/asm.c. 
No checks for now - we just make +sure no crashes happen +*/ + + +void t1(int len) { + __asm__ volatile("" : "=&r"(len), "+&r"(len)); +} + +void t2(unsigned long long t) { + __asm__ volatile("" : "+m"(t)); +} + +void t3(unsigned char *src, unsigned long long temp) { + __asm__ volatile("" : "+m"(temp), "+r"(src)); +} + +void t4(void) { + unsigned long long a; + struct reg { unsigned long long a, b; } b; + + __asm__ volatile ("":: "m"(a), "m"(b)); +} + +void t5(int i) { + asm("nop" : "=r"(i) : "0"(t5)); +} + +void t6(void) { + __asm__ volatile("" : : "i" (t6)); +} + +void t7(int a) { + __asm__ volatile("T7 NAMED: %[input]" : "+r"(a): [input] "i" (4)); +} + +void t8(void) { + __asm__ volatile("T8 NAMED MODIFIER: %c[input]" :: [input] "i" (4)); +} + +unsigned t9(unsigned int a) { + asm("bswap %0 %1" : "+r" (a)); + return a; +} + +void t10(int r) { + __asm__("PR3908 %[lf] %[xx] %[li] %[r]" : [r] "+r" (r) : [lf] "mx" (0), [li] "mr" (0), [xx] "x" ((double)(0))); +} + +unsigned t11(signed char input) { + unsigned output; + __asm__("xyz" + : "=a" (output) + : "0" (input)); + return output; +} + +unsigned char t12(unsigned input) { + unsigned char output; + __asm__("xyz" + : "=a" (output) + : "0" (input)); + return output; +} + +unsigned char t13(unsigned input) { + unsigned char output; + __asm__("xyz %1" + : "=a" (output) + : "0" (input)); + return output; +} + +struct large { + int x[1000]; +}; + +unsigned long t15(int x, struct large *P) { + __asm__("xyz " + : "=r" (x) + : "m" (*P), "0" (x)); + return x; +} + +// bitfield destination of an asm. +struct S { + int a : 4; +}; + +void t14(struct S *P) { + __asm__("abc %0" : "=r"(P->a) ); +} + +int t16(void) { + int a,b; + asm ( "nop;" + :"=%c" (a) + : "r" (b) + ); + return 0; +} + +void t17(void) { + int i; + __asm__ ( "nop": "=m"(i)); +} + +int t18(unsigned data) { + int a, b; + + asm("xyz" :"=a"(a), "=d"(b) : "a"(data)); + return a + b; +} + +int t19(unsigned data) { + int a, b; + + asm("x{abc|def|ghi}z" :"=r"(a): "r"(data)); + return a + b; +} + +// skip t20 and t21: long double is not supported + +// accept 'l' constraint +unsigned char t22(unsigned char a, unsigned char b) { + unsigned int la = a; + unsigned int lb = b; + unsigned int bigres; + unsigned char res; + __asm__ ("0:\n1:\n" : [bigres] "=la"(bigres) : [la] "0"(la), [lb] "c"(lb) : + "edx", "cc"); + res = bigres; + return res; +} + +// accept 'l' constraint +unsigned char t23(unsigned char a, unsigned char b) { + unsigned int la = a; + unsigned int lb = b; + unsigned char res; + __asm__ ("0:\n1:\n" : [res] "=la"(res) : [la] "0"(la), [lb] "c"(lb) : + "edx", "cc"); + return res; +} + +void *t24(char c) { + void *addr; + __asm__ ("foobar" : "=a" (addr) : "0" (c)); + return addr; +} + +void t25(void) +{ + __asm__ __volatile__( \ + "finit" \ + : \ + : \ + :"st","st(1)","st(2)","st(3)", \ + "st(4)","st(5)","st(6)","st(7)", \ + "fpsr","fpcr" \ + ); +} + +//t26 skipped - no vector type support + +// Check to make sure the inline asm non-standard dialect attribute _not_ is +// emitted. +void t27(void) { + asm volatile("nop"); +} + +// Check handling of '*' and '#' constraint modifiers. 
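+// Like the rest of the tests copied from clang/test/CodeGen/asm.c, t28 has no
+// FileCheck lines; it only verifies that the "i#*X,*r" constraint string makes
+// it through CIR code generation without crashing.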
+void t28(void) +{ + asm volatile ("/* %0 */" : : "i#*X,*r" (1)); +} + +static unsigned t29_var[1]; + +void t29(void) { + asm volatile("movl %%eax, %0" + : + : "m"(t29_var)); +} + +void t30(int len) { + __asm__ volatile("" + : "+&&rm"(len)); +} + +void t31(int len) { + __asm__ volatile("" + : "+%%rm"(len), "+rm"(len)); +} + +//t32 skipped: no goto + +void *t33(void *ptr) +{ + void *ret; + asm ("lea %1, %0" : "=r" (ret) : "p" (ptr)); + return ret; +} diff --git a/clang/test/CIR/CodeGen/assign-operator.cpp b/clang/test/CIR/CodeGen/assign-operator.cpp new file mode 100644 index 000000000000..5942beb296dc --- /dev/null +++ b/clang/test/CIR/CodeGen/assign-operator.cpp @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -std=c++17 -mconstructor-aliases -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// RUN: %clang_cc1 -std=c++17 -mconstructor-aliases -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -clangir-disable-emit-cxx-default %s -o %t-disable.cir +// RUN: FileCheck --input-file=%t-disable.cir %s --check-prefix=DISABLE + +int strlen(char const *); + +struct String { + long size; + long capacity; + + String() : size{0}, capacity{0} {} + String(char const *s) : size{strlen(s)}, capacity{size} {} + // StringView::StringView(String const&) + // + // CHECK: cir.func linkonce_odr @_ZN10StringViewC2ERK6String + // CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} + // CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} + // CHECK: cir.store %arg0, %0 : !cir.ptr + // CHECK: cir.store %arg1, %1 : !cir.ptr + // CHECK: %2 = cir.load %0 : !cir.ptr> + + // Get address of `this->size` + + // CHECK: %3 = cir.get_member %2[0] {name = "size"} + + // Get address of `s` + + // CHECK: %4 = cir.load %1 : !cir.ptr> + + // Get the address of s.size + + // CHECK: %5 = cir.get_member %4[0] {name = "size"} + + // Load value from s.size and store in this->size + + // CHECK: %6 = cir.load %5 : !cir.ptr, !s64i + // CHECK: cir.store %6, %3 : !s64i, !cir.ptr + // CHECK: cir.return + // CHECK: } + + // DISABLE: cir.func linkonce_odr @_ZN10StringViewC2ERK6String + // DISABLE-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} + + // StringView::operator=(StringView&&) + // + // CHECK: cir.func linkonce_odr @_ZN10StringViewaSEOS_ + // CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} + // CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["", init] {alignment = 8 : i64} + // CHECK: %2 = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} + // CHECK: cir.store %arg0, %0 : !cir.ptr + // CHECK: cir.store %arg1, %1 : !cir.ptr + // CHECK: %3 = cir.load deref %0 : !cir.ptr> + // CHECK: %4 = cir.load %1 : !cir.ptr> + // CHECK: %5 = cir.get_member %4[0] {name = "size"} + // CHECK: %6 = cir.load %5 : !cir.ptr, !s64i + // CHECK: %7 = cir.get_member %3[0] {name = "size"} + // CHECK: cir.store %6, %7 : !s64i, !cir.ptr + // CHECK: cir.store %3, %2 : !cir.ptr + // CHECK: %8 = cir.load %2 : !cir.ptr> + // CHECK: cir.return %8 : !cir.ptr + // CHECK: } + + // DISABLE: cir.func private @_ZN10StringViewaSEOS_ + // DISABLE-NEXT: cir.func @main() +}; + +struct StringView { + long size; + + StringView(const String &s) : size{s.size} {} + StringView() : size{0} {} +}; + +int main() { + StringView sv; + { + String s = "Hi"; + sv = s; + } +} + +// CHECK: cir.func @main() -> !s32i +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: %1 = 
cir.alloca !ty_StringView, !cir.ptr, ["sv", init] {alignment = 8 : i64} +// CHECK: cir.call @_ZN10StringViewC2Ev(%1) : (!cir.ptr) -> () +// CHECK: cir.scope { +// CHECK: %3 = cir.alloca !ty_String, !cir.ptr, ["s", init] {alignment = 8 : i64} +// CHECK: %4 = cir.get_global @".str" : !cir.ptr> +// CHECK: %5 = cir.cast(array_to_ptrdecay, %4 : !cir.ptr>), !cir.ptr +// CHECK: cir.call @_ZN6StringC2EPKc(%3, %5) : (!cir.ptr, !cir.ptr) -> () +// CHECK: cir.scope { +// CHECK: %6 = cir.alloca !ty_StringView, !cir.ptr, ["ref.tmp0"] {alignment = 8 : i64} +// CHECK: cir.call @_ZN10StringViewC2ERK6String(%6, %3) : (!cir.ptr, !cir.ptr) -> () +// CHECK: %7 = cir.call @_ZN10StringViewaSEOS_(%1, %6) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK: } +// CHECK: } +// CHECK: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK: cir.return %2 : !s32i +// CHECK: } diff --git a/clang/test/CIR/CodeGen/atomic-runtime.cpp b/clang/test/CIR/CodeGen/atomic-runtime.cpp new file mode 100644 index 000000000000..dfe74a9e77c9 --- /dev/null +++ b/clang/test/CIR/CodeGen/atomic-runtime.cpp @@ -0,0 +1,309 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Test __atomic_* built-ins that have a memory order parameter with a runtime +// value. This requires generating a switch statement, so the amount of +// generated code is surprisingly large. +// +// Only a representative sample of atomic operations are tested: one read-only +// operation (atomic_load), one write-only operation (atomic_store), one +// read-write operation (atomic_exchange), and the most complex operation +// (atomic_compare_exchange). + +int runtime_load(int *ptr, int order) { + return __atomic_load_n(ptr, order); +} + +// CHECK: %[[ptr:.*]] = cir.load %[[ptr_var:.*]] : !cir.ptr>, !cir.ptr +// CHECK: %[[order:.*]] = cir.load %[[order_var:.*]] : !cir.ptr, !s32i +// CHECK: cir.switch (%[[order]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[T8:.*]] = cir.load atomic(relaxed) %[[ptr]] : !cir.ptr, !s32i +// CHECK: cir.store %[[T8]], %[[temp_var:.*]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[T8:.*]] = cir.load atomic(acquire) %[[ptr]] : !cir.ptr, !s32i +// CHECK: cir.store %[[T8]], %[[temp_var]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[T8:.*]] = cir.load atomic(seq_cst) %[[ptr]] : !cir.ptr, !s32i +// CHECK: cir.store %[[T8]], %[[temp_var]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] + +void atomic_store_n(int* ptr, int val, int order) { + __atomic_store_n(ptr, val, order); +} + +// CHECK: %[[ptr:.*]] = cir.load %[[ptr_var:.*]] : !cir.ptr>, !cir.ptr +// CHECK: %[[order:.*]] = cir.load %[[order_var:.*]] : !cir.ptr, !s32i +// CHECK: %[[val:.*]] = cir.load %[[val_var:.*]] : !cir.ptr, !s32i +// CHECK: cir.store %[[val]], %[[temp_var:.*]] : !s32i, !cir.ptr +// CHECK: cir.switch (%[[order]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[T7:.*]] = cir.load %[[temp_var:.*]] : !cir.ptr, !s32i +// CHECK: cir.store atomic(relaxed) %[[T7]], %[[ptr]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 3) { +// CHECK: %[[T7:.*]] = cir.load %[[temp_var:.*]] : !cir.ptr, !s32i +// CHECK: cir.store atomic(release) %[[T7]], %[[ptr]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[T7:.*]] = cir.load %[[temp_var:.*]] : !cir.ptr, !s32i +// CHECK: cir.store atomic(seq_cst) 
%[[T7]], %[[ptr]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] + +int atomic_exchange_n(int* ptr, int val, int order) { + return __atomic_exchange_n(ptr, val, order); +} + +// CHECK: %[[ptr:.*]] = cir.load %[[ptr_var:.*]] : !cir.ptr>, !cir.ptr +// CHECK: %[[order:.*]] = cir.load %[[order_var:.*]] : !cir.ptr, !s32i +// CHECK: %[[val:.*]] = cir.load %[[val_var:.*]] : !cir.ptr, !s32i +// CHECK: cir.store %[[val]], %[[temp_var:.*]] : !s32i, !cir.ptr +// CHECK: cir.switch (%[[order]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[T11:.*]] = cir.load %[[temp_var]] : !cir.ptr, !s32i +// CHECK: %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr, %[[T11]] : !s32i, relaxed) : !s32i +// CHECK: cir.store %[[T12]], %[[result:.*]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[T11:.*]] = cir.load %[[temp_var]] : !cir.ptr, !s32i +// CHECK: %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr, %[[T11]] : !s32i, acquire) : !s32i +// CHECK: cir.store %[[T12]], %[[result]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 3) { +// CHECK: %[[T11:.*]] = cir.load %[[temp_var]] : !cir.ptr, !s32i +// CHECK: %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr, %[[T11]] : !s32i, release) : !s32i +// CHECK: cir.store %[[T12]], %[[result]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 4) { +// CHECK: %[[T11:.*]] = cir.load %[[temp_var]] : !cir.ptr, !s32i +// CHECK: %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr, %[[T11]] : !s32i, acq_rel) : !s32i +// CHECK: cir.store %[[T12]], %[[result]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[T11:.*]] = cir.load %[[temp_var]] : !cir.ptr, !s32i +// CHECK: %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr, %[[T11]] : !s32i, seq_cst) : !s32i +// CHECK: cir.store %[[T12]], %[[result]] : !s32i, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] + +bool atomic_compare_exchange_n(int* ptr, int* expected, + int desired, int success, int failure) { + return __atomic_compare_exchange_n(ptr, expected, desired, false, + success, failure); +} + +// CHECK: %[[ptr:.*]] = cir.load %[[T0:.*]] : !cir.ptr>, !cir.ptr +// CHECK: %[[success:.*]] = cir.load %[[T3:.*]] : !cir.ptr, !s32i +// CHECK: %[[expected_addr:.*]] = cir.load %[[T1:.*]] : !cir.ptr>, !cir.ptr +// CHECK: %[[T11:.*]] = cir.load %[[T2:.*]] : !cir.ptr, !s32i +// CHECK: cir.store %[[T11]], %[[desired_var:.*]] : !s32i, !cir.ptr +// CHECK: %[[failure:.*]] = cir.load %[[T4:.*]] : !cir.ptr, !s32i +// CHECK: %[[T13:.*]] = cir.const #false +// CHECK: cir.switch (%[[success]] : !s32i) [ +// CHECK: case (default) { +// CHECK: cir.switch (%[[failure]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = relaxed, failure = relaxed) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var:.*]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load 
%[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = relaxed, failure = acquire) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = relaxed, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: cir.switch (%[[failure]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acquire, failure = relaxed) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acquire, failure = acquire) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acquire, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 3) { +// CHECK: cir.switch (%[[failure]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] 
: !s32i, %[[desired]] : !s32i, success = release, failure = relaxed) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = release, failure = acquire) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = release, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 4) { +// CHECK: cir.switch (%[[failure]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acq_rel, failure = relaxed) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acq_rel, failure = acquire) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acq_rel, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, 
%[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: cir.switch (%[[failure]] : !s32i) [ +// CHECK: case (default) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = seq_cst, failure = relaxed) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (anyof, [1, 2] : !s32i) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = seq_cst, failure = acquire) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: }, +// CHECK: case (equal, 5) { +// CHECK: %[[expected:.*]] = cir.load %[[expected_addr]] : !cir.ptr, !s32i +// CHECK: %[[desired:.*]] = cir.load %[[desired_var]] : !cir.ptr, !s32i +// CHECK: %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr, %[[expected]] : !s32i, %[[desired]] : !s32i, success = seq_cst, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[succeeded]] { +// CHECK: cir.store %old, %[[expected_addr]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: cir.store %cmp, %[[result_var]] : !cir.bool, !cir.ptr +// CHECK: cir.break +// CHECK: } +// CHECK: ] +// CHECK: cir.break +// CHECK: } +// CHECK: ] + diff --git a/clang/test/CIR/CodeGen/atomic-xchg-field.c b/clang/test/CIR/CodeGen/atomic-xchg-field.c new file mode 100644 index 000000000000..53325e01f34d --- /dev/null +++ b/clang/test/CIR/CodeGen/atomic-xchg-field.c @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +typedef struct __Base { + unsigned long id; + unsigned int a; + unsigned int n; + unsigned char x; + unsigned short u; +} Base; + +struct w { + Base _base; + const void * ref; +}; + +typedef struct w *wPtr; + +void field_access(wPtr item) { + __atomic_exchange_n((&item->ref), (((void*)0)), 5); +} + +// CHECK: ![[W:.*]] = !cir.struct, {{.*}} {alignment = 8 : i64} +// CHECK: %[[FIELD:.*]] = cir.load %[[WADDR]] +// CHECK: %[[MEMBER:.*]] = cir.get_member %[[FIELD]][1] {name = "ref"} +// CHECK: cir.atomic.xchg(%[[MEMBER]] : !cir.ptr>, {{.*}} : !u64i, seq_cst) + +// LLVM-LABEL: @field_access +// LLVM: = alloca ptr, i64 1, align 8 +// LLVM: %[[VAL_ADDR:.*]] = alloca ptr, i64 1, align 8 +// LLVM: %[[RES_ADDR:.*]] = alloca ptr, i64 1, align 8 + +// LLVM: %[[MEMBER:.*]] = getelementptr 
%struct.w, ptr {{.*}}, i32 0, i32 1 +// LLVM: store ptr null, ptr %[[VAL_ADDR]], align 8 +// LLVM: %[[VAL:.*]] = load i64, ptr %[[VAL_ADDR]], align 8 +// LLVM: %[[RES:.*]] = atomicrmw xchg ptr %[[MEMBER]], i64 %[[VAL]] seq_cst, align 8 +// LLVM: store i64 %[[RES]], ptr %4, align 8 +// LLVM: load ptr, ptr %[[RES_ADDR]], align 8 +// LLVM: ret void + +void structAtomicExchange(unsigned referenceCount, wPtr item) { + __atomic_compare_exchange_n((&item->_base.a), (&referenceCount), (referenceCount + 1), 1 , 5, 5); +} + +// CHECK-LABEL: @structAtomicExchange +// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !u32i, {{.*}} : !u32i, success = seq_cst, failure = seq_cst) weak : (!u32i, !cir.bool) + +// LLVM-LABEL: @structAtomicExchange +// LLVM: load i32 +// LLVM: add i32 +// LLVM: store i32 +// LLVM: %[[EXP:.*]] = load i32 +// LLVM: %[[DES:.*]] = load i32 +// LLVM: %[[RES:.*]] = cmpxchg weak ptr %9, i32 %[[EXP]], i32 %[[DES]] seq_cst seq_cst +// LLVM: %[[OLD:.*]] = extractvalue { i32, i1 } %[[RES]], 0 +// LLVM: %[[CMP:.*]] = extractvalue { i32, i1 } %[[RES]], 1 +// LLVM: %[[Z:.*]] = zext i1 %[[CMP]] to i8, !dbg !16 +// LLVM: %[[X:.*]] = xor i8 %[[Z]], 1, !dbg !16 +// LLVM: %[[FAIL:.*]] = trunc i8 %[[X]] to i1, !dbg !16 + +// LLVM: br i1 %[[FAIL:.*]], label %[[STORE_OLD:.*]], label %[[CONTINUE:.*]], +// LLVM: [[STORE_OLD]]: +// LLVM: store i32 %[[OLD]], ptr +// LLVM: br label %[[CONTINUE]] + +// LLVM: [[CONTINUE]]: +// LLVM: store i8 %[[Z]], ptr {{.*}}, align 1 +// LLVM: ret void + +void f2(const void *cf); + +void structLoad(unsigned referenceCount, wPtr item) { + f2(__atomic_load_n(&item->ref, 5)); +} + +// CHECK-LABEL: @structLoad +// CHECK: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["atomic-temp"] +// CHECK: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %6 : !cir.ptr, !u64i +// CHECK: %[[RES:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr>), !cir.ptr +// CHECK: cir.store %[[ATOMIC_LOAD]], %[[RES]] : !u64i, !cir.ptr + +// No LLVM tests needed for this one, already covered elsewhere. \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/atomic.cpp b/clang/test/CIR/CodeGen/atomic.cpp new file mode 100644 index 000000000000..2e7c93e32270 --- /dev/null +++ b/clang/test/CIR/CodeGen/atomic.cpp @@ -0,0 +1,500 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// Available on resource dir. 
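+// The atomic_int typedef, the atomic_load/atomic_store/atomic_compare_exchange_strong
+// generic functions, and the memory_order_* constants used below come from this
+// header rather than being declared in the test itself.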
+#include + +typedef struct _a { + _Atomic(int) d; +} at; + +void m() { at y; } + +// CHECK: ![[A:.*]] = !cir.struct}> + +int basic_binop_fetch(int *i) { + return __atomic_add_fetch(i, 1, memory_order_seq_cst); +} + +// CHECK: cir.func @_Z17basic_binop_fetchPi +// CHECK: %[[ARGI:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["i", init] {alignment = 8 : i64} +// CHECK: %[[ONE_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, [".atomictmp"] {alignment = 4 : i64} +// CHECK: cir.store %arg0, %[[ARGI]] : !cir.ptr, !cir.ptr> +// CHECK: %[[I:.*]] = cir.load %[[ARGI]] : !cir.ptr>, !cir.ptr +// CHECK: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store %[[ONE]], %[[ONE_ADDR]] : !s32i, !cir.ptr +// CHECK: %[[VAL:.*]] = cir.load %[[ONE_ADDR]] : !cir.ptr, !s32i +// CHECK: cir.atomic.fetch(add, %[[I]] : !cir.ptr, %[[VAL]] : !s32i, seq_cst) : !s32i + +// LLVM: define dso_local i32 @_Z17basic_binop_fetchPi +// LLVM: %[[RMW:.*]] = atomicrmw add ptr {{.*}}, i32 %[[VAL:.*]] seq_cst, align 4 +// LLVM: add i32 %[[RMW]], %[[VAL]] + +int other_binop_fetch(int *i) { + __atomic_sub_fetch(i, 1, memory_order_relaxed); + __atomic_and_fetch(i, 1, memory_order_consume); + __atomic_or_fetch(i, 1, memory_order_acquire); + return __atomic_xor_fetch(i, 1, memory_order_release); +} + +// CHECK: cir.func @_Z17other_binop_fetchPi +// CHECK: cir.atomic.fetch(sub, {{.*}}, relaxed +// CHECK: cir.atomic.fetch(and, {{.*}}, acquire +// CHECK: cir.atomic.fetch(or, {{.*}}, acquire +// CHECK: cir.atomic.fetch(xor, {{.*}}, release + +// LLVM: define dso_local i32 @_Z17other_binop_fetchPi +// LLVM: %[[RMW_SUB:.*]] = atomicrmw sub ptr {{.*}} monotonic +// LLVM: sub i32 %[[RMW_SUB]], {{.*}} +// LLVM: %[[RMW_AND:.*]] = atomicrmw and ptr {{.*}} acquire +// LLVM: and i32 %[[RMW_AND]], {{.*}} +// LLVM: %[[RMW_OR:.*]] = atomicrmw or ptr {{.*}} acquire +// LLVM: or i32 %[[RMW_OR]], {{.*}} +// LLVM: %[[RMW_XOR:.*]] = atomicrmw xor ptr {{.*}} release +// LLVM: xor i32 %[[RMW_XOR]], {{.*}} + +int nand_binop_fetch(int *i) { + return __atomic_nand_fetch(i, 1, memory_order_acq_rel); +} + +// CHECK: cir.func @_Z16nand_binop_fetchPi +// CHECK: cir.atomic.fetch(nand, {{.*}}, acq_rel + +// LLVM: define dso_local i32 @_Z16nand_binop_fetchPi +// LLVM: %[[RMW_NAND:.*]] = atomicrmw nand ptr {{.*}} acq_rel +// LLVM: %[[AND:.*]] = and i32 %[[RMW_NAND]] +// LLVM: = xor i32 %[[AND]], -1 + +int fp_binop_fetch(float *i) { + __atomic_add_fetch(i, 1, memory_order_seq_cst); + return __atomic_sub_fetch(i, 1, memory_order_seq_cst); +} + +// CHECK: cir.func @_Z14fp_binop_fetchPf +// CHECK: cir.atomic.fetch(add, +// CHECK: cir.atomic.fetch(sub, + +// LLVM: define dso_local i32 @_Z14fp_binop_fetchPf +// LLVM: %[[RMW_FADD:.*]] = atomicrmw fadd ptr +// LLVM: fadd float %[[RMW_FADD]] +// LLVM: %[[RMW_FSUB:.*]] = atomicrmw fsub ptr +// LLVM: fsub float %[[RMW_FSUB]] + +int fetch_binop(int *i) { + __atomic_fetch_add(i, 1, memory_order_seq_cst); + __atomic_fetch_sub(i, 1, memory_order_seq_cst); + __atomic_fetch_and(i, 1, memory_order_seq_cst); + __atomic_fetch_or(i, 1, memory_order_seq_cst); + __atomic_fetch_xor(i, 1, memory_order_seq_cst); + return __atomic_fetch_nand(i, 1, memory_order_seq_cst); +} + +// CHECK: cir.func @_Z11fetch_binopPi +// CHECK: cir.atomic.fetch(add, {{.*}}) fetch_first +// CHECK: cir.atomic.fetch(sub, {{.*}}) fetch_first +// CHECK: cir.atomic.fetch(and, {{.*}}) fetch_first +// CHECK: cir.atomic.fetch(or, {{.*}}) fetch_first +// CHECK: cir.atomic.fetch(xor, {{.*}}) fetch_first +// CHECK: cir.atomic.fetch(nand, {{.*}}) fetch_first + +// LLVM: define dso_local 
i32 @_Z11fetch_binopPi +// LLVM: atomicrmw add ptr +// LLVM-NOT: add {{.*}} +// LLVM: atomicrmw sub ptr +// LLVM-NOT: sub {{.*}} +// LLVM: atomicrmw and ptr +// LLVM-NOT: and {{.*}} +// LLVM: atomicrmw or ptr +// LLVM-NOT: or {{.*}} +// LLVM: atomicrmw xor ptr +// LLVM-NOT: xor {{.*}} +// LLVM: atomicrmw nand ptr +// LLVM-NOT: nand {{.*}} + +void min_max_fetch(int *i) { + __atomic_fetch_max(i, 1, memory_order_seq_cst); + __atomic_fetch_min(i, 1, memory_order_seq_cst); + __atomic_max_fetch(i, 1, memory_order_seq_cst); + __atomic_min_fetch(i, 1, memory_order_seq_cst); +} + +// CHECK: cir.func @_Z13min_max_fetchPi +// CHECK: = cir.atomic.fetch(max, {{.*}}) fetch_first +// CHECK: = cir.atomic.fetch(min, {{.*}}) fetch_first +// CHECK: = cir.atomic.fetch(max, {{.*}}) : !s32i +// CHECK: = cir.atomic.fetch(min, {{.*}}) : !s32i + +// LLVM: define dso_local void @_Z13min_max_fetchPi +// LLVM: atomicrmw max ptr +// LLVM-NOT: icmp {{.*}} +// LLVM: atomicrmw min ptr +// LLVM-NOT: icmp {{.*}} +// LLVM: %[[MAX:.*]] = atomicrmw max ptr +// LLVM: %[[ICMP_MAX:.*]] = icmp sgt i32 %[[MAX]] +// LLVM: select i1 %[[ICMP_MAX]], i32 %[[MAX]] +// LLVM: %[[MIN:.*]] = atomicrmw min ptr +// LLVM: %[[ICMP_MIN:.*]] = icmp slt i32 %[[MIN]] +// LLVM: select i1 %[[ICMP_MIN]], i32 %[[MIN]] + +int fi1(_Atomic(int) *i) { + return __c11_atomic_load(i, memory_order_seq_cst); +} + +// CHECK: cir.func @_Z3fi1PU7_Atomici +// CHECK: cir.load atomic(seq_cst) + +// LLVM-LABEL: @_Z3fi1PU7_Atomici +// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4 + +int fi1a(int *i) { + int v; + __atomic_load(i, &v, memory_order_seq_cst); + return v; +} + +// CHECK-LABEL: @_Z4fi1aPi +// CHECK: cir.load atomic(seq_cst) + +// LLVM-LABEL: @_Z4fi1aPi +// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4 + +int fi1b(int *i) { + return __atomic_load_n(i, memory_order_seq_cst); +} + +// CHECK-LABEL: @_Z4fi1bPi +// CHECK: cir.load atomic(seq_cst) + +// LLVM-LABEL: @_Z4fi1bPi +// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4 + +int fi1c(atomic_int *i) { + return atomic_load(i); +} + +// CHECK-LABEL: @_Z4fi1cPU7_Atomici +// CHECK: cir.load atomic(seq_cst) + +// LLVM-LABEL: @_Z4fi1cPU7_Atomici +// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4 + +void fi2(_Atomic(int) *i) { + __c11_atomic_store(i, 1, memory_order_seq_cst); +} + +// CHECK-LABEL: @_Z3fi2PU7_Atomici +// CHECK: cir.store atomic(seq_cst) + +// LLVM-LABEL: @_Z3fi2PU7_Atomici +// LLVM: store atomic i32 {{.*}} seq_cst, align 4 + +void fi2a(int *i) { + int v = 1; + __atomic_store(i, &v, memory_order_seq_cst); +} + +// CHECK-LABEL: @_Z4fi2aPi +// CHECK: cir.store atomic(seq_cst) + +// LLVM-LABEL: @_Z4fi2aPi +// LLVM: store atomic i32 {{.*}} seq_cst, align 4 + +void fi2b(int *i) { + __atomic_store_n(i, 1, memory_order_seq_cst); +} + +// CHECK-LABEL: @_Z4fi2bPi +// CHECK: cir.store atomic(seq_cst) + +// LLVM-LABEL: @_Z4fi2bPi +// LLVM: store atomic i32 {{.*}} seq_cst, align 4 + +void fi2c(atomic_int *i) { + atomic_store(i, 1); +} + +struct S { + double x; +}; + +// CHECK-LABEL: @_Z4fi2cPU7_Atomici +// CHECK: cir.store atomic(seq_cst) + +// LLVM-LABEL: @_Z4fi2cPU7_Atomici +// LLVM: store atomic i32 {{.*}} seq_cst, align 4 + +void fd3(struct S *a, struct S *b, struct S *c) { + __atomic_exchange(a, b, c, memory_order_seq_cst); +} + +// CHECK-LABEL: @_Z3fd3P1SS0_S0_ +// CHECK: cir.atomic.xchg({{.*}} : !cir.ptr, {{.*}} : !u64i, seq_cst) : !u64i + +// FIXME: CIR is producing an over alignment of 8, only 4 needed. 
+// LLVM-LABEL: @_Z3fd3P1SS0_S0_ +// LLVM: [[A_ADDR:%.*]] = alloca ptr +// LLVM-NEXT: [[B_ADDR:%.*]] = alloca ptr +// LLVM-NEXT: [[C_ADDR:%.*]] = alloca ptr +// LLVM-NEXT: store ptr {{.*}}, ptr [[A_ADDR]] +// LLVM-NEXT: store ptr {{.*}}, ptr [[B_ADDR]] +// LLVM-NEXT: store ptr {{.*}}, ptr [[C_ADDR]] +// LLVM-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]] +// LLVM-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]] +// LLVM-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]] +// LLVM-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]] +// LLVM-NEXT: [[RESULT:%.*]] = atomicrmw xchg ptr [[LOAD_A_PTR]], i64 [[LOAD_B]] seq_cst +// LLVM-NEXT: store i64 [[RESULT]], ptr [[LOAD_C_PTR]] + +bool fd4(struct S *a, struct S *b, struct S *c) { + return __atomic_compare_exchange(a, b, c, 1, 5, 5); +} + +// CHECK-LABEL: @_Z3fd4P1SS0_S0_ +// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !u64i, {{.*}} : !u64i, success = seq_cst, failure = seq_cst) weak : (!u64i, !cir.bool) + +// LLVM-LABEL: @_Z3fd4P1SS0_S0_ +// LLVM: cmpxchg weak ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst, align 8 + +bool fi4a(int *i) { + int cmp = 0; + int desired = 1; + return __atomic_compare_exchange(i, &cmp, &desired, false, memory_order_acquire, memory_order_acquire); +} + +// CHECK-LABEL: @_Z4fi4aPi +// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s32i, {{.*}} : !s32i, success = acquire, failure = acquire) : (!s32i, !cir.bool) + +// LLVM-LABEL: @_Z4fi4aPi +// LLVM: %[[RES:.*]] = cmpxchg ptr %7, i32 %8, i32 %9 acquire acquire, align 4 +// LLVM: extractvalue { i32, i1 } %[[RES]], 0 +// LLVM: extractvalue { i32, i1 } %[[RES]], 1 + +bool fi4b(int *i) { + int cmp = 0; + return __atomic_compare_exchange_n(i, &cmp, 1, true, memory_order_acquire, memory_order_acquire); +} + +// CHECK-LABEL: @_Z4fi4bPi +// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s32i, {{.*}} : !s32i, success = acquire, failure = acquire) weak : (!s32i, !cir.bool) + +// LLVM-LABEL: @_Z4fi4bPi +// LLVM: %[[R:.*]] = cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} acquire acquire, align 4 +// LLVM: extractvalue { i32, i1 } %[[R]], 0 +// LLVM: extractvalue { i32, i1 } %[[R]], 1 + +bool fi4c(atomic_int *i) { + int cmp = 0; + return atomic_compare_exchange_strong(i, &cmp, 1); +} + +// CHECK-LABEL: @_Z4fi4cPU7_Atomici +// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s32i, {{.*}} : !s32i, success = seq_cst, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: %[[CMP:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool +// CHECK: cir.if %[[CMP:.*]] { +// CHECK: cir.store %old, {{.*}} : !s32i, !cir.ptr +// CHECK: } + +// LLVM-LABEL: @_Z4fi4cPU7_Atomici +// LLVM: cmpxchg ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4 + +bool fsb(bool *c) { + return __atomic_exchange_n(c, 1, memory_order_seq_cst); +} + +// CHECK-LABEL: @_Z3fsbPb +// CHECK: cir.atomic.xchg({{.*}} : !cir.ptr, {{.*}} : !u8i, seq_cst) : !u8i + +// LLVM-LABEL: @_Z3fsbPb +// LLVM: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1 + +void atomicinit(void) +{ + _Atomic(unsigned int) j = 12; + __c11_atomic_init(&j, 1); +} + +// CHECK-LABEL: @_Z10atomicinitv +// CHECK: %[[ADDR:.*]] = cir.alloca !u32i, !cir.ptr, ["j" +// CHECK: cir.store {{.*}}, %[[ADDR]] : !u32i, !cir.ptr +// CHECK: cir.store {{.*}}, %[[ADDR]] : !u32i, !cir.ptr + +// LLVM-LABEL: @_Z10atomicinitv +// LLVM: %[[ADDR:.*]] = alloca i32, i64 1, align 4 +// LLVM: store i32 12, ptr %[[ADDR]], align 4 +// LLVM: store i32 1, ptr %[[ADDR]], align 4 + 
+void incdec() { + _Atomic(unsigned int) j = 12; + __c11_atomic_fetch_add(&j, 1, 0); + __c11_atomic_fetch_sub(&j, 1, 0); +} + +// CHECK-LABEL: @_Z6incdecv +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !u32i, relaxed) fetch_first +// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr, {{.*}} : !u32i, relaxed) fetch_first + +// LLVM-LABEL: @_Z6incdecv +// LLVM: atomicrmw add ptr {{.*}}, i32 {{.*}} monotonic, align 4 +// LLVM: atomicrmw sub ptr {{.*}}, i32 {{.*}} monotonic, align 4 + +void inc_int(int* a, int b) { + int c = __sync_fetch_and_add(a, b); +} +// CHECK-LABEL: @_Z7inc_int +// CHECK: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CHECK: %[[VAL:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: %[[RES:.*]] = cir.atomic.fetch(add, %[[PTR]] : !cir.ptr, %[[VAL]] : !s32i, seq_cst) fetch_first : !s32i +// CHECK: cir.store %[[RES]], {{.*}} : !s32i, !cir.ptr + +// LLVM-LABEL: @_Z7inc_int +// LLVM: atomicrmw add ptr {{.*}}, i32 {{.*}} seq_cst, align 4 + + +// CHECK-LABEL: @_Z8inc_long +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !s64i, seq_cst) fetch_first : !s64i + +// LLVM-LABEL: @_Z8inc_long +// LLVM: atomicrmw add ptr {{.*}}, i64 {{.*}} seq_cst, align 8 + +void inc_long(long* a, long b) { + long c = __sync_fetch_and_add(a, 2); +} + +// CHECK-LABEL: @_Z9inc_short +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !s16i, seq_cst) fetch_first : !s16i + +// LLVM-LABEL: @_Z9inc_short +// LLVM: atomicrmw add ptr {{.*}}, i16 {{.*}} seq_cst, align 2 +void inc_short(short* a, short b) { + short c = __sync_fetch_and_add(a, 2); +} + +// CHECK-LABEL: @_Z8inc_byte +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !s8i, seq_cst) fetch_first : !s8i + +// LLVM-LABEL: @_Z8inc_byte +// LLVM: atomicrmw add ptr {{.*}}, i8 {{.*}} seq_cst, align 1 +void inc_byte(char* a, char b) { + char c = __sync_fetch_and_add(a, b); +} + + +// CHECK-LABEL: @_Z12cmp_bool_int +// CHECK: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CHECK: %[[CMP:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: %[[UPD:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: %[[OLD:.*]], %[[RES:.*]] = cir.atomic.cmp_xchg(%[[PTR]] : !cir.ptr, %[[CMP]] : !s32i, %[[UPD]] : !s32i, success = seq_cst, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: cir.store %[[RES]], {{.*}} : !cir.bool, !cir.ptr + +// LLVM-LABEL: @_Z12cmp_bool_int +// LLVM: %[[PTR:.*]] = load ptr +// LLVM: %[[CMP:.*]] = load i32 +// LLVM: %[[UPD:.*]] = load i32 +// LLVM: %[[RES:.*]] = cmpxchg ptr %[[PTR]], i32 %[[CMP]], i32 %[[UPD]] seq_cst seq_cst +// LLVM: %[[TMP:.*]] = extractvalue { i32, i1 } %[[RES]], 1 +// LLVM: %[[EXT:.*]] = zext i1 %[[TMP]] to i8 +// LLVM: store i8 %[[EXT]], ptr {{.*}} +void cmp_bool_int(int* p, int x, int u) { + bool r = __sync_bool_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z13cmp_bool_long +// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s64i, {{.*}} : !s64i, success = seq_cst, failure = seq_cst) : (!s64i, !cir.bool) + +// LLVM-LABEL: @_Z13cmp_bool_long +// LLVM: cmpxchg ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst +void cmp_bool_long(long* p, long x, long u) { + bool r = __sync_bool_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z14cmp_bool_short +// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s16i, {{.*}} : !s16i, success = seq_cst, failure = seq_cst) : (!s16i, !cir.bool) + +// LLVM-LABEL: @_Z14cmp_bool_short +// LLVM: cmpxchg ptr {{.*}}, i16 {{.*}}, i16 {{.*}} seq_cst seq_cst +void cmp_bool_short(short* p, short x, short u) { + bool r = 
__sync_bool_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z13cmp_bool_byte +// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s8i, {{.*}} : !s8i, success = seq_cst, failure = seq_cst) : (!s8i, !cir.bool) + +// LLVM-LABEL: @_Z13cmp_bool_byte +// LLVM: cmpxchg ptr {{.*}}, i8 {{.*}}, i8 {{.*}} seq_cst seq_cst +void cmp_bool_byte(char* p, char x, char u) { + bool r = __sync_bool_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z11cmp_val_int +// CHECK: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CHECK: %[[CMP:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: %[[UPD:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: %[[OLD:.*]], %[[RES:.*]] = cir.atomic.cmp_xchg(%[[PTR]] : !cir.ptr, %[[CMP]] : !s32i, %[[UPD]] : !s32i, success = seq_cst, failure = seq_cst) : (!s32i, !cir.bool) +// CHECK: cir.store %[[OLD]], {{.*}} : !s32i, !cir.ptr + +// LLVM-LABEL: @_Z11cmp_val_int +// LLVM: %[[PTR:.*]] = load ptr +// LLVM: %[[CMP:.*]] = load i32 +// LLVM: %[[UPD:.*]] = load i32 +// LLVM: %[[RES:.*]] = cmpxchg ptr %[[PTR]], i32 %[[CMP]], i32 %[[UPD]] seq_cst seq_cst +// LLVM: %[[TMP:.*]] = extractvalue { i32, i1 } %[[RES]], 0 +// LLVM: store i32 %[[TMP]], ptr {{.*}} +void cmp_val_int(int* p, int x, int u) { + int r = __sync_val_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z12cmp_val_long +// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s64i, {{.*}} : !s64i, success = seq_cst, failure = seq_cst) : (!s64i, !cir.bool) + +// LLVM-LABEL: @_Z12cmp_val_long +// LLVM: cmpxchg ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst +void cmp_val_long(long* p, long x, long u) { + long r = __sync_val_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z13cmp_val_short +// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s16i, {{.*}} : !s16i, success = seq_cst, failure = seq_cst) : (!s16i, !cir.bool) + +// LLVM-LABEL: @_Z13cmp_val_short +// LLVM: cmpxchg ptr {{.*}}, i16 {{.*}}, i16 {{.*}} seq_cst seq_cst +void cmp_val_short(short* p, short x, short u) { + short r = __sync_val_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z12cmp_val_byte +// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr, {{.*}} : !s8i, {{.*}} : !s8i, success = seq_cst, failure = seq_cst) : (!s8i, !cir.bool) + +// LLVM-LABEL: @_Z12cmp_val_byte +// LLVM: cmpxchg ptr {{.*}}, i8 {{.*}}, i8 {{.*}} seq_cst seq_cst +void cmp_val_byte(char* p, char x, char u) { + char r = __sync_val_compare_and_swap(p, x, u); +} + +// CHECK-LABEL: @_Z8inc_uint +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !u32i, seq_cst) fetch_first : !u32i + +// LLVM-LABEL: @_Z8inc_uint +// LLVM: atomicrmw add ptr {{.*}}, i32 {{.*}} seq_cst, align 4 +void inc_uint(unsigned int* a, int b) { + unsigned int c = __sync_fetch_and_add(a, b); +} + +// CHECK-LABEL: @_Z9inc_ulong +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !u64i, seq_cst) fetch_first : !u64i + +// LLVM-LABEL: @_Z9inc_ulong +// LLVM: atomicrmw add ptr {{.*}}, i64 {{.*}} seq_cst, align 8 +void inc_ulong(unsigned long* a, long b) { + unsigned long c = __sync_fetch_and_add(a, b); +} + +// CHECK-LABEL: @_Z9inc_uchar +// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr, {{.*}} : !u8i, seq_cst) fetch_first : !u8i + +// LLVM-LABEL: @_Z9inc_uchar +// LLVM: atomicrmw add ptr {{.*}}, i8 {{.*}} seq_cst, align 1 +void inc_uchar(unsigned char* a, char b) { + unsigned char c = __sync_fetch_and_add(a, b); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/attribute-annotate-multiple.cpp b/clang/test/CIR/CodeGen/attribute-annotate-multiple.cpp new file mode 
100644 index 000000000000..ff970f3919f4 --- /dev/null +++ b/clang/test/CIR/CodeGen/attribute-annotate-multiple.cpp @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +double *a __attribute__((annotate("withargs", "21", 12 ))); +int *b __attribute__((annotate("withargs", "21", 12 ))); +void *c __attribute__((annotate("noargvar"))); + +enum : char { npu1 = 42}; +int tile __attribute__((annotate("cir.aie.device.tile", npu1))) = 7; + +void foo(int i) __attribute__((annotate("noargfunc"))) { +} +// redeclare with more annotate +void foo(int i) __attribute__((annotate("withargfunc", "os", 23 ))); +void bar() __attribute__((annotate("withargfunc", "os", 22))) { +} + +// BEFORE: module @{{.*}}attribute-annotate-multiple.cpp" attributes {cir.lang = + +// BEFORE: cir.global external @a = #cir.ptr : !cir.ptr +// BEFORE-SAME: [#cir.annotation] +// BEFORE: cir.global external @b = #cir.ptr : !cir.ptr +// BEFORE-SAME: [#cir.annotation] +// BEFORE: cir.global external @c = #cir.ptr : !cir.ptr +// BEFORE-SAME: [#cir.annotation] +// BEFORE: cir.global external @tile = #cir.int<7> : !s32i +// BEFORE-SAME: #cir.annotation] + +// BEFORE: cir.func @_Z3fooi(%arg0: !s32i) [#cir.annotation, +// BEFORE-SAME: #cir.annotation] +// BEFORE: cir.func @_Z3barv() [#cir.annotation] + + +// AFTER: module {{.*}}attribute-annotate-multiple.cpp" attributes +// AFTER-SAME: {cir.global_annotations = #cir], +// AFTER-SAME: ["b", #cir.annotation], +// AFTER-SAME: ["c", #cir.annotation], +// AFTER-SAME: ["tile", #cir.annotation], +// AFTER-SAME: ["_Z3fooi", #cir.annotation], +// AFTER-SAME: ["_Z3fooi", #cir.annotation], +// AFTER-SAME: ["_Z3barv", #cir.annotation]]>, + + +// LLVM: @a = global ptr null +// LLVM: @b = global ptr null +// LLVM: @c = global ptr null +// LLVM: @tile = global i32 7 +// LLVM: @.str.annotation = private unnamed_addr constant [9 x i8] c"withargs\00", section "llvm.metadata" +// LLVM: @.str.1.annotation = private unnamed_addr constant [{{[0-9]+}} x i8] c"{{.*}}attribute-annotate-multiple.cpp\00", section "llvm.metadata" +// LLVM: @.str.annotation.arg = private unnamed_addr constant [3 x i8] c"21\00", align 1 +// LLVM: @.args.annotation = private unnamed_addr constant { ptr, i32 } { ptr @.str.annotation.arg, i32 12 }, section "llvm.metadata" +// LLVM: @.str.2.annotation = private unnamed_addr constant [9 x i8] c"noargvar\00", section "llvm.metadata" +// LLVM: @.str.3.annotation = private unnamed_addr constant [20 x i8] c"cir.aie.device.tile\00", section "llvm.metadata" +// LLVM: @.args.1.annotation = private unnamed_addr constant { i8 } { i8 42 }, section "llvm.metadata" +// LLVM: @.str.4.annotation = private unnamed_addr constant [10 x i8] c"noargfunc\00", section "llvm.metadata" +// LLVM: @.str.5.annotation = private unnamed_addr constant [12 x i8] c"withargfunc\00", section "llvm.metadata" +// LLVM: @.str.1.annotation.arg = private unnamed_addr constant [3 x i8] c"os\00", align 1 +// LLVM: @.args.2.annotation = private unnamed_addr constant { ptr, i32 } +// LLVM-SAME: { ptr @.str.1.annotation.arg, i32 23 }, section "llvm.metadata" +// 
LLVM: @.args.3.annotation = private unnamed_addr constant { ptr, i32 } +// LLVM-SAME: { ptr @.str.1.annotation.arg, i32 22 }, section "llvm.metadata" + +// LLVM: @llvm.global.annotations = appending global [7 x { ptr, ptr, ptr, i32, ptr }] +// LLVM-SAME: [{ ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @a, ptr @.str.annotation, ptr @.str.1.annotation, i32 5, ptr @.args.annotation }, +// LLVM-SAME: { ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @b, ptr @.str.annotation, ptr @.str.1.annotation, i32 6, ptr @.args.annotation }, +// LLVM-SAME: { ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @c, ptr @.str.2.annotation, ptr @.str.1.annotation, i32 7, ptr null }, +// LLVM-SAME: { ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @tile, ptr @.str.3.annotation, ptr @.str.1.annotation, i32 10, ptr @.args.1.annotation }, +// LLVM-SAME: { ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @_Z3fooi, ptr @.str.4.annotation, ptr @.str.1.annotation, i32 12, ptr null }, +// LLVM-SAME: { ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @_Z3fooi, ptr @.str.5.annotation, ptr @.str.1.annotation, i32 12, ptr @.args.2.annotation }, +// LLVM-SAME: { ptr, ptr, ptr, i32, ptr } +// LLVM-SAME: { ptr @_Z3barv, ptr @.str.5.annotation, ptr @.str.1.annotation, i32 16, ptr @.args.3.annotation }], +// LLVM-SAME: section "llvm.metadata" + +// LLVM: define dso_local void @_Z3fooi(i32 %0) +// LLVM: define dso_local void @_Z3barv() diff --git a/clang/test/CIR/CodeGen/attributes.c b/clang/test/CIR/CodeGen/attributes.c new file mode 100644 index 000000000000..f80c479df45a --- /dev/null +++ b/clang/test/CIR/CodeGen/attributes.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +extern int __attribute__((section(".shared"))) ext; +int getExt() { + return ext; +} +// CIR: cir.global "private" external @ext : !s32i {alignment = 4 : i64, section = ".shared"} +// LLVM: @ext = external global i32, section ".shared" + +int __attribute__((section(".shared"))) glob = 42; +// CIR: cir.global external @glob = #cir.int<42> : !s32i {alignment = 4 : i64, section = ".shared"} +// LLVM: @glob = global i32 42, section ".shared" + + +void __attribute__((__visibility__("hidden"))) foo(); +// CIR: cir.func no_proto private hidden @foo(...) 
+int bah() +{ + foo(); + return 1; +} diff --git a/clang/test/CIR/CodeGen/basic.c b/clang/test/CIR/CodeGen/basic.c new file mode 100644 index 000000000000..4fb5f6c6853c --- /dev/null +++ b/clang/test/CIR/CodeGen/basic.c @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +int foo(int i); + +int foo(int i) { + i; + return i; +} + +// CIR: module @"{{.*}}basic.c" attributes {{{.*}}cir.lang = #cir.lang +// CIR-NEXT: cir.func @foo(%arg0: !s32i loc({{.*}})) -> !s32i +// CIR-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CIR-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CIR-NEXT: cir.store %arg0, %0 : !s32i, !cir.ptr +// CIR-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i +// CIR-NEXT: %3 = cir.load %0 : !cir.ptr, !s32i +// CIR-NEXT: cir.store %3, %1 : !s32i, !cir.ptr +// CIR-NEXT: %4 = cir.load %1 : !cir.ptr, !s32i +// CIR-NEXT: cir.return %4 : !s32i + +int f2(void) { return 3; } + +// CIR: cir.func @f2() -> !s32i +// CIR-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CIR-NEXT: %1 = cir.const #cir.int<3> : !s32i +// CIR-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CIR-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i +// CIR-NEXT: cir.return %2 : !s32i + +// LLVM: define dso_local i32 @f2() +// LLVM-NEXT: %1 = alloca i32, i64 1, align 4 +// LLVM-NEXT: store i32 3, ptr %1, align 4 +// LLVM-NEXT: %2 = load i32, ptr %1, align 4 +// LLVM-NEXT: ret i32 %2 + + + +int f3(void) { + int i = 3; + return i; +} + +// CIR: cir.func @f3() -> !s32i +// CIR-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CIR-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CIR-NEXT: %2 = cir.const #cir.int<3> : !s32i +// CIR-NEXT: cir.store %2, %1 : !s32i, !cir.ptr +// CIR-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CIR-NEXT: cir.store %3, %0 : !s32i, !cir.ptr +// CIR-NEXT: %4 = cir.load %0 : !cir.ptr, !s32i +// CIR-NEXT: cir.return %4 : !s32i diff --git a/clang/test/CIR/CodeGen/basic.cpp b/clang/test/CIR/CodeGen/basic.cpp new file mode 100644 index 000000000000..8817f97dca10 --- /dev/null +++ b/clang/test/CIR/CodeGen/basic.cpp @@ -0,0 +1,182 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int *p0() { + int *p = nullptr; + return p; +} + +// CHECK: cir.func @_Z2p0v() -> !cir.ptr +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] +// CHECK: %2 = cir.const #cir.ptr : !cir.ptr +// CHECK: cir.store %2, %1 : !cir.ptr, !cir.ptr> + +int *p1() { + int *p; + p = nullptr; + return p; +} + +// CHECK: cir.func @_Z2p1v() -> !cir.ptr +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["p"] +// CHECK: %2 = cir.const #cir.ptr : !cir.ptr +// CHECK: cir.store %2, %1 : !cir.ptr, !cir.ptr> + +int *p2() { + int *p = nullptr; + { + int x = 0; + p = &x; + *p = 42; + } + *p = 42; + return p; +} + +// CHECK: cir.func @_Z2p2v() -> !cir.ptr +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] {alignment = 8 : i64} +// CHECK-NEXT: %2 = cir.const #cir.ptr : !cir.ptr +// CHECK-NEXT: cir.store %2, %1 : !cir.ptr, !cir.ptr> +// 
CHECK-NEXT: cir.scope { +// CHECK-NEXT: %7 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK-NEXT: %8 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.store %8, %7 : !s32i, !cir.ptr +// CHECK-NEXT: cir.store %7, %1 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %9 = cir.const #cir.int<42> : !s32i +// CHECK-NEXT: %10 = cir.load deref %1 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.store %9, %10 : !s32i, !cir.ptr +// CHECK-NEXT: } loc(#[[locScope:loc[0-9]+]]) +// CHECK-NEXT: %3 = cir.const #cir.int<42> : !s32i +// CHECK-NEXT: %4 = cir.load deref %1 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.store %3, %4 : !s32i, !cir.ptr +// CHECK-NEXT: %5 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.store %5, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %6 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return %6 : !cir.ptr + +void b0() { bool x = true, y = false; } + +// CHECK: cir.func @_Z2b0v() +// CHECK: %2 = cir.const #true +// CHECK: %3 = cir.const #false + +void b1(int a) { bool b = a; } + +// CHECK: cir.func @_Z2b1i(%arg0: !s32i loc({{.*}})) +// CHECK: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK: %3 = cir.cast(int_to_bool, %2 : !s32i), !cir.bool +// CHECK: cir.store %3, %1 : !cir.bool, !cir.ptr + +void if0(int a) { + int x = 0; + if (a) { + x = 3; + } else { + x = 4; + } +} + +// CHECK: cir.func @_Z3if0i(%arg0: !s32i loc({{.*}})) +// CHECK: cir.scope { +// CHECK: %3 = cir.load %0 : !cir.ptr, !s32i +// CHECK: %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool +// CHECK-NEXT: cir.if %4 { +// CHECK-NEXT: %5 = cir.const #cir.int<3> : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: } else { +// CHECK-NEXT: %5 = cir.const #cir.int<4> : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: } +// CHECK: } + +void if1(int a, bool b, bool c) { + int x = 0; + if (a) { + x = 3; + if (b) { + x = 8; + } + } else { + if (c) { + x = 14; + } + x = 4; + } +} + +// CHECK: cir.func @_Z3if1ibb(%arg0: !s32i loc({{.*}}), %arg1: !cir.bool loc({{.*}}), %arg2: !cir.bool loc({{.*}})) +// CHECK: cir.scope { +// CHECK: %5 = cir.load %0 : !cir.ptr, !s32i +// CHECK: %6 = cir.cast(int_to_bool, %5 : !s32i), !cir.bool +// CHECK: cir.if %6 { +// CHECK: %7 = cir.const #cir.int<3> : !s32i +// CHECK: cir.store %7, %3 : !s32i, !cir.ptr +// CHECK: cir.scope { +// CHECK: %8 = cir.load %1 : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.if %8 { +// CHECK-NEXT: %9 = cir.const #cir.int<8> : !s32i +// CHECK-NEXT: cir.store %9, %3 : !s32i, !cir.ptr +// CHECK-NEXT: } +// CHECK: } +// CHECK: } else { +// CHECK: cir.scope { +// CHECK: %8 = cir.load %2 : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.if %8 { +// CHECK-NEXT: %9 = cir.const #cir.int<14> : !s32i +// CHECK-NEXT: cir.store %9, %3 : !s32i, !cir.ptr +// CHECK-NEXT: } +// CHECK: } +// CHECK: %7 = cir.const #cir.int<4> : !s32i +// CHECK: cir.store %7, %3 : !s32i, !cir.ptr +// CHECK: } +// CHECK: } + +enum { + um = 0, + dois = 1, +}; // Do not crash! + +extern "C" { +struct regs { + unsigned long sp; + unsigned long pc; +}; + +// Check it's not mangled. 
+// CHECK: cir.func @use_regs() + +void use_regs() { regs r; } +} + +void x() { + const bool b0 = true; + const bool b1 = false; +} + +// CHECK: cir.func @_Z1xv() +// CHECK: %0 = cir.alloca !cir.bool, !cir.ptr, ["b0", init] {alignment = 1 : i64} +// CHECK: %1 = cir.alloca !cir.bool, !cir.ptr, ["b1", init] {alignment = 1 : i64} +// CHECK: %2 = cir.const #true +// CHECK: cir.store %2, %0 : !cir.bool, !cir.ptr +// CHECK: %3 = cir.const #false +// CHECK: cir.store %3, %1 : !cir.bool, !cir.ptr + +typedef unsigned long size_type; +typedef unsigned long _Tp; + +size_type max_size() { + return size_type(~0) / sizeof(_Tp); +} + +// CHECK: cir.func @_Z8max_sizev() +// CHECK: %0 = cir.alloca !u64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} +// CHECK: %1 = cir.const #cir.int<0> : !s32i +// CHECK: %2 = cir.unary(not, %1) : !s32i, !s32i +// CHECK: %3 = cir.cast(integral, %2 : !s32i), !u64i +// CHECK: %4 = cir.const #cir.int<8> : !u64i +// CHECK: %5 = cir.binop(div, %3, %4) : !u64i + +// CHECK-DAG: #[[locScope]] = loc(fused[#[[locScopeA:loc[0-9]+]], #[[locScopeB:loc[0-9]+]]]) +// CHECK-DAG: #[[locScopeA]] = loc("{{.*}}basic.cpp":27:3) +// CHECK-DAG: #[[locScopeB]] = loc("{{.*}}basic.cpp":31:3) diff --git a/clang/test/CIR/CodeGen/bf16-ops.c b/clang/test/CIR/CodeGen/bf16-ops.c new file mode 100644 index 000000000000..479be9980546 --- /dev/null +++ b/clang/test/CIR/CodeGen/bf16-ops.c @@ -0,0 +1,1640 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefix=NONATIVE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefix=NATIVE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefix=NONATIVE-LLVM %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefix=NATIVE-LLVM %s + +volatile unsigned test; +volatile int i0; +volatile __bf16 h0 = 0.0, h1 = 1.0, h2; +volatile float f0, f1, f2; +volatile double d0; +short s0; + +void foo(void) { + test = (h0); + // NONATIVE: %{{.+}} = cir.cast(float_to_int, %{{.+}} : !cir.bf16), !u32i + // NATIVE: %{{.+}} = cir.cast(float_to_int, %{{.+}} : !cir.bf16), !u32i + + // NONATIVE-LLVM: %{{.+}} = fptoui bfloat %{{.+}} to i32 + // NATIVE-LLVM: %{{.+}} = fptoui bfloat %{{.+}} to i32 + + h0 = (test); + // NONATIVE: %{{.+}} = cir.cast(int_to_float, %{{.+}} : !u32i), !cir.bf16 + // NATIVE: %{{.+}} = cir.cast(int_to_float, %{{.+}} : !u32i), !cir.bf16 + + // NONATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to bfloat + + test = (!h1); + // NONATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.bf16), !cir.bool + // NONATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool + // NONATIVE-NEXT: %[[#C:]] = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.bf16), !cir.bool + // NATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool + // NATIVE-NEXT: %[[#C:]] = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000 + // NONATIVE-LLVM-NEXT: %[[#B:]] = 
zext i1 %[[#A]] to i8 + // NONATIVE-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1 + // NONATIVE-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32 + + // NATIVE-LLVM: %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000 + // NATIVE-LLVM-NEXT: %[[#B:]] = zext i1 %[[#A]] to i8 + // NATIVE-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1 + // NATIVE-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32 + + h1 = -h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.float), !cir.bf16 + // NATIVE: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.bf16, !cir.bf16 + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fneg float %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fneg bfloat %{{.+}} + + h1 = +h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(plus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.float), !cir.bf16 + // NATIVE: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.bf16, !cir.bf16 + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h1, align 2 + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#A]], ptr @h1, align 2 + + h1++; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80 + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80 + + ++h1; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80 + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80 + + --h1; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80 + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80 + + h1--; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80 + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80 + + h1 = h0 * h2; + // NONATIVE: 
%[[#LHS:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#RHS:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHS]], %[[#RHS]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#A]] : !cir.float), !cir.bf16 + + // NATIVE: %{{.+}} = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, %{{.+}} + + h1 = h0 * (__bf16) -2.0f; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(mul, %[[#A]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#C]]) : !cir.bf16 + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], -2.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, 0xRC000 + + h1 = h0 * f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = f0 * h2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]] + // 
NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = h0 * i0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext bfloat %[[#RHS]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fmul bfloat %{{.+}}, %[[#A]] + + h1 = (h0 / h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#A]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + + // NATIVE: %{{.+}} = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, %{{.+}} + + h1 = (h0 / (__bf16) -2.0f); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(div, %[[#A]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#C]]) : !cir.bf16 + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], -2.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, 0xRC000 + + h1 = (h0 / f2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RES:]] = fdiv 
float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = (f0 / h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = (h0 / i0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext bfloat %[[#RHS]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fdiv bfloat %{{.+}}, %[[#A]] + + h1 = (h2 + h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#A]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + + // NATIVE: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, %{{.+}} + + h1 = ((__bf16)-2.0 + h0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.bf16), !cir.float + // NONATIVE: %[[#E:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(add, %[[#D]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const 
#cir.fp<2.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 + // NATIVE: %{{.+}} = cir.binop(add, %[[#C]], %{{.+}}) : !cir.bf16 + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float -2.000000e+00, %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fadd bfloat 0xRC000, %{{.+}} + + h1 = (h2 + f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4 + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4 + // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = (f2 + h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = (h0 + i0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS_INT:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#RHS_INT]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fadd bfloat %{{.+}}, %[[#A]] + + h1 = (h2 - h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : 
!cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + + // NATIVE: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, %{{.+}} + + h1 = ((__bf16)-2.0f - h0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.bf16), !cir.float + // NONATIVE: %[[#E:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(sub, %[[#D]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE: %{{.+}} = cir.binop(sub, %[[#C]], %{{.+}}) : !cir.bf16 + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float -2.000000e+00, %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fsub bfloat 0xRC000, %{{.+}} + + h1 = (h2 - f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4 + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4 + // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = (f2 - h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: 
%[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + h1 = (h0 - i0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %{{.+}} = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.bf16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS_INT:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#RHS_INT]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fsub bfloat %{{.+}}, %[[#A]] + + test = (h2 < h0); + // NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, %{{.+}} + + test = (h2 < (__bf16)42.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, 0xR4228 + + // NATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, 0xR4228 + + test = (h2 < f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp olt float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp olt float %[[#LHS]], %{{.+}} + + test = (f2 < h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : 
!cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#RHS]] + + test = (i0 < h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fcmp olt bfloat %[[#LHS]], %{{.+}} + + test = (h0 < i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt bfloat %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt bfloat %{{.+}}, %[[#RHS]] + + test = (h0 > h2); + // NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %{{.+}}, %{{.+}} + + test = ((__bf16)42.0 > h2); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat 0xR4228, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp ogt bfloat 0xR4228, %{{.+}} + + test = (h0 > f2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to 
float + // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}} + + test = (f0 > h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]] + + test = (i0 > h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %[[#LHS]], %{{.+}} + + test = (h0 > i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt bfloat %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt bfloat %{{.+}}, %[[#RHS]] + + test = (h2 <= h0); + // NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, %{{.+}} + + test = (h2 <= (__bf16)42.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, 0xR4228 + + // 
NATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, 0xR4228 + + test = (h2 <= f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}} + + test = (f2 <= h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]] + + test = (i0 <= h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fcmp ole bfloat %[[#LHS]], %{{.+}} + + test = (h0 <= i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole bfloat %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole bfloat %{{.+}}, %[[#RHS]] + + test = (h0 >= h2); + // NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE-NEXT: %{{.+}} = cir.get_global @test : !cir.ptr + + // NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, %{{.+}} + + test = (h0 >= (__bf16)-2.0); + // NONATIVE: 
%[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 + // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, 0xRC000 + + // NATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, 0xRC000 + + test = (h0 >= f2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}} + + test = (f0 >= h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]] + + test = (i0 >= h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fcmp oge bfloat %[[#LHS]], %{{.+}} + + test = (h0 >= i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), 
!u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge bfloat %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge bfloat %{{.+}}, %[[#RHS]] + + test = (h1 == h2); + // NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, %{{.+}} + + test = (h1 == (__bf16)1.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, 0xR3F80 + + // NATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, 0xR3F80 + + test = (h1 == f1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#A]], %{{.+}} + + // NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#A]], %{{.+}} + + test = (f1 == h1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]] + + test = (i0 == h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %[[#LHS]], %{{.+}} + + 
// NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %[[#LHS]], %{{.+}} + + test = (h0 == i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq bfloat %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq bfloat %{{.+}}, %[[#RHS]] + + test = (h1 != h2); + // NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, %{{.+}} + + test = (h1 != (__bf16)1.0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, 0xR3F80 + + // NATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, 0xR3F80 + + test = (h1 != f1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}} + + test = (f1 != h1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#RHS]] + + test = (i0 != h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, 
%{{.+}} : !s32i), !cir.bf16 + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fcmp une bfloat %[[#LHS]], %{{.+}} + + test = (h0 != i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une bfloat %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une bfloat %{{.+}}, %[[#RHS]] + + h1 = (h1 ? h2 : h0); + // NONATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.bf16), !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.ternary(%[[#A]], true { + // NONATIVE: cir.yield %{{.+}} : !cir.bf16 + // NONATIVE-NEXT: }, false { + // NONATIVE: cir.yield %{{.+}} : !cir.bf16 + // NONATIVE-NEXT: }) : (!cir.bool) -> !cir.bf16 + // NONATIVE: %{{.+}} = cir.get_global @h1 : !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.bf16), !cir.bool + // NATIVE-NEXT: %[[#B:]] = cir.ternary(%[[#A]], true { + // NATIVE: cir.yield %{{.+}} : !cir.bf16 + // NATIVE-NEXT: }, false { + // NATIVE: cir.yield %{{.+}} : !cir.bf16 + // NATIVE-NEXT: }) : (!cir.bool) -> !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.get_global @h1 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000 + // NONATIVE-LLVM-NEXT: br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]] + // NONATIVE-LLVM: [[#LABEL_A]]: + // NONATIVE-LLVM-NEXT: %[[#B:]] = load volatile bfloat, ptr @h2, align 2 + // NONATIVE-LLVM-NEXT: br label %[[#LABEL_C:]] + // NONATIVE-LLVM: [[#LABEL_B]]: + // NONATIVE-LLVM-NEXT: %[[#C:]] = load volatile bfloat, ptr @h0, align 2 + // NONATIVE-LLVM-NEXT: br label %[[#LABEL_C]] + // NONATIVE-LLVM: [[#LABEL_C]]: + // NONATIVE-LLVM-NEXT: %{{.+}} = phi bfloat [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ] + + // NATIVE-LLVM: %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000 + // NATIVE-LLVM-NEXT: br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]] + // NATIVE-LLVM: [[#LABEL_A]]: + // NATIVE-LLVM-NEXT: %[[#B:]] = load volatile bfloat, ptr @h2, align 2 + // NATIVE-LLVM-NEXT: br label %[[#LABEL_C:]] + // NATIVE-LLVM: [[#LABEL_B]]: + // NATIVE-LLVM-NEXT: %[[#C:]] = load volatile bfloat, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: br label %[[#LABEL_C]] + // NATIVE-LLVM: [[#LABEL_C]]: + // NATIVE-LLVM-NEXT: %{{.+}} = phi bfloat [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ] + + h0 = h1; + // NONATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // 
NONATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h1, align 2 + // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#A]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h1, align 2 + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#A]], ptr @h0, align 2 + + h0 = (__bf16)-2.0f; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: store volatile bfloat 0xRC000, ptr @h0, align 2 + + // NATIVE-LLVM: store volatile bfloat 0xRC000, ptr @h0, align 2 + + h0 = f0; + // NONATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to bfloat + // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to bfloat + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + h0 = i0; + // NONATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !s32i + // NONATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !s32i + // NATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4 + // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp 
i32 %[[#A]] to bfloat + // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4 + // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to bfloat + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + i0 = h0; + // NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.bf16), !s32i + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.bf16), !s32i + // NATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptosi bfloat %[[#A]] to i32 + // NONATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4 + + // NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptosi bfloat %[[#A]] to i32 + // NATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4 + + h0 += h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#A:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, %{{.+}} + + h0 += (__bf16)1.0f; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE: %[[#D:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.binop(add, %[[#D]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.cast(floating, %[[#E]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#F]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.float), !cir.bf16 + // NATIVE: %[[#C:]] = cir.binop(add, %{{.+}}, %[[#B]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80 + + h0 += f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = 
cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + i0 += h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.bf16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %[[#A:]] = fadd bfloat %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32 + + h0 += i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.bf16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, %[[#RHS]] + + h0 -= h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + 
// NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#A:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, %{{.+}} + + h0 -= (__bf16)1.0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.bf16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE: %[[#C:]] = cir.binop(sub, %{{.+}}, %[[#B]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, 0xR3F80 + + h0 -= f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + i0 -= h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.bf16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: 
%{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %[[#A:]] = fsub bfloat %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32 + + h0 -= i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.bf16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, %[[#RHS]] + + h0 *= h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#A:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, %{{.+}} + + h0 *= (__bf16)1.0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.bf16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE: %[[#C:]] = cir.binop(mul, %{{.+}}, %[[#B]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, 0xR3F80 + + h0 *= f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, 
%[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + i0 *= h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.bf16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %[[#A:]] = fmul bfloat %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32 + + h0 *= i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.bf16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, %[[#RHS]] + + h0 /= h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: 
%[[#A:]] = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#A:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, %{{.+}} + + h0 /= (__bf16)1.0; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE: %[[#D:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.binop(div, %[[#D]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.cast(floating, %[[#E]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#F]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.bf16 + // NATIVE: %[[#C:]] = cir.binop(div, %{{.+}}, %[[#B]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, 0xR3F80 + + h0 /= f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + // NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat + + i0 /= h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.bf16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = 
fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM-NEXT: %[[#A:]] = fdiv bfloat %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32 + + h0 /= i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.bf16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.bf16 + // NATIVE: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.bf16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat + // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat + // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, %[[#RHS]] + + h0 = d0; + // NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.bf16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to bfloat + // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to bfloat + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + h0 = (float)d0; + // NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NONATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.float + // NATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.bf16 + // NATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double 
%[[#A]] to float + // NONATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to bfloat + // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#C]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float + // NATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to bfloat + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#C]], ptr @h0, align 2 + + d0 = h0; + // NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.double + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.double + // NATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to double + // NONATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8 + + // NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to double + // NATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8 + + d0 = (float)h0; + // NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.double + // NONATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.bf16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.bf16), !cir.float + // NATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.double + // NATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to float + // NONATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double + // NONATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8 + + // NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to float + // NATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double + // NATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8 + + h0 = s0; + // NONATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr, !s16i + // NONATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s16i), !cir.bf16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr, !s16i + // NATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] 
: !s16i), !cir.bf16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2 + // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to bfloat + // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to bfloat + // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2 +} diff --git a/clang/test/CIR/CodeGen/binassign.cpp b/clang/test/CIR/CodeGen/binassign.cpp new file mode 100644 index 000000000000..3e09281072e2 --- /dev/null +++ b/clang/test/CIR/CodeGen/binassign.cpp @@ -0,0 +1,75 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int foo(int a, int b) { + int x = a * b; + x *= b; + x /= b; + x %= b; + x += b; + x -= b; + x >>= b; + x <<= b; + x &= b; + x ^= b; + x |= b; + return x; +} + +// CHECK: [[Value:%[0-9]+]] = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: = cir.binop(mul, +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(mul, +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: cir.binop(div, +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(rem, {{.*}} loc([[SourceLocation:#loc[0-9]+]]) +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(add, +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(sub, +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.shift( right +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.shift(left +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(and, +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(xor, +// CHECK: cir.store {{.*}}[[Value]] +// CHECK: = cir.load {{.*}}[[Value]] +// CHECK: = cir.binop(or, +// CHECK: cir.store {{.*}}[[Value]] + +typedef enum { + A = 3, +} enumy; + +enumy getty(); + +void exec() { + enumy r; + if ((r = getty()) < 0) {} +} + +// CHECK: cir.func @_Z4execv() +// CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["r"] {alignment = 4 : i64} +// CHECK: cir.scope { +// CHECK: %1 = cir.call @_Z5gettyv() : () -> !u32i +// CHECK: cir.store %1, %0 : !u32i, !cir.ptr +// CHECK: %2 = cir.cast(integral, %1 : !u32i), !s32i +// CHECK: %3 = cir.const #cir.int<0> : !s32i +// CHECK: %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool +// CHECK: cir.if %4 { + +// CHECK: [[SourceLocationB:#loc[0-9]+]] = loc("{{.*}}binassign.cpp":8:8) +// CHECK: [[SourceLocationA:#loc[0-9]+]] = loc("{{.*}}binassign.cpp":8:3) +// CHECK: [[SourceLocation]] = loc(fused[[[SourceLocationA]], [[SourceLocationB]]]) diff --git a/clang/test/CIR/CodeGen/binop.c b/clang/test/CIR/CodeGen/binop.c new file mode 100644 index 000000000000..280fd29b067f --- /dev/null +++ b/clang/test/CIR/CodeGen/binop.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void conditionalResultIimplicitCast(int a, int b, float f) { + // Should implicit cast back to int. 
+ int x = a && b; + // CHECK: %[[#INT:]] = cir.ternary + // CHECK: %{{.+}} = cir.cast(bool_to_int, %[[#INT]] : !cir.bool), !s32i + float y = f && f; + // CHECK: %[[#BOOL:]] = cir.ternary + // CHECK: %[[#INT:]] = cir.cast(bool_to_int, %[[#BOOL]] : !cir.bool), !s32i + // CHECK: %{{.+}} = cir.cast(int_to_float, %[[#INT]] : !s32i), !cir.float +} diff --git a/clang/test/CIR/CodeGen/binop.cpp b/clang/test/CIR/CodeGen/binop.cpp new file mode 100644 index 000000000000..ce68d5a4e9b3 --- /dev/null +++ b/clang/test/CIR/CodeGen/binop.cpp @@ -0,0 +1,118 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O1 -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void b0(int a, int b) { + int x = a * b; + x = x / b; + x = x % b; + x = x + b; + x = x - b; + x = x >> b; + x = x << b; + x = x & b; + x = x ^ b; + x = x | b; +} + +// CHECK: = cir.binop(mul, %3, %4) nsw : !s32i +// CHECK: = cir.binop(div, %6, %7) : !s32i +// CHECK: = cir.binop(rem, %9, %10) : !s32i +// CHECK: = cir.binop(add, %12, %13) nsw : !s32i +// CHECK: = cir.binop(sub, %15, %16) nsw : !s32i +// CHECK: = cir.shift( right, %18 : !s32i, %19 : !s32i) -> !s32i +// CHECK: = cir.shift(left, %21 : !s32i, %22 : !s32i) -> !s32i +// CHECK: = cir.binop(and, %24, %25) : !s32i +// CHECK: = cir.binop(xor, %27, %28) : !s32i +// CHECK: = cir.binop(or, %30, %31) : !s32i + +void b1(bool a, bool b) { + bool x = a && b; + x = x || b; +} + +// CHECK: cir.ternary(%3, true +// CHECK-NEXT: %7 = cir.load %1 +// CHECK-NEXT: cir.yield %7 +// CHECK-NEXT: false { +// CHECK-NEXT: cir.const #false +// CHECK-NEXT: cir.yield + +// CHECK: cir.ternary(%5, true +// CHECK-NEXT: cir.const #true +// CHECK-NEXT: cir.yield +// CHECK-NEXT: false { +// CHECK-NEXT: %7 = cir.load %1 +// CHECK-NEXT: cir.yield + +void b2(bool a) { + bool x = 0 && a; + x = 1 && a; + x = 0 || a; + x = 1 || a; +} + +// CHECK: %0 = cir.alloca {{.*}} ["a", init] +// CHECK: %1 = cir.alloca {{.*}} ["x", init] +// CHECK: %2 = cir.const #false +// CHECK-NEXT: cir.store %2, %1 +// CHECK-NEXT: %3 = cir.load %0 +// CHECK-NEXT: cir.store %3, %1 +// CHECK-NEXT: %4 = cir.load %0 +// CHECK-NEXT: cir.store %4, %1 +// CHECK-NEXT: %5 = cir.const #true +// CHECK-NEXT: cir.store %5, %1 + +void b3(int a, int b, int c, int d) { + bool x = (a == b) && (c == d); + x = (a == b) || (c == d); +} + +// CHECK: %0 = cir.alloca {{.*}} ["a", init] +// CHECK-NEXT: %1 = cir.alloca {{.*}} ["b", init] +// CHECK-NEXT: %2 = cir.alloca {{.*}} ["c", init] +// CHECK-NEXT: %3 = cir.alloca {{.*}} ["d", init] +// CHECK-NEXT: %4 = cir.alloca {{.*}} ["x", init] +// CHECK: %5 = cir.load %0 +// CHECK-NEXT: %6 = cir.load %1 +// CHECK-NEXT: %7 = cir.cmp(eq, %5, %6) +// CHECK-NEXT: cir.ternary(%7, true +// CHECK-NEXT: %13 = cir.load %2 +// CHECK-NEXT: %14 = cir.load %3 +// CHECK-NEXT: %15 = cir.cmp(eq, %13, %14) +// CHECK-NEXT: cir.yield %15 +// CHECK-NEXT: }, false { +// CHECK-NEXT: %13 = cir.const #false +// CHECK-NEXT: cir.yield %13 + +void testFloatingPointBinOps(float a, float b) { + a * b; + // CHECK: cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.float + a / b; + // CHECK: cir.binop(div, %{{.+}}, %{{.+}}) : !cir.float + a + b; + // CHECK: cir.binop(add, %{{.+}}, %{{.+}}) : !cir.float + a - b; + // CHECK: cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.float +} + +struct S {}; + +struct HasOpEq +{ + bool operator==(const S& other); +}; + +void rewritten_binop() +{ + HasOpEq s1; + S s2; + if (s1 != s2) + return; +} + +// CHECK-LABEL: _Z15rewritten_binopv +// CHECK: cir.scope { +// CHECK: cir.call 
@_ZN7HasOpEqeqERK1S +// CHECK: %[[COND:.*]] = cir.unary(not +// CHECK: cir.if %[[COND]] +// CHECK: cir.return \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/bitfield-union.c b/clang/test/CIR/CodeGen/bitfield-union.c new file mode 100644 index 000000000000..882e2ad9249a --- /dev/null +++ b/clang/test/CIR/CodeGen/bitfield-union.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void main() { + union demo { + int x; + int y : 4; + int z : 8; + }; + union demo d; + d.x = 1; + d.y = 2; + d.z = 0; +} + +// CHECK: !ty_demo = !cir.struct, !cir.int, !cir.int}> +// CHECK: #bfi_y = #cir.bitfield_info +// CHECK: #bfi_z = #cir.bitfield_info + +// cir.func no_proto @main() extra(#fn_attr) { +// %0 = cir.alloca !ty_demo, !cir.ptr, ["d"] {alignment = 4 : i64} +// %1 = cir.const #cir.int<1> : !s32i +// %2 = cir.get_member %0[0] {name = "x"} : !cir.ptr -> !cir.ptr +// cir.store %1, %2 : !s32i, !cir.ptr +// %3 = cir.const #cir.int<2> : !s32i +// %4 = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr +// %5 = cir.set_bitfield(#bfi_y, %4 : !cir.ptr, %3 : !s32i) -> !s32i +// %6 = cir.const #cir.int<0> : !s32i loc(#loc10) +// %7 = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr +// %8 = cir.set_bitfield(#bfi_z, %7 : !cir.ptr, %6 : !s32i) -> !s32i +// cir.return +// } diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c new file mode 100644 index 000000000000..5d6a0295a2dc --- /dev/null +++ b/clang/test/CIR/CodeGen/bitfields.c @@ -0,0 +1,131 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct __long { + struct __attribute__((__packed__)) { + unsigned __is_long_ : 1; + unsigned __cap_ : sizeof(unsigned) * 8 - 1; + }; + unsigned __size_; + unsigned *__data_; +}; + +void m() { + struct __long l; +} + +typedef struct { + int a : 4; + int b : 5; + int c; +} D; + +typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + unsigned f; // type other than int above, not a bitfield +} S; + +typedef struct { + int a : 3; // one bitfield with size < 8 + unsigned b; +} T; + +typedef struct { + char a; + char b; + char c; + + // startOffset 24 bits, new storage from here + int d: 2; + int e: 2; + int f: 4; + int g: 25; + int h: 3; + int i: 4; + int j: 3; + int k: 8; + + int l: 14; // need to be a part of the new storage + // because (tail - startOffset) is 65 after 'l' field +} U; + +// CHECK: !ty_D = !cir.struct, !cir.int}> +// CHECK: !ty_T = !cir.struct, !cir.int} #cir.record.decl.ast> +// CHECK: !ty_anon2E0_ = !cir.struct} #cir.record.decl.ast> +// CHECK: !ty_anon_struct = !cir.struct, !cir.int, !cir.int}> +// CHECK: #bfi_a = #cir.bitfield_info +// CHECK: #bfi_e = #cir.bitfield_info +// CHECK: !ty_S = !cir.struct, !cir.array x 3>, !cir.int, !cir.int}> +// CHECK: !ty_U = !cir.struct, !cir.int, !cir.int, !cir.array x 9>}> +// CHECK: !ty___long = !cir.struct} #cir.record.decl.ast>, !cir.int, !cir.ptr>}> +// CHECK: #bfi_d = #cir.bitfield_info, size = 2, offset = 17, is_signed = true> + +// CHECK: cir.func {{.*@store_field}} +// CHECK: [[TMP0:%.*]] = cir.alloca !ty_S, !cir.ptr +// CHECK: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +// CHECK: [[TMP2:%.*]] = cir.get_member [[TMP0]][2] {name = "e"} : !cir.ptr -> !cir.ptr +// CHECK: cir.set_bitfield(#bfi_e, [[TMP2]] : !cir.ptr, [[TMP1]] : !s32i) +void store_field() { + S s; + s.e = 3; +} + +// CHECK: cir.func {{.*@load_field}} +// 
CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] +// CHECK: [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP2:%.*]] = cir.get_member [[TMP1]][1] {name = "d"} : !cir.ptr -> !cir.ptr> +// CHECK: [[TMP3:%.*]] = cir.get_bitfield(#bfi_d, [[TMP2]] : !cir.ptr>) -> !s32i +int load_field(S* s) { + return s->d; +} + +// CHECK: cir.func {{.*@unOp}} +// CHECK: [[TMP0:%.*]] = cir.get_member {{.*}}[1] {name = "d"} : !cir.ptr -> !cir.ptr> +// CHECK: [[TMP1:%.*]] = cir.get_bitfield(#bfi_d, [[TMP0]] : !cir.ptr>) -> !s32i +// CHECK: [[TMP2:%.*]] = cir.unary(inc, [[TMP1]]) : !s32i, !s32i +// CHECK: cir.set_bitfield(#bfi_d, [[TMP0]] : !cir.ptr>, [[TMP2]] : !s32i) +void unOp(S* s) { + s->d++; +} + +// CHECK: cir.func {{.*@binOp}} +// CHECK: [[TMP0:%.*]] = cir.const #cir.int<42> : !s32i +// CHECK: [[TMP1:%.*]] = cir.get_member {{.*}}[1] {name = "d"} : !cir.ptr -> !cir.ptr> +// CHECK: [[TMP2:%.*]] = cir.get_bitfield(#bfi_d, [[TMP1]] : !cir.ptr>) -> !s32i +// CHECK: [[TMP3:%.*]] = cir.binop(or, [[TMP2]], [[TMP0]]) : !s32i +// CHECK: cir.set_bitfield(#bfi_d, [[TMP1]] : !cir.ptr>, [[TMP3]] : !s32i) +void binOp(S* s) { + s->d |= 42; +} + + +// CHECK: cir.func {{.*@load_non_bitfield}} +// CHECK: cir.get_member {{%.}}[3] {name = "f"} : !cir.ptr -> !cir.ptr +unsigned load_non_bitfield(S *s) { + return s->f; +} + +// just create a usage of T type +// CHECK: cir.func {{.*@load_one_bitfield}} +int load_one_bitfield(T* t) { + return t->a; +} + +// CHECK: cir.func {{.*@createU}} +void createU() { + U u; +} + +// for this struct type we create an anon structure with different storage types in initialization +// CHECK: cir.func {{.*@createD}} +// CHECK: %0 = cir.alloca !ty_D, !cir.ptr, ["d"] {alignment = 4 : i64} +// CHECK: %1 = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr +// CHECK: %2 = cir.const #cir.const_struct<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.int<3> : !s32i}> : !ty_anon_struct +// CHECK: cir.store %2, %1 : !ty_anon_struct, !cir.ptr +void createD() { + D d = {1,2,3}; +} diff --git a/clang/test/CIR/CodeGen/bitfields.cpp b/clang/test/CIR/CodeGen/bitfields.cpp new file mode 100644 index 000000000000..7ed8aff25cd9 --- /dev/null +++ b/clang/test/CIR/CodeGen/bitfields.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct __long { + struct __attribute__((__packed__)) { + unsigned __is_long_ : 1; + unsigned __cap_ : sizeof(unsigned) * 8 - 1; + }; + unsigned __size_; + unsigned *__data_; +}; + +void m() { + __long l; +} + +typedef struct { + int a : 4; + int b : 27; + int c : 17; + int d : 2; + int e : 15; + unsigned f; // type other than int above, not a bitfield +} S; + +typedef struct { + int a : 3; // one bitfield with size < 8 + unsigned b; +} T; +// CHECK: !ty_T = !cir.struct, !cir.int} #cir.record.decl.ast> +// CHECK: !ty_anon2E0_ = !cir.struct} #cir.record.decl.ast> +// CHECK: !ty_S = !cir.struct, !cir.array x 3>, !cir.int, !cir.int}> +// CHECK: !ty___long = !cir.struct} #cir.record.decl.ast>, !cir.int, !cir.ptr>}> + +// CHECK: cir.func @_Z11store_field +// CHECK: [[TMP0:%.*]] = cir.alloca !ty_S, !cir.ptr +// CHECK: [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i +// CHECK: [[TMP2:%.*]] = cir.cast(bitcast, [[TMP0]] : !cir.ptr), !cir.ptr +// CHECK: cir.set_bitfield(#bfi_a, [[TMP2]] : !cir.ptr, [[TMP1]] : !s32i) +void store_field() { + S s; + s.a = 3; +} + +// CHECK: cir.func @_Z10load_field +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] 
+// CHECK: [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP2:%.*]] = cir.get_member [[TMP1]][1] {name = "d"} : !cir.ptr -> !cir.ptr> +// CHECK: [[TMP3:%.*]] = cir.get_bitfield(#bfi_d, [[TMP2]] : !cir.ptr>) -> !s32i +int load_field(S& s) { + return s.d; +} + +// CHECK: cir.func @_Z17load_non_bitfield +// CHECK: cir.get_member {{%.}}[3] {name = "f"} : !cir.ptr -> !cir.ptr +unsigned load_non_bitfield(S& s) { + return s.f; +} + +// just create a usage of T type +// CHECK: cir.func @_Z17load_one_bitfield +int load_one_bitfield(T& t) { + return t.a; +} diff --git a/clang/test/CIR/CodeGen/bitfields_be.c b/clang/test/CIR/CodeGen/bitfields_be.c new file mode 100644 index 000000000000..9063a33fdd8d --- /dev/null +++ b/clang/test/CIR/CodeGen/bitfields_be.c @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -triple aarch64_be-unknown-linux-gnu -emit-llvm %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=LLVM + +// RUN: %clang_cc1 -triple aarch64_be-unknown-linux-gnu -fclangir -emit-llvm %s -o %t1.cir +// RUN: FileCheck --input-file=%t1.cir %s + +typedef struct { + int a : 4; + int b : 11; + int c : 17; +} S; + +void init(S* s) { + s->a = -4; + s->b = 42; + s->c = -12345; +} + +// field 'a' +// LLVM: %[[PTR0:.*]] = load ptr +// CHECK: %[[PTR0:.*]] = load ptr +// LLVM: %[[VAL0:.*]] = load i32, ptr %[[PTR0]] +// CHECK: %[[VAL0:.*]] = load i32, ptr %[[PTR0]] +// LLVM: %[[AND0:.*]] = and i32 %[[VAL0]], 268435455 +// CHECK: %[[AND0:.*]] = and i32 %[[VAL0]], 268435455 +// LLVM: %[[OR0:.*]] = or i32 %[[AND0]], -1073741824 +// CHECK: %[[OR0:.*]] = or i32 %[[AND0]], -1073741824 +// LLVM: store i32 %[[OR0]], ptr %[[PTR0]] +// CHECK: store i32 %[[OR0]], ptr %[[PTR0]] + +// field 'b' +// LLVM: %[[PTR1:.*]] = load ptr +// CHECK: %[[PTR1:.*]] = load ptr +// LLVM: %[[VAL1:.*]] = load i32, ptr %[[PTR1]] +// CHECK: %[[VAL1:.*]] = load i32, ptr %[[PTR1]] +// LLVM: %[[AND1:.*]] = and i32 %[[VAL1]], -268304385 +// CHECK: %[[AND1:.*]] = and i32 %[[VAL1]], -268304385 +// LLVM: %[[OR1:.*]] = or i32 %[[AND1]], 5505024 +// CHECK: %[[OR1:.*]] = or i32 %[[AND1]], 5505024 +// LLVM: store i32 %[[OR1]], ptr %[[PTR1]] +// CHECK: store i32 %[[OR1]], ptr %[[PTR1]] + +// field 'c' +// LLVM: %[[PTR2:.*]] = load ptr +// CHECK: %[[PTR2:.*]] = load ptr +// LLVM: %[[VAL2:.*]] = load i32, ptr %[[PTR2]] +// CHECK: %[[VAL2:.*]] = load i32, ptr %[[PTR2]] +// LLVM: %[[AND2:.*]] = and i32 %[[VAL2]], -131072 +// CHECK: %[[AND2:.*]] = and i32 %[[VAL2]], -131072 +// LLVM: %[[OR2:.*]] = or i32 %[[AND2]], 118727 +// CHECK: %[[OR2:.*]] = or i32 %[[AND2]], 118727 +// LLVM: store i32 %[[OR2]], ptr %[[PTR2]] +// CHECK: store i32 %[[OR2]], ptr %[[PTR2]] + diff --git a/clang/test/CIR/CodeGen/bitint.c b/clang/test/CIR/CodeGen/bitint.c new file mode 100644 index 000000000000..176339c81af3 --- /dev/null +++ b/clang/test/CIR/CodeGen/bitint.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void VLATest(_BitInt(3) A, _BitInt(42) B, _BitInt(17) C) { + int AR1[A]; + int AR2[B]; + int AR3[C]; +} + +// CHECK: cir.func @VLATest +// CHECK: %[[#A:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#A_PROMOTED:]] = cir.cast(integral, %[[#A]] : !cir.int), !u64i +// CHECK-NEXT: %[[#SP:]] = cir.stack_save : !cir.ptr +// CHECK-NEXT: cir.store %[[#SP]], %{{.+}} : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %{{.+}} = cir.alloca !s32i, !cir.ptr, %[[#A_PROMOTED]] : !u64i +// CHECK-NEXT: %[[#B:]] = cir.load %1 : !cir.ptr>, !cir.int +// CHECK-NEXT: 
%[[#B_PROMOTED:]] = cir.cast(integral, %[[#B]] : !cir.int), !u64i +// CHECK-NEXT: %{{.+}} = cir.alloca !s32i, !cir.ptr, %[[#B_PROMOTED]] : !u64i +// CHECK-NEXT: %[[#C:]] = cir.load %2 : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#C_PROMOTED:]] = cir.cast(integral, %[[#C]] : !cir.int), !u64i +// CHECK-NEXT: %{{.+}} = cir.alloca !s32i, !cir.ptr, %[[#C_PROMOTED]] : !u64i +// CHECK: } diff --git a/clang/test/CIR/CodeGen/bitint.cpp b/clang/test/CIR/CodeGen/bitint.cpp new file mode 100644 index 000000000000..7f7c85ed268c --- /dev/null +++ b/clang/test/CIR/CodeGen/bitint.cpp @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +using i10 = signed _BitInt(10); +using u10 = unsigned _BitInt(10); + +unsigned _BitInt(1) GlobSize1 = 0; +// CHECK: cir.global external @GlobSize1 = #cir.int<0> : !cir.int + +i10 test_signed(i10 arg) { + return arg; +} + +// CHECK: cir.func @_Z11test_signedDB10_(%arg0: !cir.int loc({{.*}}) -> !cir.int +// CHECK: } + +u10 test_unsigned(u10 arg) { + return arg; +} + +// CHECK: cir.func @_Z13test_unsignedDU10_(%arg0: !cir.int loc({{.*}}) -> !cir.int +// CHECK: } + +i10 test_init() { + return 42; +} + +// CHECK: cir.func @_Z9test_initv() -> !cir.int +// CHECK: %[[#LITERAL:]] = cir.const #cir.int<42> : !s32i +// CHECK-NEXT: %{{.+}} = cir.cast(integral, %[[#LITERAL]] : !s32i), !cir.int +// CHECK: } + +void test_init_for_mem() { + i10 x = 42; +} + +// CHECK: cir.func @_Z17test_init_for_memv() +// CHECK: %[[#LITERAL:]] = cir.const #cir.int<42> : !s32i +// CHECK-NEXT: %[[#INIT:]] = cir.cast(integral, %[[#LITERAL]] : !s32i), !cir.int +// CHECK-NEXT: cir.store %[[#INIT]], %{{.+}} : !cir.int, !cir.ptr> +// CHECK: } + +i10 test_arith(i10 lhs, i10 rhs) { + return lhs + rhs; +} + +// CHECK: cir.func @_Z10test_arithDB10_S_(%arg0: !cir.int loc({{.+}}), %arg1: !cir.int loc({{.+}})) -> !cir.int +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %{{.+}} = cir.binop(add, %[[#LHS]], %[[#RHS]]) nsw : !cir.int +// CHECK: } + +void Size1ExtIntParam(unsigned _BitInt(1) A) { + unsigned _BitInt(1) B[5]; + B[2] = A; +} + +// CHECK: cir.func @_Z16Size1ExtIntParamDU1_ +// CHECK: %[[#A:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#IDX:]] = cir.const #cir.int<2> : !s32i +// CHECK-NEXT: %[[#ARRAY:]] = cir.cast(array_to_ptrdecay, %1 : !cir.ptr x 5>>), !cir.ptr> +// CHECK-NEXT: %[[#PTR:]] = cir.ptr_stride(%[[#ARRAY]] : !cir.ptr>, %[[#IDX]] : !s32i), !cir.ptr> +// CHECK-NEXT: cir.store %[[#A]], %[[#PTR]] : !cir.int, !cir.ptr> +// CHECK: } + +struct S { + _BitInt(17) A; + _BitInt(10) B; + _BitInt(17) C; +}; + +void OffsetOfTest(void) { + int A = __builtin_offsetof(struct S,A); + int B = __builtin_offsetof(struct S,B); + int C = __builtin_offsetof(struct S,C); +} + +// CHECK: cir.func @_Z12OffsetOfTestv() +// CHECK: %{{.+}} = cir.const #cir.int<0> : !u64i +// CHECK: %{{.+}} = cir.const #cir.int<4> : !u64i +// CHECK: %{{.+}} = cir.const #cir.int<8> : !u64i +// CHECK: } + +_BitInt(2) ParamPassing(_BitInt(15) a, _BitInt(31) b) {} + +// CHECK: cir.func @_Z12ParamPassingDB15_DB31_(%arg0: !cir.int loc({{.+}}), %arg1: !cir.int loc({{.+}})) -> !cir.int diff --git a/clang/test/CIR/CodeGen/bool.c b/clang/test/CIR/CodeGen/bool.c new file mode 100644 index 000000000000..c31b04f27c27 --- /dev/null +++ b/clang/test/CIR/CodeGen/bool.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s 
-o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include + +typedef struct { + bool x; +} S; + +// CHECK: cir.func @init_bool +// CHECK: [[ALLOC:%.*]] = cir.alloca !ty_S, !cir.ptr +// CHECK: [[ZERO:%.*]] = cir.const #cir.zero : !ty_S +// CHECK: cir.store [[ZERO]], [[ALLOC]] : !ty_S, !cir.ptr +void init_bool(void) { + S s = {0}; +} + +// CHECK: cir.func @store_bool +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP1:%.*]] = cir.const #cir.int<0> : !s32i +// CHECK: [[TMP2:%.*]] = cir.cast(int_to_bool, [[TMP1]] : !s32i), !cir.bool +// CHECK: [[TMP3:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP4:%.*]] = cir.get_member [[TMP3]][0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: cir.store [[TMP2]], [[TMP4]] : !cir.bool, !cir.ptr +void store_bool(S *s) { + s->x = false; +} + +// CHECK: cir.func @load_bool +// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CHECK: [[TMP1:%.*]] = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} +// CHECK: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP2:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CHECK: [[TMP3:%.*]] = cir.get_member [[TMP2]][0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: [[TMP4:%.*]] = cir.load [[TMP3]] : !cir.ptr, !cir.bool +void load_bool(S *s) { + bool x = s->x; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/bswap.cpp b/clang/test/CIR/CodeGen/bswap.cpp new file mode 100644 index 000000000000..66a6ccf3ffec --- /dev/null +++ b/clang/test/CIR/CodeGen/bswap.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +using u16 = unsigned short; +using u32 = unsigned int; +using u64 = unsigned long long; + +u16 bswap_u16(u16 x) { + return __builtin_bswap16(x); +} + +// CHECK: cir.func @_Z9bswap_u16t +// CHECK: %{{.+}} = cir.bswap(%{{.+}} : !u16i) : !u16i +// CHECK: } + +u32 bswap_u32(u32 x) { + return __builtin_bswap32(x); +} + +// CHECK: cir.func @_Z9bswap_u32j +// CHECK: %{{.+}} = cir.bswap(%{{.+}} : !u32i) : !u32i +// CHECK: } + +u64 bswap_u64(u64 x) { + return __builtin_bswap64(x); +} + +// CHECK: cir.func @_Z9bswap_u64y +// CHECK: %{{.+}} = cir.bswap(%{{.+}} : !u64i) : !u64i +// CHECK: } diff --git a/clang/test/CIR/CodeGen/build-deferred.cpp b/clang/test/CIR/CodeGen/build-deferred.cpp new file mode 100644 index 000000000000..f62d8ddc5c99 --- /dev/null +++ b/clang/test/CIR/CodeGen/build-deferred.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fclangir-build-deferred-threshold=0 %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +class String { + char *storage{nullptr}; + long size; + long capacity; + +public: + String() : size{0} {} + String(int size) : size{size} {} + String(const char *s) {} +}; + +void test() { + String s1{}; + String s2{1}; + String s3{"abcdefghijklmnop"}; +} + +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC2Ev +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC2Ei +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC2EPKc +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC1EPKc + +// CHECK: cir.func @_Z4testv() +// CHECK: cir.call @_ZN6StringC1Ev(%0) : (!cir.ptr) -> () \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/builtin-alloca.c b/clang/test/CIR/CodeGen/builtin-alloca.c new file mode 100644 index 000000000000..3aa6b04bbeb9 --- /dev/null 
+++ b/clang/test/CIR/CodeGen/builtin-alloca.c @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +typedef __SIZE_TYPE__ size_t; +void *alloca(size_t size); +void *_alloca(size_t size); + +void my_alloca(size_t n) +{ + int *c1 = alloca(n); +} +// CIR: cir.func @my_alloca([[ALLOCA_SIZE:%.*]]: !u64i +// CIR: cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr +// CIR: [[TMP_ALLOCA_SIZE:%.*]] = cir.load [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr, !u64i +// CIR: [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64} +// CIR-NEXT: cir.cast(bitcast, [[ALLOCA_RES]] : !cir.ptr), !cir.ptr +// CIR: } + + +// LLVM: define dso_local void @my_alloca(i64 [[ALLOCA_SIZE:%.*]]) +// LLVM: store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]], +// LLVM: [[TMP_ALLOCA_SIZE:%.*]] = load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]], +// LLVM: [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16 +// LLVM: } + +void my___builtin_alloca(size_t n) +{ + int *c1 = (int *)__builtin_alloca(n); +} + +// CIR: cir.func @my___builtin_alloca([[ALLOCA_SIZE:%.*]]: !u64i +// CIR: cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr +// CIR: [[TMP_ALLOCA_SIZE:%.*]] = cir.load [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr, !u64i +// CIR: [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64} +// CIR-NEXT: cir.cast(bitcast, [[ALLOCA_RES]] : !cir.ptr), !cir.ptr +// CIR: } + + +// LLVM: define dso_local void @my___builtin_alloca(i64 [[ALLOCA_SIZE:%.*]]) +// LLVM: store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]], +// LLVM: [[TMP_ALLOCA_SIZE:%.*]] = load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]], +// LLVM: [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16 +// LLVM: } + +void my__builtin_alloca_uninitialized(size_t n) +{ + int *c1 = (int *)__builtin_alloca_uninitialized(n); +} + +// CIR: cir.func @my__builtin_alloca_uninitialized([[ALLOCA_SIZE:%.*]]: !u64i +// CIR: cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr +// CIR: [[TMP_ALLOCA_SIZE:%.*]] = cir.load [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr, !u64i +// CIR: [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64} +// CIR-NEXT: cir.cast(bitcast, [[ALLOCA_RES]] : !cir.ptr), !cir.ptr +// CIR: } + + +// LLVM: define dso_local void @my__builtin_alloca_uninitialized(i64 [[ALLOCA_SIZE:%.*]]) +// LLVM: store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]], +// LLVM: [[TMP_ALLOCA_SIZE:%.*]] = load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]], +// LLVM: [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16 +// LLVM: } diff --git a/clang/test/CIR/CodeGen/builtin-arm-ldrex.c b/clang/test/CIR/CodeGen/builtin-arm-ldrex.c new file mode 100644 index 000000000000..0ced1b088139 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-arm-ldrex.c @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir -emit-cir -target-feature +neon %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +struct twoFldT { + char a, b; +}; +// CIR: !ty_twoFldT = !cir.struct, !cir.int} +int test_ldrex(char *addr, long long *addr64, float *addrfloat) { +// CIR-LABEL: @test_ldrex + int sum = 0; + sum += __builtin_arm_ldrex(addr); 
+// CIR: [[INTRES0:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr) -> !s64i +// CIR: [[CAST0:%.*]] = cir.cast(integral, [[INTRES0]] : !s64i), !s8i +// CIR: [[CAST_I32:%.*]] = cir.cast(integral, [[CAST0]] : !s8i), !s32i + + sum += __builtin_arm_ldrex((short *)addr); +// CIR: [[INTRES1:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr) -> !s64i +// CIR: [[CAST1:%.*]] = cir.cast(integral, [[INTRES1]] : !s64i), !s16i +// CIR: [[CAST_I16:%.*]] = cir.cast(integral, [[CAST1]] : !s16i), !s32i + + sum += __builtin_arm_ldrex((int *)addr); +// CIR: [[INTRES2:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr) -> !s64i +// CIR: [[CAST2:%.*]] = cir.cast(integral, [[INTRES2]] : !s64i), !s32i + + sum += __builtin_arm_ldrex((long long *)addr); +// CIR: [[INTRES3:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr) -> !s64i + + sum += __builtin_arm_ldrex(addr64); +// CIR: [[INTRES4:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr) -> !s64i + + + sum += *__builtin_arm_ldrex((int **)addr); +// CIR: [[INTRES5:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr>) -> !s64i + + sum += __builtin_arm_ldrex((struct twoFldT **)addr)->a; +// CIR: [[INTRES6:%.*]] = cir.llvm.intrinsic "llvm.aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr>) -> !s64i +// CIR: [[CAST3:%.*]] = cir.cast(int_to_ptr, [[INTRES6]] : !s64i), !cir.ptr +// CIR: [[MEMBER_A:%.*]] = cir.get_member [[CAST3]][0] {name = "a"} : !cir.ptr -> !cir.ptr + + + // TODO: Uncomment next 2 lines, add tests when floating result type supported + // sum += __builtin_arm_ldrex(addrfloat); + + // sum += __builtin_arm_ldrex((double *)addr); + + + return sum; +} diff --git a/clang/test/CIR/CodeGen/builtin-assume.cpp b/clang/test/CIR/CodeGen/builtin-assume.cpp new file mode 100644 index 000000000000..da807994f4b1 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-assume.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir + +int test_assume(int x) { + __builtin_assume(x > 0); + return x; +} + +// CIR: cir.func @_Z11test_assumei +// CIR: %[[#x:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CIR-NEXT: %[[#zero:]] = cir.const #cir.int<0> : !s32i +// CIR-NEXT: %[[#cond:]] = cir.cmp(gt, %[[#x]], %[[#zero]]) : !s32i, !cir.bool +// CIR-NEXT: cir.assume %[[#cond]] : !cir.bool +// CIR: } + +int test_assume_aligned(int *ptr) { + int *aligned = (int *)__builtin_assume_aligned(ptr, 8); + return *aligned; +} + +// CIR: cir.func @_Z19test_assume_alignedPi +// CIR: %[[#ptr:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#aligned:]] = cir.assume.aligned %[[#ptr]] : !cir.ptr[alignment 8] +// CIR-NEXT: cir.store %[[#aligned]], %[[#aligned_slot:]] : !cir.ptr, !cir.ptr> +// CIR-NEXT: %[[#aligned2:]] = cir.load deref %[[#aligned_slot]] : !cir.ptr>, !cir.ptr +// CIR-NEXT: %{{.+}} = cir.load %[[#aligned2]] : !cir.ptr, !s32i +// CIR: } + +int test_assume_aligned_offset(int *ptr) { + int *aligned = (int *)__builtin_assume_aligned(ptr, 8, 4); + return *aligned; +} + +// CIR: cir.func @_Z26test_assume_aligned_offsetPi +// CIR: %[[#ptr:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#offset:]] = cir.const #cir.int<4> : !s32i +// CIR-NEXT: %[[#offset2:]] = cir.cast(integral, %[[#offset]] : !s32i), !u64i +// CIR-NEXT: %[[#aligned:]] = cir.assume.aligned %[[#ptr]] : !cir.ptr[alignment 8, offset %[[#offset2]] : !u64i] +// CIR-NEXT: cir.store %[[#aligned]], 
%[[#aligned_slot:]] : !cir.ptr, !cir.ptr> +// CIR-NEXT: %[[#aligned2:]] = cir.load deref %[[#aligned_slot]] : !cir.ptr>, !cir.ptr +// CIR-NEXT: %{{.+}} = cir.load %[[#aligned2]] : !cir.ptr, !s32i +// CIR: } + +int test_separate_storage(int *p1, int *p2) { + __builtin_assume_separate_storage(p1, p2); + return *p1 + *p2; +} + +// CIR: cir.func @_Z21test_separate_storagePiS_ +// CIR: %[[#p1:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#p1_voidptr:]] = cir.cast(bitcast, %[[#p1]] : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#p2:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#p2_voidptr:]] = cir.cast(bitcast, %[[#p2]] : !cir.ptr), !cir.ptr +// CIR-NEXT: cir.assume.separate_storage %[[#p1_voidptr]], %[[#p2_voidptr]] : !cir.ptr +// CIR: } diff --git a/clang/test/CIR/CodeGen/builtin-bit-cast.cpp b/clang/test/CIR/CodeGen/builtin-bit-cast.cpp new file mode 100644 index 000000000000..696b472a159f --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-bit-cast.cpp @@ -0,0 +1,136 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s + +float test_scalar(int &oper) { + return __builtin_bit_cast(float, oper); +} + +// CIR-LABEL: cir.func @_Z11test_scalarRi +// CIR: %[[#SRC_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#DST_PTR:]] = cir.cast(bitcast, %[[#SRC_PTR]] : !cir.ptr), !cir.ptr +// CIR-NEXT: %{{.+}} = cir.load %[[#DST_PTR]] : !cir.ptr, !cir.float +// CIR: } + +// LLVM-LABEL: define dso_local float @_Z11test_scalarRi +// LLVM: %[[#PTR:]] = load ptr, ptr %{{.+}}, align 8 +// LLVM-NEXT: %{{.+}} = load float, ptr %[[#PTR]], align 4 +// LLVM: } + +struct two_ints { + int x; + int y; +}; + +unsigned long test_aggregate_to_scalar(two_ints &ti) { + return __builtin_bit_cast(unsigned long, ti); +} + +// CIR-LABEL: cir.func @_Z24test_aggregate_to_scalarR8two_ints +// CIR: %[[#SRC_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#DST_PTR:]] = cir.cast(bitcast, %[[#SRC_PTR]] : !cir.ptr), !cir.ptr +// CIR-NEXT: %{{.+}} = cir.load %[[#DST_PTR]] : !cir.ptr, !u64i +// CIR: } + +// LLVM-LABEL: define dso_local i64 @_Z24test_aggregate_to_scalarR8two_ints +// LLVM: %[[#PTR:]] = load ptr, ptr %{{.+}}, align 8 +// LLVM-NEXT: %{{.+}} = load i64, ptr %[[#PTR]], align 8 +// LLVM: } + +struct two_floats { + float x; + float y; +}; + +two_floats test_aggregate_record(two_ints& ti) { + return __builtin_bit_cast(two_floats, ti); +} + +// CIR-LABEL: cir.func @_Z21test_aggregate_recordR8two_ints +// CIR: %[[#SRC_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CIR-NEXT: %[[#SRC_VOID_PTR:]] = cir.cast(bitcast, %[[#SRC_PTR]] : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#DST_VOID_PTR:]] = cir.cast(bitcast, %{{.+}} : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#SIZE:]] = cir.const #cir.int<8> : !u64i +// CIR-NEXT: cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr -> !cir.ptr +// CIR: } + +// LLVM-LABEL: define dso_local %struct.two_floats @_Z21test_aggregate_recordR8two_ints +// LLVM: %[[#DST_SLOT:]] = alloca %struct.two_floats, i64 1, align 4 +// LLVM: %[[#SRC_PTR:]] = load ptr, ptr %2, align 8 +// LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %[[#DST_SLOT]], ptr %[[#SRC_PTR]], i64 8, i1 false) +// LLVM-NEXT: %{{.+}} = load %struct.two_floats, ptr %[[#DST_SLOT]], align 4 +// 
LLVM: } + +two_floats test_aggregate_array(int (&ary)[2]) { + return __builtin_bit_cast(two_floats, ary); +} + +// CIR-LABEL: cir.func @_Z20test_aggregate_arrayRA2_i +// CIR: %[[#SRC_PTR:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> +// CIR-NEXT: %[[#SRC_VOID_PTR:]] = cir.cast(bitcast, %[[#SRC_PTR]] : !cir.ptr>), !cir.ptr +// CIR-NEXT: %[[#DST_VOID_PTR:]] = cir.cast(bitcast, %{{.+}} : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#SIZE:]] = cir.const #cir.int<8> : !u64i +// CIR-NEXT: cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr -> !cir.ptr +// CIR: } + +// LLVM-LABEL: define dso_local %struct.two_floats @_Z20test_aggregate_arrayRA2_i +// LLVM: %[[#DST_SLOT:]] = alloca %struct.two_floats, i64 1, align 4 +// LLVM: %[[#SRC_PTR:]] = load ptr, ptr %2, align 8 +// LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %[[#DST_SLOT]], ptr %[[#SRC_PTR]], i64 8, i1 false) +// LLVM-NEXT: %{{.+}} = load %struct.two_floats, ptr %[[#DST_SLOT]], align 4 +// LLVM: } + +two_ints test_scalar_to_aggregate(unsigned long ul) { + return __builtin_bit_cast(two_ints, ul); +} + +// CIR-LABEL: cir.func @_Z24test_scalar_to_aggregatem +// CIR: %[[#SRC_VOID_PTR:]] = cir.cast(bitcast, %{{.+}} : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#DST_VOID_PTR:]] = cir.cast(bitcast, %{{.+}} : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#SIZE:]] = cir.const #cir.int<8> : !u64i +// CIR-NEXT: cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr -> !cir.ptr +// CIR: } + +// LLVM-LABEL: define dso_local %struct.two_ints @_Z24test_scalar_to_aggregatem +// LLVM: %[[#DST_SLOT:]] = alloca %struct.two_ints, i64 1, align 4 +// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[#DST_SLOT]], ptr %{{.+}}, i64 8, i1 false) +// LLVM-NEXT: %{{.+}} = load %struct.two_ints, ptr %[[#DST_SLOT]], align 4 +// LLVM: } + +unsigned long test_array(int (&ary)[2]) { + return __builtin_bit_cast(unsigned long, ary); +} + +// CIR-LABEL: cir.func @_Z10test_arrayRA2_i +// CIR: %[[#SRC_PTR:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> +// CIR-NEXT: %[[#DST_PTR:]] = cir.cast(bitcast, %[[#SRC_PTR]] : !cir.ptr>), !cir.ptr +// CIR-NEXT: %{{.+}} = cir.load %[[#DST_PTR]] : !cir.ptr, !u64i +// CIR: } + +// LLVM-LABEL: define dso_local i64 @_Z10test_arrayRA2_i +// LLVM: %[[#SRC_PTR:]] = load ptr, ptr %{{.+}}, align 8 +// LLVM-NEXT: %{{.+}} = load i64, ptr %[[#SRC_PTR]], align 8 +// LLVM: } + +two_ints test_rvalue_aggregate() { + return __builtin_bit_cast(two_ints, 42ul); +} + +// CIR-LABEL: cir.func @_Z21test_rvalue_aggregatev() +// CIR: cir.scope { +// CIR-NEXT: %[[#TMP_SLOT:]] = cir.alloca !u64i, !cir.ptr +// CIR-NEXT: %[[#A:]] = cir.const #cir.int<42> : !u64i +// CIR-NEXT: cir.store %[[#A]], %[[#TMP_SLOT]] : !u64i, !cir.ptr +// CIR-NEXT: %[[#SRC_VOID_PTR:]] = cir.cast(bitcast, %[[#TMP_SLOT]] : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#DST_VOID_PTR:]] = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr +// CIR-NEXT: %[[#SIZE:]] = cir.const #cir.int<8> : !u64i +// CIR-NEXT: cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr -> !cir.ptr +// CIR-NEXT: } +// CIR: } + +// LLVM-LABEL: define dso_local %struct.two_ints @_Z21test_rvalue_aggregatev +// LLVM: %[[#SRC_SLOT:]] = alloca i64, i64 1, align 8 +// LLVM-NEXT: store i64 42, ptr %[[#SRC_SLOT]], align 8 +// LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %{{.+}}, ptr %[[#SRC_SLOT]], i64 8, i1 false) +// LLVM: } diff --git a/clang/test/CIR/CodeGen/builtin-bits.cpp b/clang/test/CIR/CodeGen/builtin-bits.cpp new file mode 100644 
index 000000000000..6b82f75187b8 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-bits.cpp @@ -0,0 +1,186 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int test_builtin_clrsb(int x) { + return __builtin_clrsb(x); +} + +// CHECK: cir.func @_Z18test_builtin_clrsbi +// CHECK: %{{.+}} = cir.bit.clrsb(%{{.+}} : !s32i) : !s32i +// CHECK: } + +int test_builtin_clrsbl(long x) { + return __builtin_clrsbl(x); +} + +// CHECK: cir.func @_Z19test_builtin_clrsbll +// CHECK: %{{.+}} = cir.bit.clrsb(%{{.+}} : !s64i) : !s32i +// CHECK: } + +int test_builtin_clrsbll(long long x) { + return __builtin_clrsbll(x); +} + +// CHECK: cir.func @_Z20test_builtin_clrsbllx +// CHECK: %{{.+}} = cir.bit.clrsb(%{{.+}} : !s64i) : !s32i +// CHECK: } + +int test_builtin_ctzs(unsigned short x) { + return __builtin_ctzs(x); +} + +// CHECK: cir.func @_Z17test_builtin_ctzst +// CHECK: %{{.+}} = cir.bit.ctz(%{{.+}} : !u16i) : !s32i +// CHECK: } + +int test_builtin_ctz(unsigned x) { + return __builtin_ctz(x); +} + +// CHECK: cir.func @_Z16test_builtin_ctzj +// CHECK: %{{.+}} = cir.bit.ctz(%{{.+}} : !u32i) : !s32i +// CHECK: } + +int test_builtin_ctzl(unsigned long x) { + return __builtin_ctzl(x); +} + +// CHECK: cir.func @_Z17test_builtin_ctzlm +// CHECK: %{{.+}} = cir.bit.ctz(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_ctzll(unsigned long long x) { + return __builtin_ctzll(x); +} + +// CHECK: cir.func @_Z18test_builtin_ctzlly +// CHECK: %{{.+}} = cir.bit.ctz(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_ctzg(unsigned x) { + return __builtin_ctzg(x); +} + +// CHECK: cir.func @_Z17test_builtin_ctzgj +// CHECK: %{{.+}} = cir.bit.ctz(%{{.+}} : !u32i) : !s32i +// CHECK: } + +int test_builtin_clzs(unsigned short x) { + return __builtin_clzs(x); +} + +// CHECK: cir.func @_Z17test_builtin_clzst +// CHECK: %{{.+}} = cir.bit.clz(%{{.+}} : !u16i) : !s32i +// CHECK: } + +int test_builtin_clz(unsigned x) { + return __builtin_clz(x); +} + +// CHECK: cir.func @_Z16test_builtin_clzj +// CHECK: %{{.+}} = cir.bit.clz(%{{.+}} : !u32i) : !s32i +// CHECK: } + +int test_builtin_clzl(unsigned long x) { + return __builtin_clzl(x); +} + +// CHECK: cir.func @_Z17test_builtin_clzlm +// CHECK: %{{.+}} = cir.bit.clz(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_clzll(unsigned long long x) { + return __builtin_clzll(x); +} + +// CHECK: cir.func @_Z18test_builtin_clzlly +// CHECK: %{{.+}} = cir.bit.clz(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_clzg(unsigned x) { + return __builtin_clzg(x); +} + +// CHECK: cir.func @_Z17test_builtin_clzgj +// CHECK: %{{.+}} = cir.bit.clz(%{{.+}} : !u32i) : !s32i +// CHECK: } + +int test_builtin_ffs(int x) { + return __builtin_ffs(x); +} + +// CHECK: cir.func @_Z16test_builtin_ffsi +// CHECK: %{{.+}} = cir.bit.ffs(%{{.+}} : !s32i) : !s32i +// CHECK: } + +int test_builtin_ffsl(long x) { + return __builtin_ffsl(x); +} + +// CHECK: cir.func @_Z17test_builtin_ffsll +// CHECK: %{{.+}} = cir.bit.ffs(%{{.+}} : !s64i) : !s32i +// CHECK: } + +int test_builtin_ffsll(long long x) { + return __builtin_ffsll(x); +} + +// CHECK: cir.func @_Z18test_builtin_ffsllx +// CHECK: %{{.+}} = cir.bit.ffs(%{{.+}} : !s64i) : !s32i +// CHECK: } + +int test_builtin_parity(unsigned x) { + return __builtin_parity(x); +} + +// CHECK: cir.func @_Z19test_builtin_parityj +// CHECK: %{{.+}} = cir.bit.parity(%{{.+}} : !u32i) : !s32i +// CHECK: } + +int test_builtin_parityl(unsigned long x) { + return
__builtin_parityl(x); +} + +// CHECK: cir.func @_Z20test_builtin_paritylm +// CHECK: %{{.+}} = cir.bit.parity(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_parityll(unsigned long long x) { + return __builtin_parityll(x); +} + +// CHECK: cir.func @_Z21test_builtin_paritylly +// CHECK: %{{.+}} = cir.bit.parity(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_popcount(unsigned x) { + return __builtin_popcount(x); +} + +// CHECK: cir.func @_Z21test_builtin_popcountj +// CHECK: %{{.+}} = cir.bit.popcount(%{{.+}} : !u32i) : !s32i +// CHECK: } + +int test_builtin_popcountl(unsigned long x) { + return __builtin_popcountl(x); +} + +// CHECK: cir.func @_Z22test_builtin_popcountlm +// CHECK: %{{.+}} = cir.bit.popcount(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_popcountll(unsigned long long x) { + return __builtin_popcountll(x); +} + +// CHECK: cir.func @_Z23test_builtin_popcountlly +// CHECK: %{{.+}} = cir.bit.popcount(%{{.+}} : !u64i) : !s32i +// CHECK: } + +int test_builtin_popcountg(unsigned x) { + return __builtin_popcountg(x); +} + +// CHECK: cir.func @_Z22test_builtin_popcountgj +// CHECK: %{{.+}} = cir.bit.popcount(%{{.+}} : !u32i) : !s32i +// CHECK: } diff --git a/clang/test/CIR/CodeGen/builtin-constant-evaluated.cpp b/clang/test/CIR/CodeGen/builtin-constant-evaluated.cpp new file mode 100644 index 000000000000..d09a60085f81 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-constant-evaluated.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +auto func() { + return __builtin_strcmp("", ""); + // CHECK: cir.func @_Z4funcv() + // CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} loc(#loc2) + // CHECK-NEXT: %1 = cir.const #cir.int<0> : !s32i loc(#loc7) + // CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr loc(#loc8) + // CHECK-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i loc(#loc8) + // CHECK-NEXT: cir.return %2 : !s32i loc(#loc8) +} diff --git a/clang/test/CIR/CodeGen/builtin-constant-p.c b/clang/test/CIR/CodeGen/builtin-constant-p.c new file mode 100644 index 000000000000..a8eb13adacfd --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-constant-p.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +int a = 0; +int foo() { + return __builtin_constant_p(a); +} + +// CIR: cir.func no_proto @foo() -> !s32i extra(#fn_attr) +// CIR: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CIR: [[TMP1:%.*]] = cir.get_global @a : !cir.ptr +// CIR: [[TMP2:%.*]] = cir.load [[TMP1]] : !cir.ptr, !s32i +// CIR: [[TMP3:%.*]] = cir.is_constant([[TMP2]] : !s32i) : !cir.bool +// CIR: [[TMP4:%.*]] = cir.cast(bool_to_int, [[TMP3]] : !cir.bool), !s32i +// CIR: cir.store [[TMP4]], [[TMP0]] : !s32i, !cir.ptr +// CIR: [[TMP5:%.*]] = cir.load [[TMP0]] : !cir.ptr, !s32i +// CIR: cir.return [[TMP5]] : !s32i + +// LLVM:define dso_local i32 @foo() +// LLVM: [[TMP1:%.*]] = alloca i32, i64 1 +// LLVM: [[TMP2:%.*]] = load i32, ptr @a +// LLVM: [[TMP3:%.*]] = call i1 @llvm.is.constant.i32(i32 [[TMP2]]) +// LLVM: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8 +// LLVM: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 +// LLVM: store i32 [[TMP5]], ptr [[TMP1]] +// LLVM: [[TMP6:%.*]] = load i32, ptr [[TMP1]] +// LLVM: ret i32 [[TMP6]] + diff --git a/clang/test/CIR/CodeGen/builtin-floating-point.c 
b/clang/test/CIR/CodeGen/builtin-floating-point.c new file mode 100644 index 000000000000..e882d8606458 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-floating-point.c @@ -0,0 +1,1568 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-apple-darwin-macho -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s --check-prefix=AARCH64 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +// lround + +long my_lroundf(float f) { + return __builtin_lroundf(f); + // CHECK: cir.func @my_lroundf + // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @my_lroundf + // LLVM: %{{.+}} = call i64 @llvm.lround.i64.f32(float %{{.+}}) + // LLVM: } +} + +long my_lround(double f) { + return __builtin_lround(f); + // CHECK: cir.func @my_lround + // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @my_lround + // LLVM: %{{.+}} = call i64 @llvm.lround.i64.f64(double %{{.+}}) + // LLVM: } +} + +long my_lroundl(long double f) { + return __builtin_lroundl(f); + // CHECK: cir.func @my_lroundl + // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.lround %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @my_lroundl + // LLVM: %{{.+}} = call i64 @llvm.lround.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +long lroundf(float); +long lround(double); +long lroundl(long double); + +long call_lroundf(float f) { + return lroundf(f); + // CHECK: cir.func @call_lroundf + // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @call_lroundf + // LLVM: %{{.+}} = call i64 @llvm.lround.i64.f32(float %{{.+}}) + // LLVM: } +} + +long call_lround(double f) { + return lround(f); + // CHECK: cir.func @call_lround + // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @call_lround + // LLVM: %{{.+}} = call i64 @llvm.lround.i64.f64(double %{{.+}}) + // LLVM: } +} + +long call_lroundl(long double f) { + return lroundl(f); + // CHECK: cir.func @call_lroundl + // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.lround %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @call_lroundl + // LLVM: %{{.+}} = call i64 @llvm.lround.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// llround + +long long my_llroundf(float f) { + return __builtin_llroundf(f); + // CHECK: cir.func @my_llroundf + // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @my_llroundf + // LLVM: %{{.+}} = call i64 @llvm.llround.i64.f32(float %{{.+}}) + // LLVM: } +} + +long long my_llround(double f) { + return __builtin_llround(f); + // CHECK: cir.func @my_llround + // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @my_llround + // LLVM: %{{.+}} = call i64 @llvm.llround.i64.f64(double %{{.+}}) + // LLVM: } +} + +long long my_llroundl(long double f) { + return __builtin_llroundl(f); + // CHECK: cir.func @my_llroundl + // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.llround %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @my_llroundl + // LLVM: %{{.+}} 
= call i64 @llvm.llround.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +long long llroundf(float); +long long llround(double); +long long llroundl(long double); + +long long call_llroundf(float f) { + return llroundf(f); + // CHECK: cir.func @call_llroundf + // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @call_llroundf + // LLVM: %{{.+}} = call i64 @llvm.llround.i64.f32(float %{{.+}}) + // LLVM: } +} + +long long call_llround(double f) { + return llround(f); + // CHECK: cir.func @call_llround + // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @call_llround + // LLVM: %{{.+}} = call i64 @llvm.llround.i64.f64(double %{{.+}}) + // LLVM: } +} + +long long call_llroundl(long double f) { + return llroundl(f); + // CHECK: cir.func @call_llroundl + // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.llround %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @call_llroundl + // LLVM: %{{.+}} = call i64 @llvm.llround.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// lrint + +long my_lrintf(float f) { + return __builtin_lrintf(f); + // CHECK: cir.func @my_lrintf + // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @my_lrintf + // LLVM: %{{.+}} = call i64 @llvm.lrint.i64.f32(float %{{.+}}) + // LLVM: } +} + +long my_lrint(double f) { + return __builtin_lrint(f); + // CHECK: cir.func @my_lrint + // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @my_lrint + // LLVM: %{{.+}} = call i64 @llvm.lrint.i64.f64(double %{{.+}}) + // LLVM: } +} + +long my_lrintl(long double f) { + return __builtin_lrintl(f); + // CHECK: cir.func @my_lrintl + // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @my_lrintl + // LLVM: %{{.+}} = call i64 @llvm.lrint.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +long lrintf(float); +long lrint(double); +long lrintl(long double); + +long call_lrintf(float f) { + return lrintf(f); + // CHECK: cir.func @call_lrintf + // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @call_lrintf + // LLVM: %{{.+}} = call i64 @llvm.lrint.i64.f32(float %{{.+}}) + // LLVM: } +} + +long call_lrint(double f) { + return lrint(f); + // CHECK: cir.func @call_lrint + // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @call_lrint + // LLVM: %{{.+}} = call i64 @llvm.lrint.i64.f64(double %{{.+}}) + // LLVM: } +} + +long call_lrintl(long double f) { + return lrintl(f); + // CHECK: cir.func @call_lrintl + // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @call_lrintl + // LLVM: %{{.+}} = call i64 @llvm.lrint.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// llrint + +long long my_llrintf(float f) { + return __builtin_llrintf(f); + // CHECK: cir.func @my_llrintf + // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @my_llrintf + // LLVM: %{{.+}} = call i64 @llvm.llrint.i64.f32(float %{{.+}}) + // LLVM: } +} + +long long my_llrint(double f) { + return __builtin_llrint(f); + // CHECK: cir.func @my_llrint + // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @my_llrint + // LLVM: 
%{{.+}} = call i64 @llvm.llrint.i64.f64(double %{{.+}}) + // LLVM: } +} + +long long my_llrintl(long double f) { + return __builtin_llrintl(f); + // CHECK: cir.func @my_llrintl + // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @my_llrintl + // LLVM: %{{.+}} = call i64 @llvm.llrint.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +long long llrintf(float); +long long llrint(double); +long long llrintl(long double); + +long long call_llrintf(float f) { + return llrintf(f); + // CHECK: cir.func @call_llrintf + // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.float -> !s64i + + // LLVM: define dso_local i64 @call_llrintf + // LLVM: %{{.+}} = call i64 @llvm.llrint.i64.f32(float %{{.+}}) + // LLVM: } +} + +long long call_llrint(double f) { + return llrint(f); + // CHECK: cir.func @call_llrint + // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.double -> !s64i + + // LLVM: define dso_local i64 @call_llrint + // LLVM: %{{.+}} = call i64 @llvm.llrint.i64.f64(double %{{.+}}) + // LLVM: } +} + +long long call_llrintl(long double f) { + return llrintl(f); + // CHECK: cir.func @call_llrintl + // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double -> !s64i + // AARCH64: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double -> !s64i + + // LLVM: define dso_local i64 @call_llrintl + // LLVM: %{{.+}} = call i64 @llvm.llrint.i64.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// ceil + +float my_ceilf(float f) { + return __builtin_ceilf(f); + // CHECK: cir.func @my_ceilf + // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.float + + // LLVM: define dso_local float @my_ceilf(float %0) + // LLVM: %{{.+}} = call float @llvm.ceil.f32(float %{{.+}}) + // LLVM: } +} + +double my_ceil(double f) { + return __builtin_ceil(f); + // CHECK: cir.func @my_ceil + // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.double + + // LLVM: define dso_local double @my_ceil(double %0) + // LLVM: %{{.+}} = call double @llvm.ceil.f64(double %{{.+}}) + // LLVM: } +} + +long double my_ceill(long double f) { + return __builtin_ceill(f); + // CHECK: cir.func @my_ceill + // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.ceil {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_ceill(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.ceil.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float ceilf(float); +double ceil(double); +long double ceill(long double); + +float call_ceilf(float f) { + return ceilf(f); + // CHECK: cir.func @call_ceilf + // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.float + + // LLVM: define dso_local float @call_ceilf(float %0) + // LLVM: %{{.+}} = call float @llvm.ceil.f32(float %{{.+}}) + // LLVM: } +} + +double call_ceil(double f) { + return ceil(f); + // CHECK: cir.func @call_ceil + // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.double + + // LLVM: define dso_local double @call_ceil(double %0) + // LLVM: %{{.+}} = call double @llvm.ceil.f64(double %{{.+}}) + // LLVM: } +} + +long double call_ceill(long double f) { + return ceill(f); + // CHECK: cir.func @call_ceill + // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.ceil {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_ceill(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.ceil.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// cos + +float my_cosf(float f) { + return __builtin_cosf(f); + // CHECK: cir.func @my_cosf + // CHECK: {{.+}} = cir.cos {{.+}} : !cir.float + + // LLVM: define dso_local 
float @my_cosf(float %0) + // LLVM: %{{.+}} = call float @llvm.cos.f32(float %{{.+}}) + // LLVM: } +} + +double my_cos(double f) { + return __builtin_cos(f); + // CHECK: cir.func @my_cos + // CHECK: {{.+}} = cir.cos {{.+}} : !cir.double + + // LLVM: define dso_local double @my_cos(double %0) + // LLVM: %{{.+}} = call double @llvm.cos.f64(double %{{.+}}) + // LLVM: } +} + +long double my_cosl(long double f) { + return __builtin_cosl(f); + // CHECK: cir.func @my_cosl + // CHECK: {{.+}} = cir.cos {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.cos {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_cosl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.cos.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float cosf(float); +double cos(double); +long double cosl(long double); + +float call_cosf(float f) { + return cosf(f); + // CHECK: cir.func @call_cosf + // CHECK: {{.+}} = cir.cos {{.+}} : !cir.float + + // LLVM: define dso_local float @call_cosf(float %0) + // LLVM: %{{.+}} = call float @llvm.cos.f32(float %{{.+}}) + // LLVM: } +} + +double call_cos(double f) { + return cos(f); + // CHECK: cir.func @call_cos + // CHECK: {{.+}} = cir.cos {{.+}} : !cir.double + + // LLVM: define dso_local double @call_cos(double %0) + // LLVM: %{{.+}} = call double @llvm.cos.f64(double %{{.+}}) + // LLVM: } +} + +long double call_cosl(long double f) { + return cosl(f); + // CHECK: cir.func @call_cosl + // CHECK: {{.+}} = cir.cos {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.cos {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_cosl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.cos.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// exp + +float my_expf(float f) { + return __builtin_expf(f); + // CHECK: cir.func @my_expf + // CHECK: {{.+}} = cir.exp {{.+}} : !cir.float + + // LLVM: define dso_local float @my_expf(float %0) + // LLVM: %{{.+}} = call float @llvm.exp.f32(float %{{.+}}) + // LLVM: } +} + +double my_exp(double f) { + return __builtin_exp(f); + // CHECK: cir.func @my_exp + // CHECK: {{.+}} = cir.exp {{.+}} : !cir.double + + // LLVM: define dso_local double @my_exp(double %0) + // LLVM: %{{.+}} = call double @llvm.exp.f64(double %{{.+}}) + // LLVM: } +} + +long double my_expl(long double f) { + return __builtin_expl(f); + // CHECK: cir.func @my_expl + // CHECK: {{.+}} = cir.exp {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.exp {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_expl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.exp.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float expf(float); +double exp(double); +long double expl(long double); + +float call_expf(float f) { + return expf(f); + // CHECK: cir.func @call_expf + // CHECK: {{.+}} = cir.exp {{.+}} : !cir.float + + // LLVM: define dso_local float @call_expf(float %0) + // LLVM: %{{.+}} = call float @llvm.exp.f32(float %{{.+}}) + // LLVM: } +} + +double call_exp(double f) { + return exp(f); + // CHECK: cir.func @call_exp + // CHECK: {{.+}} = cir.exp {{.+}} : !cir.double + + // LLVM: define dso_local double @call_exp(double %0) + // LLVM: %{{.+}} = call double @llvm.exp.f64(double %{{.+}}) + // LLVM: } +} + +long double call_expl(long double f) { + return expl(f); + // CHECK: cir.func @call_expl + // CHECK: {{.+}} = cir.exp {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.exp {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_expl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.exp.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// exp2 + +float 
my_exp2f(float f) { + return __builtin_exp2f(f); + // CHECK: cir.func @my_exp2f + // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.float + + // LLVM: define dso_local float @my_exp2f(float %0) + // LLVM: %{{.+}} = call float @llvm.exp2.f32(float %{{.+}}) + // LLVM: } +} + +double my_exp2(double f) { + return __builtin_exp2(f); + // CHECK: cir.func @my_exp2 + // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.double + + // LLVM: define dso_local double @my_exp2(double %0) + // LLVM: %{{.+}} = call double @llvm.exp2.f64(double %{{.+}}) + // LLVM: } +} + +long double my_exp2l(long double f) { + return __builtin_exp2l(f); + // CHECK: cir.func @my_exp2l + // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.exp2 {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_exp2l(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.exp2.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float exp2f(float); +double exp2(double); +long double exp2l(long double); + +float call_exp2f(float f) { + return exp2f(f); + // CHECK: cir.func @call_exp2f + // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.float + + // LLVM: define dso_local float @call_exp2f(float %0) + // LLVM: %{{.+}} = call float @llvm.exp2.f32(float %{{.+}}) + // LLVM: } +} + +double call_exp2(double f) { + return exp2(f); + // CHECK: cir.func @call_exp2 + // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.double + + // LLVM: define dso_local double @call_exp2(double %0) + // LLVM: %{{.+}} = call double @llvm.exp2.f64(double %{{.+}}) + // LLVM: } +} + +long double call_exp2l(long double f) { + return exp2l(f); + // CHECK: cir.func @call_exp2l + // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.exp2 {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_exp2l(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.exp2.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// floor + +float my_floorf(float f) { + return __builtin_floorf(f); + // CHECK: cir.func @my_floorf + // CHECK: {{.+}} = cir.floor {{.+}} : !cir.float + + // LLVM: define dso_local float @my_floorf(float %0) + // LLVM: %{{.+}} = call float @llvm.floor.f32(float %{{.+}}) + // LLVM: } +} + +double my_floor(double f) { + return __builtin_floor(f); + // CHECK: cir.func @my_floor + // CHECK: {{.+}} = cir.floor {{.+}} : !cir.double + + // LLVM: define dso_local double @my_floor(double %0) + // LLVM: %{{.+}} = call double @llvm.floor.f64(double %{{.+}}) + // LLVM: } +} + +long double my_floorl(long double f) { + return __builtin_floorl(f); + // CHECK: cir.func @my_floorl + // CHECK: {{.+}} = cir.floor {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.floor {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_floorl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.floor.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float floorf(float); +double floor(double); +long double floorl(long double); + +float call_floorf(float f) { + return floorf(f); + // CHECK: cir.func @call_floorf + // CHECK: {{.+}} = cir.floor {{.+}} : !cir.float + + // LLVM: define dso_local float @call_floorf(float %0) + // LLVM: %{{.+}} = call float @llvm.floor.f32(float %{{.+}}) + // LLVM: } +} + +double call_floor(double f) { + return floor(f); + // CHECK: cir.func @call_floor + // CHECK: {{.+}} = cir.floor {{.+}} : !cir.double + + // LLVM: define dso_local double @call_floor(double %0) + // LLVM: %{{.+}} = call double @llvm.floor.f64(double %{{.+}}) + // LLVM: } +} + +long double call_floorl(long double f) { + return floorl(f); + // CHECK: cir.func @call_floorl + // 
CHECK: {{.+}} = cir.floor {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.floor {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_floorl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.floor.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// log + +float my_logf(float f) { + return __builtin_logf(f); + // CHECK: cir.func @my_logf + // CHECK: {{.+}} = cir.log {{.+}} : !cir.float + + // LLVM: define dso_local float @my_logf(float %0) + // LLVM: %{{.+}} = call float @llvm.log.f32(float %{{.+}}) + // LLVM: } +} + +double my_log(double f) { + return __builtin_log(f); + // CHECK: cir.func @my_log + // CHECK: {{.+}} = cir.log {{.+}} : !cir.double + + // LLVM: define dso_local double @my_log(double %0) + // LLVM: %{{.+}} = call double @llvm.log.f64(double %{{.+}}) + // LLVM: } +} + +long double my_logl(long double f) { + return __builtin_logl(f); + // CHECK: cir.func @my_logl + // CHECK: {{.+}} = cir.log {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.log {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_logl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.log.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float logf(float); +double log(double); +long double logl(long double); + +float call_logf(float f) { + return logf(f); + // CHECK: cir.func @call_logf + // CHECK: {{.+}} = cir.log {{.+}} : !cir.float + + // LLVM: define dso_local float @call_logf(float %0) + // LLVM: %{{.+}} = call float @llvm.log.f32(float %{{.+}}) + // LLVM: } +} + +double call_log(double f) { + return log(f); + // CHECK: cir.func @call_log + // CHECK: {{.+}} = cir.log {{.+}} : !cir.double + + // LLVM: define dso_local double @call_log(double %0) + // LLVM: %{{.+}} = call double @llvm.log.f64(double %{{.+}}) + // LLVM: } +} + +long double call_logl(long double f) { + return logl(f); + // CHECK: cir.func @call_logl + // CHECK: {{.+}} = cir.log {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.log {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_logl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.log.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// log10 + +float my_log10f(float f) { + return __builtin_log10f(f); + // CHECK: cir.func @my_log10f + // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.float + + // LLVM: define dso_local float @my_log10f(float %0) + // LLVM: %{{.+}} = call float @llvm.log10.f32(float %{{.+}}) + // LLVM: } +} + +double my_log10(double f) { + return __builtin_log10(f); + // CHECK: cir.func @my_log10 + // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.double + + // LLVM: define dso_local double @my_log10(double %0) + // LLVM: %{{.+}} = call double @llvm.log10.f64(double %{{.+}}) + // LLVM: } +} + +long double my_log10l(long double f) { + return __builtin_log10l(f); + // CHECK: cir.func @my_log10l + // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.log10 {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_log10l(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.log10.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float log10f(float); +double log10(double); +long double log10l(long double); + +float call_log10f(float f) { + return log10f(f); + // CHECK: cir.func @call_log10f + // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.float + + // LLVM: define dso_local float @call_log10f(float %0) + // LLVM: %{{.+}} = call float @llvm.log10.f32(float %{{.+}}) + // LLVM: } +} + +double call_log10(double f) { + return log10(f); + // CHECK: cir.func @call_log10 + // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.double + + 
// LLVM: define dso_local double @call_log10(double %0) + // LLVM: %{{.+}} = call double @llvm.log10.f64(double %{{.+}}) + // LLVM: } +} + +long double call_log10l(long double f) { + return log10l(f); + // CHECK: cir.func @call_log10l + // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.log10 {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_log10l(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.log10.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// log2 + +float my_log2f(float f) { + return __builtin_log2f(f); + // CHECK: cir.func @my_log2f + // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.float + + // LLVM: define dso_local float @my_log2f(float %0) + // LLVM: %{{.+}} = call float @llvm.log2.f32(float %{{.+}}) + // LLVM: } +} + +double my_log2(double f) { + return __builtin_log2(f); + // CHECK: cir.func @my_log2 + // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.double + + // LLVM: define dso_local double @my_log2(double %0) + // LLVM: %{{.+}} = call double @llvm.log2.f64(double %{{.+}}) + // LLVM: } +} + +long double my_log2l(long double f) { + return __builtin_log2l(f); + // CHECK: cir.func @my_log2l + // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.log2 {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_log2l(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.log2.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float log2f(float); +double log2(double); +long double log2l(long double); + +float call_log2f(float f) { + return log2f(f); + // CHECK: cir.func @call_log2f + // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.float + + // LLVM: define dso_local float @call_log2f(float %0) + // LLVM: %{{.+}} = call float @llvm.log2.f32(float %{{.+}}) + // LLVM: } +} + +double call_log2(double f) { + return log2(f); + // CHECK: cir.func @call_log2 + // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.double + + // LLVM: define dso_local double @call_log2(double %0) + // LLVM: %{{.+}} = call double @llvm.log2.f64(double %{{.+}}) + // LLVM: } +} + +long double call_log2l(long double f) { + return log2l(f); + // CHECK: cir.func @call_log2l + // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.log2 {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_log2l(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.log2.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// nearbyint + +float my_nearbyintf(float f) { + return __builtin_nearbyintf(f); + // CHECK: cir.func @my_nearbyintf + // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.float + + // LLVM: define dso_local float @my_nearbyintf(float %0) + // LLVM: %{{.+}} = call float @llvm.nearbyint.f32(float %{{.+}}) + // LLVM: } +} + +double my_nearbyint(double f) { + return __builtin_nearbyint(f); + // CHECK: cir.func @my_nearbyint + // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.double + + // LLVM: define dso_local double @my_nearbyint(double %0) + // LLVM: %{{.+}} = call double @llvm.nearbyint.f64(double %{{.+}}) + // LLVM: } +} + +long double my_nearbyintl(long double f) { + return __builtin_nearbyintl(f); + // CHECK: cir.func @my_nearbyintl + // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_nearbyintl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.nearbyint.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float nearbyintf(float); +double nearbyint(double); +long double nearbyintl(long double); + +float call_nearbyintf(float f) { + 
return nearbyintf(f); + // CHECK: cir.func @call_nearbyintf + // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.float + + // LLVM: define dso_local float @call_nearbyintf(float %0) + // LLVM: %{{.+}} = call float @llvm.nearbyint.f32(float %{{.+}}) + // LLVM: } +} + +double call_nearbyint(double f) { + return nearbyint(f); + // CHECK: cir.func @call_nearbyint + // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.double + + // LLVM: define dso_local double @call_nearbyint(double %0) + // LLVM: %{{.+}} = call double @llvm.nearbyint.f64(double %{{.+}}) + // LLVM: } +} + +long double call_nearbyintl(long double f) { + return nearbyintl(f); + // CHECK: cir.func @call_nearbyintl + // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_nearbyintl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.nearbyint.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// rint + +float my_rintf(float f) { + return __builtin_rintf(f); + // CHECK: cir.func @my_rintf + // CHECK: {{.+}} = cir.rint {{.+}} : !cir.float + + // LLVM: define dso_local float @my_rintf(float %0) + // LLVM: %{{.+}} = call float @llvm.rint.f32(float %{{.+}}) + // LLVM: } +} + +double my_rint(double f) { + return __builtin_rint(f); + // CHECK: cir.func @my_rint + // CHECK: {{.+}} = cir.rint {{.+}} : !cir.double + + // LLVM: define dso_local double @my_rint(double %0) + // LLVM: %{{.+}} = call double @llvm.rint.f64(double %{{.+}}) + // LLVM: } +} + +long double my_rintl(long double f) { + return __builtin_rintl(f); + // CHECK: cir.func @my_rintl + // CHECK: {{.+}} = cir.rint {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.rint {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_rintl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.rint.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float rintf(float); +double rint(double); +long double rintl(long double); + +float call_rintf(float f) { + return rintf(f); + // CHECK: cir.func @call_rintf + // CHECK: {{.+}} = cir.rint {{.+}} : !cir.float + + // LLVM: define dso_local float @call_rintf(float %0) + // LLVM: %{{.+}} = call float @llvm.rint.f32(float %{{.+}}) + // LLVM: } +} + +double call_rint(double f) { + return rint(f); + // CHECK: cir.func @call_rint + // CHECK: {{.+}} = cir.rint {{.+}} : !cir.double + + // LLVM: define dso_local double @call_rint(double %0) + // LLVM: %{{.+}} = call double @llvm.rint.f64(double %{{.+}}) + // LLVM: } +} + +long double call_rintl(long double f) { + return rintl(f); + // CHECK: cir.func @call_rintl + // CHECK: {{.+}} = cir.rint {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.rint {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_rintl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.rint.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// round + +float my_roundf(float f) { + return __builtin_roundf(f); + // CHECK: cir.func @my_roundf + // CHECK: {{.+}} = cir.round {{.+}} : !cir.float + + // LLVM: define dso_local float @my_roundf(float %0) + // LLVM: %{{.+}} = call float @llvm.round.f32(float %{{.+}}) + // LLVM: } +} + +double my_round(double f) { + return __builtin_round(f); + // CHECK: cir.func @my_round + // CHECK: {{.+}} = cir.round {{.+}} : !cir.double + + // LLVM: define dso_local double @my_round(double %0) + // LLVM: %{{.+}} = call double @llvm.round.f64(double %{{.+}}) + // LLVM: } +} + +long double my_roundl(long double f) { + return __builtin_roundl(f); + // CHECK: cir.func @my_roundl + // CHECK: {{.+}} = 
cir.round {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.round {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_roundl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.round.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float roundf(float); +double round(double); +long double roundl(long double); + +float call_roundf(float f) { + return roundf(f); + // CHECK: cir.func @call_roundf + // CHECK: {{.+}} = cir.round {{.+}} : !cir.float + + // LLVM: define dso_local float @call_roundf(float %0) + // LLVM: %{{.+}} = call float @llvm.round.f32(float %{{.+}}) + // LLVM: } +} + +double call_round(double f) { + return round(f); + // CHECK: cir.func @call_round + // CHECK: {{.+}} = cir.round {{.+}} : !cir.double + + // LLVM: define dso_local double @call_round(double %0) + // LLVM: %{{.+}} = call double @llvm.round.f64(double %{{.+}}) + // LLVM: } +} + +long double call_roundl(long double f) { + return roundl(f); + // CHECK: cir.func @call_roundl + // CHECK: {{.+}} = cir.round {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.round {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_roundl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.round.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// sin + +float my_sinf(float f) { + return __builtin_sinf(f); + // CHECK: cir.func @my_sinf + // CHECK: {{.+}} = cir.sin {{.+}} : !cir.float + + // LLVM: define dso_local float @my_sinf(float %0) + // LLVM: %{{.+}} = call float @llvm.sin.f32(float %{{.+}}) + // LLVM: } +} + +double my_sin(double f) { + return __builtin_sin(f); + // CHECK: cir.func @my_sin + // CHECK: {{.+}} = cir.sin {{.+}} : !cir.double + + // LLVM: define dso_local double @my_sin(double %0) + // LLVM: %{{.+}} = call double @llvm.sin.f64(double %{{.+}}) + // LLVM: } +} + +long double my_sinl(long double f) { + return __builtin_sinl(f); + // CHECK: cir.func @my_sinl + // CHECK: {{.+}} = cir.sin {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.sin {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_sinl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.sin.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float sinf(float); +double sin(double); +long double sinl(long double); + +float call_sinf(float f) { + return sinf(f); + // CHECK: cir.func @call_sinf + // CHECK: {{.+}} = cir.sin {{.+}} : !cir.float + + // LLVM: define dso_local float @call_sinf(float %0) + // LLVM: %{{.+}} = call float @llvm.sin.f32(float %{{.+}}) + // LLVM: } +} + +double call_sin(double f) { + return sin(f); + // CHECK: cir.func @call_sin + // CHECK: {{.+}} = cir.sin {{.+}} : !cir.double + + // LLVM: define dso_local double @call_sin(double %0) + // LLVM: %{{.+}} = call double @llvm.sin.f64(double %{{.+}}) + // LLVM: } +} + +long double call_sinl(long double f) { + return sinl(f); + // CHECK: cir.func @call_sinl + // CHECK: {{.+}} = cir.sin {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.sin {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_sinl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.sin.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// sqrt + +float my_sqrtf(float f) { + return __builtin_sqrtf(f); + // CHECK: cir.func @my_sqrtf + // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.float + + // LLVM: define dso_local float @my_sqrtf(float %0) + // LLVM: %{{.+}} = call float @llvm.sqrt.f32(float %{{.+}}) + // LLVM: } +} + +double my_sqrt(double f) { + return __builtin_sqrt(f); + // CHECK: cir.func @my_sqrt + // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.double + + // LLVM: define dso_local 
double @my_sqrt(double %0) + // LLVM: %{{.+}} = call double @llvm.sqrt.f64(double %{{.+}}) + // LLVM: } +} + +long double my_sqrtl(long double f) { + return __builtin_sqrtl(f); + // CHECK: cir.func @my_sqrtl + // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.sqrt {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_sqrtl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.sqrt.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float sqrtf(float); +double sqrt(double); +long double sqrtl(long double); + +float call_sqrtf(float f) { + return sqrtf(f); + // CHECK: cir.func @call_sqrtf + // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.float + + // LLVM: define dso_local float @call_sqrtf(float %0) + // LLVM: %{{.+}} = call float @llvm.sqrt.f32(float %{{.+}}) + // LLVM: } +} + +double call_sqrt(double f) { + return sqrt(f); + // CHECK: cir.func @call_sqrt + // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.double + + // LLVM: define dso_local double @call_sqrt(double %0) + // LLVM: %{{.+}} = call double @llvm.sqrt.f64(double %{{.+}}) + // LLVM: } +} + +long double call_sqrtl(long double f) { + return sqrtl(f); + // CHECK: cir.func @call_sqrtl + // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.sqrt {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_sqrtl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.sqrt.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// trunc + +float my_truncf(float f) { + return __builtin_truncf(f); + // CHECK: cir.func @my_truncf + // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.float + + // LLVM: define dso_local float @my_truncf(float %0) + // LLVM: %{{.+}} = call float @llvm.trunc.f32(float %{{.+}}) + // LLVM: } +} + +double my_trunc(double f) { + return __builtin_trunc(f); + // CHECK: cir.func @my_trunc + // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.double + + // LLVM: define dso_local double @my_trunc(double %0) + // LLVM: %{{.+}} = call double @llvm.trunc.f64(double %{{.+}}) + // LLVM: } +} + +long double my_truncl(long double f) { + return __builtin_truncl(f); + // CHECK: cir.func @my_truncl + // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.trunc {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_truncl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.trunc.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +float truncf(float); +double trunc(double); +long double truncl(long double); + +float call_truncf(float f) { + return truncf(f); + // CHECK: cir.func @call_truncf + // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.float + + // LLVM: define dso_local float @call_truncf(float %0) + // LLVM: %{{.+}} = call float @llvm.trunc.f32(float %{{.+}}) + // LLVM: } +} + +double call_trunc(double f) { + return trunc(f); + // CHECK: cir.func @call_trunc + // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.double + + // LLVM: define dso_local double @call_trunc(double %0) + // LLVM: %{{.+}} = call double @llvm.trunc.f64(double %{{.+}}) + // LLVM: } +} + +long double call_truncl(long double f) { + return truncl(f); + // CHECK: cir.func @call_truncl + // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.long_double + // AARCH64: {{.+}} = cir.trunc {{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_truncl(x86_fp80 %0) + // LLVM: %{{.+}} = call x86_fp80 @llvm.trunc.f80(x86_fp80 %{{.+}}) + // LLVM: } +} + +// copysign + +float my_copysignf(float x, float y) { + return __builtin_copysignf(x, y); + // CHECK: cir.func @my_copysignf + // CHECK: %{{.+}} = cir.copysign %{{.+}}, 
%{{.+}} : !cir.float + + // LLVM: define dso_local float @my_copysignf + // LLVM: %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double my_copysign(double x, double y) { + return __builtin_copysign(x, y); + // CHECK: cir.func @my_copysign + // CHECK: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @my_copysign + // LLVM: %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double my_copysignl(long double x, long double y) { + return __builtin_copysignl(x, y); + // CHECK: cir.func @my_copysignl + // CHECK: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_copysignl + // LLVM: %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +float copysignf(float, float); +double copysign(double, double); +long double copysignl(long double, long double); + +float call_copysignf(float x, float y) { + return copysignf(x, y); + // CHECK: cir.func @call_copysignf + // CHECK: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @call_copysignf + // LLVM: %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double call_copysign(double x, double y) { + return copysign(x, y); + // CHECK: cir.func @call_copysign + // CHECK: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @call_copysign + // LLVM: %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double call_copysignl(long double x, long double y) { + return copysignl(x, y); + // CHECK: cir.func @call_copysignl + // CHECK: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_copysignl + // LLVM: %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +// fmax + +float my_fmaxf(float x, float y) { + return __builtin_fmaxf(x, y); + // CHECK: cir.func @my_fmaxf + // CHECK: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @my_fmaxf + // LLVM: %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double my_fmax(double x, double y) { + return __builtin_fmax(x, y); + // CHECK: cir.func @my_fmax + // CHECK: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @my_fmax + // LLVM: %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double my_fmaxl(long double x, long double y) { + return __builtin_fmaxl(x, y); + // CHECK: cir.func @my_fmaxl + // CHECK: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_fmaxl + // LLVM: %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +float fmaxf(float, float); +double fmax(double, double); +long double fmaxl(long double, long double); + +float call_fmaxf(float x, float y) { + return fmaxf(x, y); + // CHECK: cir.func @call_fmaxf + // CHECK: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @call_fmaxf + // LLVM: %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}}) + // LLVM: 
} +} + +double call_fmax(double x, double y) { + return fmax(x, y); + // CHECK: cir.func @call_fmax + // CHECK: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @call_fmax + // LLVM: %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double call_fmaxl(long double x, long double y) { + return fmaxl(x, y); + // CHECK: cir.func @call_fmaxl + // CHECK: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.fmax %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_fmaxl + // LLVM: %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +// fmin + +float my_fminf(float x, float y) { + return __builtin_fminf(x, y); + // CHECK: cir.func @my_fminf + // CHECK: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @my_fminf + // LLVM: %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double my_fmin(double x, double y) { + return __builtin_fmin(x, y); + // CHECK: cir.func @my_fmin + // CHECK: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @my_fmin + // LLVM: %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double my_fminl(long double x, long double y) { + return __builtin_fminl(x, y); + // CHECK: cir.func @my_fminl + // CHECK: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_fminl + // LLVM: %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +float fminf(float, float); +double fmin(double, double); +long double fminl(long double, long double); + +float call_fminf(float x, float y) { + return fminf(x, y); + // CHECK: cir.func @call_fminf + // CHECK: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @call_fminf + // LLVM: %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double call_fmin(double x, double y) { + return fmin(x, y); + // CHECK: cir.func @call_fmin + // CHECK: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @call_fmin + // LLVM: %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double call_fminl(long double x, long double y) { + return fminl(x, y); + // CHECK: cir.func @call_fminl + // CHECK: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.fmin %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_fminl + // LLVM: %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +// fmod + +float my_fmodf(float x, float y) { + return __builtin_fmodf(x, y); + // CHECK: cir.func @my_fmodf + // CHECK: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @my_fmodf + // LLVM: %{{.+}} = frem float %{{.+}}, %{{.+}} + // LLVM: } +} + +double my_fmod(double x, double y) { + return __builtin_fmod(x, y); + // CHECK: cir.func @my_fmod + // CHECK: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @my_fmod + // LLVM: %{{.+}} = frem double %{{.+}}, %{{.+}} + // LLVM: } +} + +long double my_fmodl(long double x, long double y) { + return __builtin_fmodl(x, y); + // CHECK: cir.func @my_fmodl + // CHECK: %{{.+}} = 
cir.fmod %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_fmodl + // LLVM: %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}} + // LLVM: } +} + +float fmodf(float, float); +double fmod(double, double); +long double fmodl(long double, long double); + +float call_fmodf(float x, float y) { + return fmodf(x, y); + // CHECK: cir.func @call_fmodf + // CHECK: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @call_fmodf + // LLVM: %{{.+}} = frem float %{{.+}}, %{{.+}} + // LLVM: } +} + +double call_fmod(double x, double y) { + return fmod(x, y); + // CHECK: cir.func @call_fmod + // CHECK: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @call_fmod + // LLVM: %{{.+}} = frem double %{{.+}}, %{{.+}} + // LLVM: } +} + +long double call_fmodl(long double x, long double y) { + return fmodl(x, y); + // CHECK: cir.func @call_fmodl + // CHECK: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_fmodl + // LLVM: %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}} + // LLVM: } +} + +// pow + +float my_powf(float x, float y) { + return __builtin_powf(x, y); + // CHECK: cir.func @my_powf + // CHECK: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @my_powf + // LLVM: %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double my_pow(double x, double y) { + return __builtin_pow(x, y); + // CHECK: cir.func @my_pow + // CHECK: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @my_pow + // LLVM: %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double my_powl(long double x, long double y) { + return __builtin_powl(x, y); + // CHECK: cir.func @my_powl + // CHECK: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @my_powl + // LLVM: %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} + +float powf(float, float); +double pow(double, double); +long double powl(long double, long double); + +float call_powf(float x, float y) { + return powf(x, y); + // CHECK: cir.func @call_powf + // CHECK: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.float + + // LLVM: define dso_local float @call_powf + // LLVM: %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}}) + // LLVM: } +} + +double call_pow(double x, double y) { + return pow(x, y); + // CHECK: cir.func @call_pow + // CHECK: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.double + + // LLVM: define dso_local double @call_pow + // LLVM: %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}}) + // LLVM: } +} + +long double call_powl(long double x, long double y) { + return powl(x, y); + // CHECK: cir.func @call_powl + // CHECK: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double + // AARCH64: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double + + // LLVM: define dso_local x86_fp80 @call_powl + // LLVM: %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // LLVM: } +} diff --git a/clang/test/CIR/CodeGen/builtin-ms-alloca.c b/clang/test/CIR/CodeGen/builtin-ms-alloca.c new file mode 100644 index 000000000000..baec3072d58d --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-ms-alloca.c @@ 
-0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fms-extensions -emit-cir %s -o - | FileCheck %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fms-extensions -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +typedef __SIZE_TYPE__ size_t; + +void my_win_alloca(size_t n) +{ + int *c1 = (int *)_alloca(n); +} + +// CIR: cir.func @my_win_alloca([[ALLOCA_SIZE:%.*]]: !u64i +// CIR: cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr +// CIR: [[TMP_ALLOCA_SIZE:%.*]] = cir.load [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr, !u64i +// CIR: [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64} +// CIR-NEXT: cir.cast(bitcast, [[ALLOCA_RES]] : !cir.ptr), !cir.ptr +// CIR: } + + +// LLVM: define dso_local void @my_win_alloca(i64 [[ALLOCA_SIZE:%.*]]) +// LLVM: store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]], +// LLVM: [[TMP_ALLOCA_SIZE:%.*]] = load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]], +// LLVM: [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16 +// LLVM: } diff --git a/clang/test/CIR/CodeGen/builtin-prefetch.c b/clang/test/CIR/CodeGen/builtin-prefetch.c new file mode 100644 index 000000000000..56ac9a70ddb4 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-prefetch.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +void foo(void *a) { + __builtin_prefetch(a, 1, 1); +} + +// CIR: cir.func @foo(%arg0: !cir.ptr loc({{.*}})) +// CIR: [[PTR_ALLOC:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] {alignment = 8 : i64} +// CIR: cir.store %arg0, [[PTR_ALLOC]] : !cir.ptr, !cir.ptr> +// CIR: [[PTR:%.*]] = cir.load [[PTR_ALLOC]] : !cir.ptr>, !cir.ptr +// CIR: cir.prefetch([[PTR]] : !cir.ptr) locality(1) write +// CIR: cir.return + +// LLVM: define dso_local void @foo(ptr [[ARG0:%.*]]) +// LLVM: [[PTR_ALLOC:%.*]] = alloca ptr, i64 1 +// LLVM: store ptr [[ARG0]], ptr [[PTR_ALLOC]] +// LLVM: [[PTR:%.*]] = load ptr, ptr [[PTR_ALLOC]] +// LLVM: call void @llvm.prefetch.p0(ptr [[PTR]], i32 1, i32 1, i32 1) +// LLVM: ret void diff --git a/clang/test/CIR/CodeGen/builtin-rotate.c b/clang/test/CIR/CodeGen/builtin-rotate.c new file mode 100644 index 000000000000..bc0c93690658 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-rotate.c @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +void f() { +// CIR-LABEL: @f +// LLVM-LABEL: @f + unsigned int v[4]; + unsigned int h = __builtin_rotateleft32(v[0], 1); +// CIR: %[[CONST:.*]] = cir.const #cir.int<1> : !s32i +// CIR: %[[CAST:.*]] = cir.cast(integral, %[[CONST]] : !s32i), !u32i +// CIR: cir.rotate left {{.*}}, %[[CAST]] -> !u32i + +// LLVM: %[[SRC:.*]] = load i32, ptr +// LLVM: call i32 @llvm.fshl.i32(i32 %[[SRC]], i32 %[[SRC]], i32 1) +} + +unsigned char rotl8(unsigned char x, unsigned char y) { +// CIR-LABEL: rotl8 +// CIR: cir.rotate left {{.*}}, {{.*}} -> !u8i + +// LLVM-LABEL: rotl8 +// LLVM: [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]]) + return __builtin_rotateleft8(x, y); +} + +short rotl16(short x, short y) { +// CIR-LABEL: 
rotl16 +// CIR: cir.rotate left {{.*}}, {{.*}} -> !u16i + +// LLVM-LABEL: rotl16 +// LLVM: [[F:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]]) + return __builtin_rotateleft16(x, y); +} + +int rotl32(int x, unsigned int y) { +// CIR-LABEL: rotl32 +// CIR: cir.rotate left {{.*}}, {{.*}} -> !u32i + +// LLVM-LABEL: rotl32 +// LLVM: [[F:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]]) + return __builtin_rotateleft32(x, y); +} + +unsigned long long rotl64(unsigned long long x, long long y) { +// CIR-LABEL: rotl64 +// CIR: cir.rotate left {{.*}}, {{.*}} -> !u64i + +// LLVM-LABEL: rotl64 +// LLVM: [[F:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]]) + return __builtin_rotateleft64(x, y); +} + +char rotr8(char x, char y) { +// CIR-LABEL: rotr8 +// CIR: cir.rotate right {{.*}}, {{.*}} -> !u8i + +// LLVM-LABEL: rotr8 +// LLVM: [[F:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]]) + return __builtin_rotateright8(x, y); +} + +unsigned short rotr16(unsigned short x, unsigned short y) { +// CIR-LABEL: rotr16 +// CIR: cir.rotate right {{.*}}, {{.*}} -> !u16i + +// LLVM-LABEL: rotr16 +// LLVM: [[F:%.*]] = call i16 @llvm.fshr.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]]) + return __builtin_rotateright16(x, y); +} + +unsigned int rotr32(unsigned int x, int y) { +// CIR-LABEL: rotr32 +// CIR: cir.rotate right {{.*}}, {{.*}} -> !u32i + +// LLVM-LABEL: rotr32 +// LLVM: [[F:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]]) + return __builtin_rotateright32(x, y); +} + +long long rotr64(long long x, unsigned long long y) { +// CIR-LABEL: rotr64 +// CIR: cir.rotate right {{.*}}, {{.*}} -> !u64i + +// LLVM-LABEL: rotr64 +// LLVM: [[F:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]]) + return __builtin_rotateright64(x, y); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/builtins-overflow.cpp b/clang/test/CIR/CodeGen/builtins-overflow.cpp new file mode 100644 index 000000000000..d4652527cb56 --- /dev/null +++ b/clang/test/CIR/CodeGen/builtins-overflow.cpp @@ -0,0 +1,364 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +bool test_add_overflow_uint_uint_uint(unsigned x, unsigned y, unsigned *res) { + return __builtin_add_overflow(x, y, res); +} + +// CHECK: cir.func @_Z32test_add_overflow_uint_uint_uintjjPj +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#LHS]], %[[#RHS]]) : !u32i, (!u32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr +// CHECK: } + +bool test_add_overflow_int_int_int(int x, int y, int *res) { + return __builtin_add_overflow(x, y, res); +} + +// CHECK: cir.func @_Z29test_add_overflow_int_int_intiiPi +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#LHS]], %[[#RHS]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_add_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y, _BitInt(31) *res) { + return __builtin_add_overflow(x, y, res); +} + +// 
CHECK: cir.func @_Z38test_add_overflow_xint31_xint31_xint31DB31_S_PS_ +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#LHS]], %[[#RHS]]) : , (, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !cir.int, !cir.ptr> +// CHECK: } + +bool test_sub_overflow_uint_uint_uint(unsigned x, unsigned y, unsigned *res) { + return __builtin_sub_overflow(x, y, res); +} + +// CHECK: cir.func @_Z32test_sub_overflow_uint_uint_uintjjPj +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#LHS]], %[[#RHS]]) : !u32i, (!u32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr +// CHECK: } + +bool test_sub_overflow_int_int_int(int x, int y, int *res) { + return __builtin_sub_overflow(x, y, res); +} + +// CHECK: cir.func @_Z29test_sub_overflow_int_int_intiiPi +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#LHS]], %[[#RHS]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_sub_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y, _BitInt(31) *res) { + return __builtin_sub_overflow(x, y, res); +} + +// CHECK: cir.func @_Z38test_sub_overflow_xint31_xint31_xint31DB31_S_PS_ +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#LHS]], %[[#RHS]]) : , (, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !cir.int, !cir.ptr> +// CHECK: } + +bool test_mul_overflow_uint_uint_uint(unsigned x, unsigned y, unsigned *res) { + return __builtin_mul_overflow(x, y, res); +} + +// CHECK: cir.func @_Z32test_mul_overflow_uint_uint_uintjjPj +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : !u32i, (!u32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr +// CHECK: } + +bool test_mul_overflow_int_int_int(int x, int y, int *res) { + return __builtin_mul_overflow(x, y, res); +} + +// CHECK: cir.func @_Z29test_mul_overflow_int_int_intiiPi +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_mul_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y, _BitInt(31) *res) { + return __builtin_mul_overflow(x, y, res); +} + +// CHECK: cir.func 
@_Z38test_mul_overflow_xint31_xint31_xint31DB31_S_PS_ +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr>, !cir.int +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : , (, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !cir.int, !cir.ptr> +// CHECK: } + +bool test_mul_overflow_ulong_ulong_long(unsigned long x, unsigned long y, unsigned long *res) { + return __builtin_mul_overflow(x, y, res); +} + +// CHECK: cir.func @_Z34test_mul_overflow_ulong_ulong_longmmPm +// CHECK: %[[#LHS:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RHS:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_add_overflow_uint_int_int(unsigned x, int y, int *res) { + return __builtin_add_overflow(x, y, res); +} + +// CHECK: cir.func @_Z30test_add_overflow_uint_int_intjiPi +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[#PROM_X:]] = cir.cast(integral, %[[#X]] : !u32i), !cir.int +// CHECK-NEXT: %[[#PROM_Y:]] = cir.cast(integral, %[[#Y]] : !s32i), !cir.int +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#PROM_X]], %[[#PROM_Y]]) : , (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_add_overflow_volatile(int x, int y, volatile int *res) { + return __builtin_add_overflow(x, y, res); +} + +// CHECK: cir.func @_Z26test_add_overflow_volatileiiPVi +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store volatile %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_uadd_overflow(unsigned x, unsigned y, unsigned *res) { + return __builtin_uadd_overflow(x, y, res); +} + +// CHECK: cir.func @_Z18test_uadd_overflowjjPj +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !u32i, (!u32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr +// CHECK: } + +bool test_uaddl_overflow(unsigned long x, unsigned long y, unsigned long *res) { + return __builtin_uaddl_overflow(x, y, res); +} + +// CHECK: cir.func @_Z19test_uaddl_overflowmmPm +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_uaddll_overflow(unsigned long long x, unsigned long long y, unsigned long long *res) { + 
return __builtin_uaddll_overflow(x, y, res); +} + +// CHECK: cir.func @_Z20test_uaddll_overflowyyPy +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_usub_overflow(unsigned x, unsigned y, unsigned *res) { + return __builtin_usub_overflow(x, y, res); +} + +// CHECK: cir.func @_Z18test_usub_overflowjjPj +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !u32i, (!u32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr +// CHECK: } + +bool test_usubl_overflow(unsigned long x, unsigned long y, unsigned long *res) { + return __builtin_usubl_overflow(x, y, res); +} + +// CHECK: cir.func @_Z19test_usubl_overflowmmPm +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_usubll_overflow(unsigned long long x, unsigned long long y, unsigned long long *res) { + return __builtin_usubll_overflow(x, y, res); +} + +// CHECK: cir.func @_Z20test_usubll_overflowyyPy +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_umul_overflow(unsigned x, unsigned y, unsigned *res) { + return __builtin_umul_overflow(x, y, res); +} + +// CHECK: cir.func @_Z18test_umul_overflowjjPj +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !u32i, (!u32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr +// CHECK: } + +bool test_umull_overflow(unsigned long x, unsigned long y, unsigned long *res) { + return __builtin_umull_overflow(x, y, res); +} + +// CHECK: cir.func @_Z19test_umull_overflowmmPm +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_umulll_overflow(unsigned long long x, unsigned long long y, unsigned long long *res) { + return __builtin_umulll_overflow(x, y, res); +} + +// CHECK: cir.func @_Z20test_umulll_overflowyyPy +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// 
CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !u64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr +// CHECK: } + +bool test_sadd_overflow(int x, int y, int *res) { + return __builtin_sadd_overflow(x, y, res); +} + +// CHECK: cir.func @_Z18test_sadd_overflowiiPi +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_saddl_overflow(long x, long y, long *res) { + return __builtin_saddl_overflow(x, y, res); +} + +// CHECK: cir.func @_Z19test_saddl_overflowllPl +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr +// CHECK: } + +bool test_saddll_overflow(long long x, long long y, long long *res) { + return __builtin_saddll_overflow(x, y, res); +} + +// CHECK: cir.func @_Z20test_saddll_overflowxxPx +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr +// CHECK: } + +bool test_ssub_overflow(int x, int y, int *res) { + return __builtin_ssub_overflow(x, y, res); +} + +// CHECK: cir.func @_Z18test_ssub_overflowiiPi +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_ssubl_overflow(long x, long y, long *res) { + return __builtin_ssubl_overflow(x, y, res); +} + +// CHECK: cir.func @_Z19test_ssubl_overflowllPl +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr +// CHECK: } + +bool test_ssubll_overflow(long long x, long long y, long long *res) { + return __builtin_ssubll_overflow(x, y, res); +} + +// CHECK: cir.func @_Z20test_ssubll_overflowxxPx +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s64i, 
!cir.ptr +// CHECK: } + +bool test_smul_overflow(int x, int y, int *res) { + return __builtin_smul_overflow(x, y, res); +} + +// CHECK: cir.func @_Z18test_smul_overflowiiPi +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr +// CHECK: } + +bool test_smull_overflow(long x, long y, long *res) { + return __builtin_smull_overflow(x, y, res); +} + +// CHECK: cir.func @_Z19test_smull_overflowllPl +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr +// CHECK: } + +bool test_smulll_overflow(long long x, long long y, long long *res) { + return __builtin_smulll_overflow(x, y, res); +} + +// CHECK: cir.func @_Z20test_smulll_overflowxxPx +// CHECK: %[[#X:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#Y:]] = cir.load %{{.+}} : !cir.ptr, !s64i +// CHECK-NEXT: %[[#RES_PTR:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool) +// CHECK-NEXT: cir.store %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr +// CHECK: } diff --git a/clang/test/CIR/CodeGen/c11atomic.c b/clang/test/CIR/CodeGen/c11atomic.c new file mode 100644 index 000000000000..96cb48013cd8 --- /dev/null +++ b/clang/test/CIR/CodeGen/c11atomic.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -triple aarch64-none-linux-android21 -fclangir -emit-cir -std=c11 -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 %s -triple aarch64-none-linux-android21 -fclangir -emit-llvm -std=c11 -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// CIR-DAG: ![[PS:.*]] = !cir.struct, !cir.int, !cir.int} +// CIR-DAG: ![[ANON:.*]] = !cir.struct, !cir.int, !cir.int} {{.*}}>, !cir.array x 2>}> +// CIR-DAG: cir.global external @testPromotedStructGlobal = #cir.const_struct<{#cir.const_struct<{#cir.int<1> : !s16i, #cir.int<2> : !s16i, #cir.int<3> : !s16i}> : ![[PS]], #cir.zero : !cir.array}> : ![[ANON]] + +// LLVM-DAG: %[[PS:.*]] = type { i16, i16, i16 } +// LLVM-DAG: @testPromotedStructGlobal = global { %[[PS]], [2 x i8] } { %[[PS]] { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer } +typedef struct { short x, y, z; } PS; +_Atomic PS testPromotedStructGlobal = (PS){1, 2, 3}; \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/c89-implicit-int.c b/clang/test/CIR/CodeGen/c89-implicit-int.c new file mode 100644 index 000000000000..8fe7b285c338 --- /dev/null +++ b/clang/test/CIR/CodeGen/c89-implicit-int.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c89 -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Implicit int return type. 
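+// (C89 permits declarations without a type specifier; both the file-scope 'test' below and 'func' default to int, which is what the CHECK lines verify.)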
+test = 0; +// CHECK: cir.global external @test = #cir.int<0> : !s32i +func (void) { +// CHECK: cir.func @func() -> !s32i + return 0; +} diff --git a/clang/test/CIR/CodeGen/call-extra-attrs.cpp b/clang/test/CIR/CodeGen/call-extra-attrs.cpp new file mode 100644 index 000000000000..674343f44a6f --- /dev/null +++ b/clang/test/CIR/CodeGen/call-extra-attrs.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +__attribute__((nothrow)) +int s0(int a, int b) { + int x = a + b; + return x; +} + +__attribute__((noinline)) +int s1(int a, int b) { + return s0(a,b); +} + +int s2(int a, int b) { + return s1(a, b); +} + +// CIR: #fn_attr = #cir, nothrow = #cir.nothrow, optnone = #cir.optnone})> +// CIR: #fn_attr1 = #cir + +// CIR: cir.func @_Z2s0ii(%{{.*}}, %{{.*}}) -> {{.*}} extra(#fn_attr) +// CIR: cir.func @_Z2s1ii(%{{.*}}, %{{.*}}) -> {{.*}} extra(#fn_attr) +// CIR: cir.call @_Z2s0ii(%{{.*}}, %{{.*}}) : ({{.*}}, {{.*}}) -> {{.*}} extra(#fn_attr1) +// CIR: cir.func @_Z2s2ii(%{{.*}}, %{{.*}}) -> {{.*}} extra(#fn_attr) +// CHECK-NOT: cir.call @_Z2s1ii(%{{.*}}, %{{.*}}) : ({{.*}}, {{.*}}) -> {{.*}} extra(#fn_attr{{.*}}) + +// LLVM: define dso_local i32 @_Z2s0ii(i32 %0, i32 %1) #[[#ATTR1:]] +// LLVM: define dso_local i32 @_Z2s1ii(i32 %0, i32 %1) #[[#ATTR1:]] +// LLVM: define dso_local i32 @_Z2s2ii(i32 %0, i32 %1) #[[#ATTR1:]] + +// LLVM: attributes #[[#ATTR1]] = {{.*}} noinline nounwind optnone diff --git a/clang/test/CIR/CodeGen/call-via-class-member-funcptr.cpp b/clang/test/CIR/CodeGen/call-via-class-member-funcptr.cpp new file mode 100644 index 000000000000..a69ca1a19c96 --- /dev/null +++ b/clang/test/CIR/CodeGen/call-via-class-member-funcptr.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +class a { +public: + static char *b(int); +}; +int h=0; +class f { +public: + const char *b(); + a g; +}; +const char *f::b() { return g.b(h); } +void fn1() { f f1; } + +// CIR: ty_a = !cir.struct} #cir.record.decl.ast> +// CIR: ty_f = !cir.struct} #cir.record.decl.ast>}> + +// CIR: cir.global external @h = #cir.int<0> +// CIR: cir.func private @_ZN1a1bEi(!s32i) -> !cir.ptr + +// CIR: cir.func @_ZN1f1bEv(%arg0: !cir.ptr loc{{.*}}) -> !cir.ptr +// CIR: [[H_PTR:%.*]] = cir.get_global @h : !cir.ptr loc(#loc18) +// CIR: [[H_VAL:%.*]] = cir.load [[H_PTR]] : !cir.ptr, !s32i +// CIR: [[RET1_VAL:%.*]] = cir.call @_ZN1a1bEi([[H_VAL]]) : (!s32i) -> !cir.ptr +// CIR: cir.store [[RET1_VAL]], [[RET1_P:%.*]] : !cir.ptr, !cir.ptr> +// CIR: [[RET1_VAL2:%.*]] = cir.load [[RET1_P]] : !cir.ptr>, !cir.ptr +// %7 = cir.load %1 : !cir.ptr>, !cir.ptr +// CIR: cir.return [[RET1_VAL2]] : !cir.ptr + +// CIR: cir.func @_Z3fn1v() +// CIR: [[CLS_F:%.*]] = cir.alloca !ty_f, !cir.ptr, ["f1"] {alignment = 1 : i64} +// CIR: cir.return + +// LLVM: %class.f = type { %class.a } +// LLVM: %class.a = type { i8 } +// LLVM: @h = global i32 0 +// LLVM: declare {{.*}} ptr @_ZN1a1bEi(i32) + +// LLVM: define dso_local ptr @_ZN1f1bEv(ptr [[ARG0:%.*]]) +// LLVM: [[ARG0_SAVE:%.*]] = 
alloca ptr, i64 1, align 8 +// LLVM: [[RET_SAVE:%.*]] = alloca ptr, i64 1, align 8 +// LLVM: store ptr [[ARG0]], ptr [[ARG0_SAVE]], align 8, +// LLVM: [[ARG0_LOAD:%.*]] = load ptr, ptr [[ARG0_SAVE]], align 8 +// LLVM: [[FUNC_PTR:%.*]] = getelementptr %class.f, ptr [[ARG0_LOAD]], i32 0, i32 0, +// LLVM: [[VAR_H:%.*]] = load i32, ptr @h, align 4 +// LLVM: [[RET_VAL:%.*]] = call ptr @_ZN1a1bEi(i32 [[VAR_H]]), +// LLVM: store ptr [[RET_VAL]], ptr [[RET_SAVE]], align 8, +// LLVM: [[RET_VAL2:%.*]] = load ptr, ptr [[RET_SAVE]], align 8 +// LLVM: ret ptr [[RET_VAL2]] + +// LLVM: define dso_local void @_Z3fn1v() +// LLVM: [[FUNC_PTR:%.*]] = alloca %class.f, i64 1, align 1 +// LLVM: ret void diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c new file mode 100644 index 000000000000..2c3d5cfa151e --- /dev/null +++ b/clang/test/CIR/CodeGen/call.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CXX + +void a(void) {} +int b(int a, int b) { + return a + b; +} +double c(double a, double b) { + return a + b; +} + +void d(void) { + a(); + b(0, 1); +} + +// CHECK: module {{.*}} { +// CHECK: cir.func @a() +// CHECK: cir.return +// CHECK: } +// CHECK: cir.func @b(%arg0: !s32i {{.*}}, %arg1: !s32i {{.*}}) -> !s32i +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %1 = cir.alloca !s32i, !cir.ptr, ["b", init] +// CHECK: %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: cir.store %arg0, %0 : !s32i, !cir.ptr +// CHECK: cir.store %arg1, %1 : !s32i, !cir.ptr +// CHECK: %3 = cir.load %0 : !cir.ptr, !s32i +// CHECK: %4 = cir.load %1 : !cir.ptr, !s32i +// CHECK: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK: cir.store %5, %2 : !s32i, !cir.ptr +// CHECK: %6 = cir.load %2 : !cir.ptr, !s32i +// CHECK: cir.return %6 +// CHECK: } +// CHECK: cir.func @c(%arg0: !cir.double {{.*}}, %arg1: !cir.double {{.*}}) -> !cir.double +// CHECK: %0 = cir.alloca !cir.double, !cir.ptr, ["a", init] +// CHECK: %1 = cir.alloca !cir.double, !cir.ptr, ["b", init] +// CHECK: %2 = cir.alloca !cir.double, !cir.ptr, ["__retval"] +// CHECK: cir.store %arg0, %0 : !cir.double, !cir.ptr +// CHECK: cir.store %arg1, %1 : !cir.double, !cir.ptr +// CHECK: %3 = cir.load %0 : !cir.ptr, !cir.double +// CHECK: %4 = cir.load %1 : !cir.ptr, !cir.double +// CHECK: %5 = cir.binop(add, %3, %4) : !cir.double +// CHECK: cir.store %5, %2 : !cir.double, !cir.ptr +// CHECK: %6 = cir.load %2 : !cir.ptr, !cir.double +// CHECK: cir.return %6 : !cir.double +// CHECK: } +// CHECK: cir.func @d() +// CHECK: call @a() : () -> () +// CHECK: %0 = cir.const #cir.int<0> : !s32i +// CHECK: %1 = cir.const #cir.int<1> : !s32i +// CHECK: call @b(%0, %1) : (!s32i, !s32i) -> !s32i +// CHECK: cir.return +// CHECK: } +// +// CXX: module {{.*}} { +// CXX-NEXT: cir.func @_Z1av() +// CXX-NEXT: cir.return +// CXX-NEXT: } +// CXX-NEXT: cir.func @_Z1bii(%arg0: !s32i {{.*}}, %arg1: !s32i {{.*}}) -> !s32i +// CXX-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] +// CXX-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["b", init] +// CXX-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CXX-NEXT: cir.store %arg0, %0 : !s32i, !cir.ptr +// CXX-NEXT: cir.store %arg1, %1 : !s32i, !cir.ptr +// CXX-NEXT: %3 = cir.load %0 : !cir.ptr, !s32i +// CXX-NEXT: %4 = cir.load %1 : !cir.ptr, !s32i +// CXX-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CXX-NEXT: cir.store %5, %2 : 
!s32i, !cir.ptr +// CXX-NEXT: %6 = cir.load %2 : !cir.ptr, !s32i +// CXX-NEXT: cir.return %6 +// CXX-NEXT: } +// CXX-NEXT: cir.func @_Z1cdd(%arg0: !cir.double {{.*}}, %arg1: !cir.double {{.*}}) -> !cir.double +// CXX-NEXT: %0 = cir.alloca !cir.double, !cir.ptr, ["a", init] +// CXX-NEXT: %1 = cir.alloca !cir.double, !cir.ptr, ["b", init] +// CXX-NEXT: %2 = cir.alloca !cir.double, !cir.ptr, ["__retval"] +// CXX-NEXT: cir.store %arg0, %0 : !cir.double, !cir.ptr +// CXX-NEXT: cir.store %arg1, %1 : !cir.double, !cir.ptr +// CXX-NEXT: %3 = cir.load %0 : !cir.ptr, !cir.double +// CXX-NEXT: %4 = cir.load %1 : !cir.ptr, !cir.double +// CXX-NEXT: %5 = cir.binop(add, %3, %4) : !cir.double +// CXX-NEXT: cir.store %5, %2 : !cir.double, !cir.ptr +// CXX-NEXT: %6 = cir.load %2 : !cir.ptr, !cir.double +// CXX-NEXT: cir.return %6 : !cir.double +// CXX-NEXT: } +// CXX-NEXT: cir.func @_Z1dv() +// CXX-NEXT: call @_Z1av() : () -> () +// CXX-NEXT: %0 = cir.const #cir.int<0> : !s32i +// CXX-NEXT: %1 = cir.const #cir.int<1> : !s32i +// CXX-NEXT: call @_Z1bii(%0, %1) : (!s32i, !s32i) -> !s32i +// CXX-NEXT: cir.return +// CXX-NEXT: } diff --git a/clang/test/CIR/CodeGen/call.cpp b/clang/test/CIR/CodeGen/call.cpp new file mode 100644 index 000000000000..26db637fdb1d --- /dev/null +++ b/clang/test/CIR/CodeGen/call.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int& p(); +int f() { + return p() - 22; +} + +// CHECK: cir.func @_Z1fv() -> !s32i +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: %1 = cir.call @_Z1pv() : () -> !cir.ptr +// CHECK: %2 = cir.load %1 : !cir.ptr, !s32i +// CHECK: %3 = cir.const #cir.int<22> : !s32i +// CHECK: %4 = cir.binop(sub, %2, %3) nsw : !s32i diff --git a/clang/test/CIR/CodeGen/cast.c b/clang/test/CIR/CodeGen/cast.c new file mode 100644 index 000000000000..710b065f8087 --- /dev/null +++ b/clang/test/CIR/CodeGen/cast.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +typedef struct { + int x; +} A; + +int cstyle_cast_lvalue(A a) { + return ((A)(a)).x; +} + +// CHECK: cir.func @cstyle_cast_lvalue(%arg0: !ty_A loc({{.*}})) +// CHECK: [[ALLOC_A:%.*]] = cir.alloca !ty_A, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK: [[ALLOC_RET:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: cir.store %arg0, [[ALLOC_A]] : !ty_A, !cir.ptr +// CHECK: [[X_ADDR:%.*]] = cir.get_member [[ALLOC_A]][0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: [[X:%.*]] = cir.load [[X_ADDR]] : !cir.ptr, !s32i +// CHECK: cir.store [[X]], [[ALLOC_RET]] : !s32i, !cir.ptr +// CHECK: [[RET:%.*]] = cir.load [[ALLOC_RET]] : !cir.ptr, !s32i +// CHECK: cir.return [[RET]] : !s32i + diff --git a/clang/test/CIR/CodeGen/cast.cpp b/clang/test/CIR/CodeGen/cast.cpp new file mode 100644 index 000000000000..b5d1d8e4f43f --- /dev/null +++ b/clang/test/CIR/CodeGen/cast.cpp @@ -0,0 +1,144 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +unsigned char cxxstaticcast_0(unsigned int x) { + return static_cast(x); +} + +// CHECK: cir.func @_Z15cxxstaticcast_0j +// CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: %1 = cir.alloca !u8i, !cir.ptr, ["__retval"] {alignment = 1 : i64} +// CHECK: cir.store %arg0, %0 : !u32i, !cir.ptr +// CHECK: %2 = cir.load %0 : !cir.ptr, !u32i +// 
CHECK: %3 = cir.cast(integral, %2 : !u32i), !u8i +// CHECK: cir.store %3, %1 : !u8i, !cir.ptr +// CHECK: %4 = cir.load %1 : !cir.ptr, !u8i +// CHECK: cir.return %4 : !u8i +// CHECK: } + + +int cStyleCasts_0(unsigned x1, int x2, float x3, short x4, double x5) { +// CHECK: cir.func @_{{.*}}cStyleCasts_0{{.*}} + + char a = (char)x1; // truncate + // CHECK: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s8i + + short b = (short)x2; // truncate with sign + // CHECK: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !s32i), !s16i + + long long c = (long long)x1; // zero extend + // CHECK: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s64i + + long long d = (long long)x2; // sign extend + // CHECK: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !s32i), !s64i + + unsigned ui = (unsigned)x2; // sign drop + // CHECK: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !s32i), !u32i + + int si = (int)x1; // sign add + // CHECK: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s32i + + unsigned uu = (unsigned)x1; // should not be generated + // CHECK-NOT: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !u32i + + int arr[3]; + int* e = (int*)arr; // explicit pointer decay + // CHECK: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %{{[0-9]+}} : !cir.ptr>), !cir.ptr + + int f = (int)x3; + // CHECK: %{{[0-9]+}} = cir.cast(float_to_int, %{{[0-9]+}} : !cir.float), !s32i + + double g = (double)x3; // FP extension + // %{{[0-9]+}} = cir.cast(floating, %{{[0-9]+}} : !cir.float), !cir.double + + long l = (long)(void*)x4; // Must sign extend before casting to pointer + // CHECK: %[[TMP:[0-9]+]] = cir.cast(integral, %{{[0-9]+}} : !s16i), !u64i + // CHECK: %[[TMP2:[0-9]+]] = cir.cast(int_to_ptr, %[[TMP]] : !u64i), !cir.ptr + // CHECK: %{{[0-9]+}} = cir.cast(ptr_to_int, %[[TMP2]] : !cir.ptr), !s64i + + float sitofp = (float)x2; // Signed integer to floating point + // CHECK: %{{.+}} = cir.cast(int_to_float, %{{[0-9]+}} : !s32i), !cir.float + + float uitofp = (float)x1; // Unsigned integer to floating point + // CHECK: %{{.+}} = cir.cast(int_to_float, %{{[0-9]+}} : !u32i), !cir.float + + int fptosi = (int)x3; // Floating point to signed integer + // CHECK: %{{.+}} = cir.cast(float_to_int, %{{[0-9]+}} : !cir.float), !s32i + + unsigned fptoui = (unsigned)x3; // Floating point to unsigned integer + // CHECK: %{{.+}} = cir.cast(float_to_int, %{{[0-9]+}} : !cir.float), !u32i + + bool ib = (bool)x1; // No checking, because this isn't a regular cast. 
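+  // (Presumably this goes through a dedicated bool-conversion cast such as cir.cast(int_to_bool, ...) rather than an integral cast, which is why no FileCheck line is emitted for it.)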
+ + int bi = (int)ib; // bool to int + // CHECK: %{{[0-9]+}} = cir.cast(bool_to_int, %{{[0-9]+}} : !cir.bool), !s32i + + float bf = (float)ib; // bool to float + // CHECK: %{{[0-9]+}} = cir.cast(bool_to_float, %{{[0-9]+}} : !cir.bool), !cir.float + + void* bpv = (void*)ib; // bool to pointer, which is done in two steps + // CHECK: %[[TMP:[0-9]+]] = cir.cast(bool_to_int, %{{[0-9]+}} : !cir.bool), !u64i + // CHECK: %{{[0-9]+}} = cir.cast(int_to_ptr, %[[TMP]] : !u64i), !cir.ptr + + float dptofp = (float)x5; + // CHECK: %{{.+}} = cir.cast(floating, %{{[0-9]+}} : !cir.double), !cir.float + + return 0; +} + +bool cptr(void *d) { + bool x = d; + return x; +} + +// CHECK: cir.func @_Z4cptrPv(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["d", init] {alignment = 8 : i64} + +// CHECK: %3 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %4 = cir.cast(ptr_to_bool, %3 : !cir.ptr), !cir.bool + +void call_cptr(void *d) { + if (!cptr(d)) { + } +} + +// CHECK: cir.func @_Z9call_cptrPv(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["d", init] {alignment = 8 : i64} + +// CHECK: cir.scope { +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %2 = cir.call @_Z4cptrPv(%1) : (!cir.ptr) -> !cir.bool +// CHECK: %3 = cir.unary(not, %2) : !cir.bool, !cir.bool +// CHECK: cir.if %3 { + +void lvalue_cast(int x) { + *(int *)&x = 42; +} + +// CHECK: cir.func @_Z11lvalue_cast +// CHECK: %1 = cir.const #cir.int<42> : !s32i +// CHECK: cir.store %1, %0 : !s32i, !cir.ptr + +struct A { int x; }; + +void null_cast(long ptr) { + *(int *)0 = 0; + ((A *)0)->x = 0; +} + +// CHECK: cir.func @_Z9null_castl +// CHECK: %[[ADDR:[0-9]+]] = cir.const #cir.ptr : !cir.ptr +// CHECK: cir.store %{{[0-9]+}}, %[[ADDR]] : !s32i, !cir.ptr +// CHECK: %[[BASE:[0-9]+]] = cir.const #cir.ptr : !cir.ptr +// CHECK: %[[FIELD:[0-9]+]] = cir.get_member %[[BASE]][0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: cir.store %{{[0-9]+}}, %[[FIELD]] : !s32i, !cir.ptr + +void int_cast(long ptr) { + ((A *)ptr)->x = 0; +} + +// CHECK: cir.func @_Z8int_castl +// CHECK: %[[BASE:[0-9]+]] = cir.cast(int_to_ptr, %{{[0-9]+}} : !u64i), !cir.ptr +// CHECK: %[[FIELD:[0-9]+]] = cir.get_member %[[BASE]][0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: cir.store %{{[0-9]+}}, %[[FIELD]] : !s32i, !cir.ptr + diff --git a/clang/test/CIR/CodeGen/clear_cache.c b/clang/test/CIR/CodeGen/clear_cache.c new file mode 100644 index 000000000000..7b649e068a19 --- /dev/null +++ b/clang/test/CIR/CodeGen/clear_cache.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-cir -o %t.cir +// RUN: FileCheck --input-file=%t.cir -check-prefix=CIR %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-llvm -o %t.ll +// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s + +char buffer[32] = "This is a largely unused buffer"; + +// __builtin___clear_cache always maps to @llvm.clear_cache, but what +// each back-end produces is different, and this is tested in LLVM + +// CIR-LABEL: main +// CIR: %[[VAL_1:.*]] = cir.get_global @buffer : !cir.ptr> +// CIR: %[[VAL_2:.*]] = cir.cast(array_to_ptrdecay, %[[VAL_1]] : !cir.ptr>), !cir.ptr +// CIR: %[[VAL_3:.*]] = cir.cast(bitcast, %[[VAL_2]] : !cir.ptr), !cir.ptr +// CIR: %[[VAL_4:.*]] = cir.get_global @buffer : !cir.ptr> +// CIR: %[[VAL_5:.*]] = cir.cast(array_to_ptrdecay, %[[VAL_4]] : !cir.ptr>), !cir.ptr +// CIR: %[[VAL_6:.*]] = cir.const #cir.int<32> : !s32i +// CIR: %[[VAL_7:.*]] = cir.ptr_stride(%[[VAL_5]] : !cir.ptr, %[[VAL_6]] : !s32i), !cir.ptr +// 
CIR: %[[VAL_8:.*]] = cir.cast(bitcast, %[[VAL_7]] : !cir.ptr), !cir.ptr +// CIR: cir.clear_cache %[[VAL_3]] : !cir.ptr, %[[VAL_8]], + +// LLVM-LABEL: main +// LLVM: call void @llvm.clear_cache(ptr @buffer, ptr getelementptr (i8, ptr @buffer, i64 32)), + +int main(void) { + __builtin___clear_cache(buffer, buffer+32); + return 0; +} diff --git a/clang/test/CIR/CodeGen/cmp.cpp b/clang/test/CIR/CodeGen/cmp.cpp new file mode 100644 index 000000000000..3bca55e78d13 --- /dev/null +++ b/clang/test/CIR/CodeGen/cmp.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void c0(int a, int b) { + bool x = a > b; + x = a < b; + x = a <= b; + x = a >= b; + x = a != b; + x = a == b; +} + +// CHECK: = cir.cmp(gt, %3, %4) : !s32i, !cir.bool +// CHECK: = cir.cmp(lt, %6, %7) : !s32i, !cir.bool +// CHECK: = cir.cmp(le, %9, %10) : !s32i, !cir.bool +// CHECK: = cir.cmp(ge, %12, %13) : !s32i, !cir.bool +// CHECK: = cir.cmp(ne, %15, %16) : !s32i, !cir.bool +// CHECK: = cir.cmp(eq, %18, %19) : !s32i, !cir.bool diff --git a/clang/test/CIR/CodeGen/comma.cpp b/clang/test/CIR/CodeGen/comma.cpp new file mode 100644 index 000000000000..368b0e1bd18d --- /dev/null +++ b/clang/test/CIR/CodeGen/comma.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int c0() { + int a = 1; + int b = 2; + return b + 1, a; +} + +// CHECK: cir.func @_Z2c0v() -> !s32i +// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %[[#B:]] = cir.alloca !s32i, !cir.ptr, ["b", init] +// CHECK: %[[#LOADED_B:]] = cir.load %[[#B]] : !cir.ptr, !s32i +// CHECK: %[[#]] = cir.binop(add, %[[#LOADED_B]], %[[#]]) nsw : !s32i +// CHECK: %[[#LOADED_A:]] = cir.load %[[#A]] : !cir.ptr, !s32i +// CHECK: cir.store %[[#LOADED_A]], %[[#RET]] : !s32i, !cir.ptr + +int &foo1(); +int &foo2(); + +void c1() { + int &x = (foo1(), foo2()); +} + +// CHECK: cir.func @_Z2c1v() +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.call @_Z4foo1v() : () -> !cir.ptr +// CHECK: %2 = cir.call @_Z4foo2v() : () -> !cir.ptr +// CHECK: cir.store %2, %0 : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGen/complex-arithmetic.c b/clang/test/CIR/CodeGen/complex-arithmetic.c new file mode 100644 index 000000000000..8e772e70f2d9 --- /dev/null +++ b/clang/test/CIR/CodeGen/complex-arithmetic.c @@ -0,0 +1,907 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=basic -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIRGEN,CIRGEN-BASIC,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=basic -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIRGEN,CIRGEN-BASIC,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=improved -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIRGEN,CIRGEN-IMPROVED,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=improved -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIRGEN,CIRGEN-IMPROVED,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=full 
-fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIRGEN,CIRGEN-FULL,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=full -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIRGEN,CIRGEN-FULL,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=basic -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIR,CIR-BASIC,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=basic -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIR,CIR-BASIC,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=improved -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIR,CIR-IMPROVED,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=improved -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIR,CIR-IMPROVED,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=full -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIR,CIR-FULL,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=full -fclangir -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIR,CIR-FULL,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=basic -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=CLANG,LLVM,LLVM-BASIC,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=basic -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=CPPLANG,LLVM,LLVM-BASIC,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=improved -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=CLANG,LLVM,LLVM-IMPROVED,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=improved -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=CPPLANG,LLVM,LLVM-IMPROVED,CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=full -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=CLANG,LLVM,LLVM-FULL,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=full -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=CPPLANG,LLVM,LLVM-FULL,CHECK %s + +double _Complex cd1, cd2; +int _Complex ci1, ci2; + +void add() { + cd1 = cd1 + cd2; + ci1 = ci1 + ci2; +} + +// CLANG: @add +// CPPLANG: @_Z3addv + +// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex +// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex + +// CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(add, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !cir.double +// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(add, 
%[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !cir.double -> !cir.complex + +// CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(add, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !s32i +// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(add, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !s32i -> !cir.complex + +// LLVM: %[[#LHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RES_REAL:]] = fadd double %[[#LHS_REAL]], %[[#RHS_REAL]] +// LLVM-NEXT: %[[#RES_IMAG:]] = fadd double %[[#LHS_IMAG]], %[[#RHS_IMAG]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#RES_REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RES_IMAG]], 1 + +// LLVM: %[[#LHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RES_REAL:]] = add i32 %[[#LHS_REAL]], %[[#RHS_REAL]] +// LLVM-NEXT: %[[#RES_IMAG:]] = add i32 %[[#LHS_IMAG]], %[[#RHS_IMAG]] +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#RES_REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RES_IMAG]], 1 + +// CHECK: } + +void sub() { + cd1 = cd1 - cd2; + ci1 = ci1 - ci2; +} + +// CLANG: @sub +// CPPLANG: @_Z3subv + +// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex +// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex + +// CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(sub, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !cir.double +// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(sub, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !cir.double -> !cir.complex + +// CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(sub, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !s32i +// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(sub, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !s32i -> !cir.complex + +// LLVM: %[[#LHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#LHS_IMAG:]] = 
extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RES_REAL:]] = fsub double %[[#LHS_REAL]], %[[#RHS_REAL]] +// LLVM-NEXT: %[[#RES_IMAG:]] = fsub double %[[#LHS_IMAG]], %[[#RHS_IMAG]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#RES_REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RES_IMAG]], 1 + +// LLVM: %[[#LHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RES_REAL:]] = sub i32 %[[#LHS_REAL]], %[[#RHS_REAL]] +// LLVM-NEXT: %[[#RES_IMAG:]] = sub i32 %[[#LHS_IMAG]], %[[#RHS_IMAG]] +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#RES_REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RES_IMAG]], 1 + +// CHECK: } + +void mul() { + cd1 = cd1 * cd2; + ci1 = ci1 * ci2; +} + +// CLANG: @mul +// CPPLANG: @_Z3mulv + +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex + +// CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double +// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double +// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double +// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double +// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.double +// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double +// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !cir.double -> !cir.complex + +// CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i +// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i +// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i +// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i +// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !s32i +// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i +// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !s32i -> !cir.complex + +// LLVM-BASIC: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// 
LLVM-BASIC-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#C:]] = fmul double %[[#LHSR]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#D:]] = fmul double %[[#LHSI]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#E:]] = fsub double %[[#A]], %[[#B]] +// LLVM-BASIC-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]] +// LLVM-BASIC-NEXT: %[[#G:]] = insertvalue { double, double } undef, double %[[#E]], 0 +// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { double, double } %[[#G]], double %[[#F]], 1 + +// LLVM-BASIC: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#C:]] = mul i32 %[[#LHSR]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#D:]] = mul i32 %[[#LHSI]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#E:]] = sub i32 %[[#A]], %[[#B]] +// LLVM-BASIC-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]] +// LLVM-BASIC-NEXT: %[[#G:]] = insertvalue { i32, i32 } undef, i32 %[[#E]], 0 +// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#G]], i32 %[[#F]], 1 + +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex + +// CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double +// CIR-IMPROVED-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !cir.double -> !cir.complex + +// CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i +// CIR-IMPROVED-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !s32i -> !cir.complex + +// LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// 
LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#C:]] = fmul double %[[#LHSR]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#D:]] = fmul double %[[#LHSI]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#E:]] = fsub double %[[#A]], %[[#B]] +// LLVM-IMPROVED-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]] +// LLVM-IMPROVED-NEXT: %[[#G:]] = insertvalue { double, double } undef, double %[[#E]], 0 +// LLVM-IMPROVED-NEXT: %{{.+}} = insertvalue { double, double } %[[#G]], double %[[#F]], 1 + +// LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#C:]] = mul i32 %[[#LHSR]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#D:]] = mul i32 %[[#LHSI]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#E:]] = sub i32 %[[#A]], %[[#B]] +// LLVM-IMPROVED-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]] +// LLVM-IMPROVED-NEXT: %[[#G:]] = insertvalue { i32, i32 } undef, i32 %[[#E]], 0 +// LLVM-IMPROVED-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#G]], i32 %[[#F]], 1 + +// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex +// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex + +// CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double +// CIR-FULL-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double +// CIR-FULL-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double +// CIR-FULL-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double +// CIR-FULL-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.double +// CIR-FULL-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double +// CIR-FULL-NEXT: %[[#RES:]] = cir.complex.create %[[#E]], %[[#F]] : !cir.double -> !cir.complex +// CIR-FULL-NEXT: %[[#COND:]] = cir.cmp(ne, %[[#E]], %[[#E]]) : !cir.double, !cir.bool +// CIR-FULL-NEXT: %[[#COND2:]] = cir.cmp(ne, %[[#F]], %[[#F]]) : !cir.double, !cir.bool +// CIR-FULL-NEXT: %[[#G:]] = cir.const #false +// CIR-FULL-NEXT: %[[#H:]] = cir.select if %[[#COND]] then %[[#COND2]] else %[[#G]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool +// CIR-FULL-NEXT: %{{.+}} = cir.ternary(%[[#H]], true { +// CIR-FULL-NEXT: %[[#RES2:]] = cir.call @__muldc3(%[[#LHSR]], %[[#LHSI]], %[[#RHSR]], %[[#RHSI]]) : (!cir.double, !cir.double, !cir.double, !cir.double) -> !cir.complex +// CIR-FULL-NEXT: cir.yield %[[#RES2]] : !cir.complex +// CIR-FULL-NEXT: }, false { +// CIR-FULL-NEXT: cir.yield %[[#RES]] : !cir.complex +// CIR-FULL-NEXT: }) : 
(!cir.bool) -> !cir.complex + +// CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i +// CIR-FULL-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i +// CIR-FULL-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i +// CIR-FULL-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i +// CIR-FULL-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !s32i +// CIR-FULL-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i +// CIR-FULL-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !s32i -> !cir.complex + +// LLVM-FULL: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#C:]] = fmul double %[[#LHSR]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#D:]] = fmul double %[[#LHSI]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#E:]] = fsub double %[[#A]], %[[#B]] +// LLVM-FULL-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]] +// LLVM-FULL-NEXT: %[[#G:]] = insertvalue { double, double } undef, double %[[#E]], 0 +// LLVM-FULL-NEXT: %[[#RES:]] = insertvalue { double, double } %[[#G]], double %[[#F]], 1 +// LLVM-FULL-NEXT: %[[#H:]] = fcmp une double %[[#E]], %[[#E]] +// LLVM-FULL-NEXT: %[[#COND:]] = zext i1 %[[#H]] to i8 +// LLVM-FULL-NEXT: %[[#I:]] = fcmp une double %[[#F]], %[[#F]] +// LLVM-FULL-NEXT: %[[#COND2:]] = zext i1 %[[#I]] to i8 +// LLVM-FULL-NEXT: %[[#J:]] = and i8 %[[#COND]], %[[#COND2]] +// LLVM-FULL-NEXT: %[[#COND3:]] = trunc i8 %[[#J]] to i1 +// LLVM-FULL: {{.+}}: +// LLVM-FULL-NEXT: %{{.+}} = call { double, double } @__muldc3(double %[[#LHSR]], double %[[#LHSI]], double %[[#RHSR]], double %[[#RHSI]]) +// LLVM-FULL-NEXT: br label %{{.+}} +// LLVM-FULL: {{.+}}: +// LLVM-FULL-NEXT: br label %{{.+}} + +// LLVM-FULL: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#C:]] = mul i32 %[[#LHSR]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#D:]] = mul i32 %[[#LHSI]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#E:]] = sub i32 %[[#A]], %[[#B]] +// LLVM-FULL-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]] +// LLVM-FULL-NEXT: %[[#G:]] = insertvalue { i32, i32 } undef, i32 %[[#E]], 0 +// LLVM-FULL-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#G]], i32 %[[#F]], 1 + +// CHECK: } + +void div() { + cd1 = cd1 / cd2; + ci1 = ci1 / ci2; +} + +// CLANG: @div +// CPPLANG: @_Z3divv + +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex + +// CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex 
-> !cir.double +// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double +// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double +// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !cir.double +// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !cir.double +// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !cir.double +// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double +// CIR-BASIC-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !cir.double +// CIR-BASIC-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double +// CIR-BASIC-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double +// CIR-BASIC-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !cir.double +// CIR-BASIC-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !cir.double +// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !cir.double -> !cir.complex + +// CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i +// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i +// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !s32i +// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !s32i +// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !s32i +// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i +// CIR-BASIC-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !s32i +// CIR-BASIC-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i +// CIR-BASIC-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i +// CIR-BASIC-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !s32i +// CIR-BASIC-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !s32i +// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !s32i -> !cir.complex + +// LLVM-BASIC: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#C:]] = fmul double %[[#RHSR]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#D:]] = fmul double %[[#RHSI]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#E:]] = fadd double %[[#A]], %[[#B]] +// LLVM-BASIC-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]] +// LLVM-BASIC-NEXT: %[[#G:]] = fdiv double %[[#E]], %[[#F]] +// LLVM-BASIC-NEXT: %[[#H:]] = fmul double %[[#LHSI]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#I:]] = fmul double %[[#LHSR]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#J:]] = fsub double %[[#H]], %[[#I]] +// LLVM-BASIC-NEXT: %[[#K:]] = fdiv double %[[#J]], %[[#F]] +// 
LLVM-BASIC-NEXT: %[[#L:]] = insertvalue { double, double } undef, double %[[#G]], 0 +// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { double, double } %[[#L]], double %[[#K]], 1 + +// LLVM-BASIC: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-BASIC-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#C:]] = mul i32 %[[#RHSR]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#D:]] = mul i32 %[[#RHSI]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#E:]] = add i32 %[[#A]], %[[#B]] +// LLVM-BASIC-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]] +// LLVM-BASIC-NEXT: %[[#G:]] = sdiv i32 %[[#E]], %[[#F]] +// LLVM-BASIC-NEXT: %[[#H:]] = mul i32 %[[#LHSI]], %[[#RHSR]] +// LLVM-BASIC-NEXT: %[[#I:]] = mul i32 %[[#LHSR]], %[[#RHSI]] +// LLVM-BASIC-NEXT: %[[#J:]] = sub i32 %[[#H]], %[[#I]] +// LLVM-BASIC-NEXT: %[[#K:]] = sdiv i32 %[[#J]], %[[#F]] +// LLVM-BASIC-NEXT: %[[#L:]] = insertvalue { i32, i32 } undef, i32 %[[#G]], 0 +// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#L]], i32 %[[#K]], 1 + +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex + +// CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-IMPROVED-NEXT: %[[#A:]] = cir.fabs %[[#RHSR]] : !cir.double +// CIR-IMPROVED-NEXT: %[[#B:]] = cir.fabs %[[#RHSI]] : !cir.double +// CIR-IMPROVED-NEXT: %[[#C:]] = cir.cmp(ge, %[[#A]], %[[#B]]) : !cir.double, !cir.bool +// CIR-IMPROVED-NEXT: %{{.+}} = cir.ternary(%[[#C]], true { +// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(div, %[[#RHSI]], %[[#RHSR]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(mul, %[[#D]], %[[#RHSI]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#RHSR]], %[[#E]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#G:]] = cir.binop(mul, %[[#LHSI]], %[[#D]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#H:]] = cir.binop(add, %[[#LHSR]], %[[#G]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#I:]] = cir.binop(div, %[[#H]], %[[#F]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#J:]] = cir.binop(mul, %[[#LHSR]], %[[#D]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#K:]] = cir.binop(sub, %[[#LHSI]], %[[#J]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#L:]] = cir.binop(div, %[[#K]], %[[#F]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#M:]] = cir.complex.create %[[#I]], %[[#L]] : !cir.double -> !cir.complex +// CIR-IMPROVED-NEXT: cir.yield %[[#M]] : !cir.complex +// CIR-IMPROVED-NEXT: }, false { +// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(div, %[[#RHSR]], %[[#RHSI]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(mul, %[[#D]], %[[#RHSR]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#RHSI]], %[[#E]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#G:]] = cir.binop(mul, %[[#LHSR]], %[[#D]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#H:]] = cir.binop(add, %[[#G]], %[[#LHSI]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#I:]] = cir.binop(div, 
%[[#H]], %[[#F]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#J:]] = cir.binop(mul, %[[#LHSI]], %[[#D]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#K:]] = cir.binop(sub, %[[#J]], %4) : !cir.double +// CIR-IMPROVED-NEXT: %[[#L:]] = cir.binop(div, %[[#K]], %[[#F]]) : !cir.double +// CIR-IMPROVED-NEXT: %[[#M:]] = cir.complex.create %[[#I]], %[[#L]] : !cir.double -> !cir.complex +// CIR-IMPROVED-NEXT: cir.yield %[[#M]] : !cir.complex +// CIR-IMPROVED-NEXT: }) : (!cir.bool) -> !cir.complex + +// CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-IMPROVED-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !s32i +// CIR-IMPROVED-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !s32i +// CIR-IMPROVED-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !s32i -> !cir.complex + +// LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#A:]] = call double @llvm.fabs.f64(double %[[#RHSR]]) +// LLVM-IMPROVED-NEXT: %[[#B:]] = call double @llvm.fabs.f64(double %[[#RHSI]]) +// LLVM-IMPROVED-NEXT: %[[#C:]] = fcmp oge double %[[#A]], %[[#B]] +// LLVM-IMPROVED-NEXT: br i1 %[[#C]], label %[[#LA:]], label %[[#LB:]] +// LLVM-IMPROVED: [[#LA]]: +// LLVM-IMPROVED-NEXT: %[[#D:]] = fdiv double %[[#RHSI]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#E:]] = fmul double %[[#D]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#F:]] = fadd double %[[#RHSR]], %[[#E]] +// LLVM-IMPROVED-NEXT: %[[#G:]] = fmul double %[[#LHSI]], %[[#D]] +// LLVM-IMPROVED-NEXT: %[[#H:]] = fadd double %[[#LHSR]], %[[#G]] +// LLVM-IMPROVED-NEXT: %[[#I:]] = fdiv double %[[#H]], %[[#F]] +// LLVM-IMPROVED-NEXT: %[[#J:]] = fmul double %[[#LHSR]], %[[#D]] +// LLVM-IMPROVED-NEXT: %[[#K:]] = fsub double %[[#LHSI]], %[[#J]] +// LLVM-IMPROVED-NEXT: %[[#L:]] = fdiv double %[[#K]], %[[#F]] +// LLVM-IMPROVED-NEXT: %[[#M:]] = insertvalue { double, double } undef, double %[[#I]], 0 +// LLVM-IMPROVED-NEXT: %[[#N1:]] = insertvalue { double, double } %[[#M]], double %[[#L]], 1 +// LLVM-IMPROVED-NEXT: br label %[[#LC:]] +// LLVM-IMPROVED: [[#LB]]: +// LLVM-IMPROVED-NEXT: %[[#D:]] = fdiv double %[[#RHSR]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#E:]] = fmul double %[[#D]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#F:]] = fadd double %[[#RHSI]], %[[#E]] +// LLVM-IMPROVED-NEXT: %[[#G:]] = fmul double %[[#LHSR]], 
%[[#D]] +// LLVM-IMPROVED-NEXT: %[[#H:]] = fadd double %[[#G]], %[[#LHSI]] +// LLVM-IMPROVED-NEXT: %[[#I:]] = fdiv double %[[#H]], %[[#F]] +// LLVM-IMPROVED-NEXT: %[[#J:]] = fmul double %[[#LHSI]], %[[#D]] +// LLVM-IMPROVED-NEXT: %[[#K:]] = fsub double %[[#J]], %[[#LHSR]] +// LLVM-IMPROVED-NEXT: %[[#L:]] = fdiv double %[[#K]], %[[#F]] +// LLVM-IMPROVED-NEXT: %[[#M:]] = insertvalue { double, double } undef, double %[[#I]], 0 +// LLVM-IMPROVED-NEXT: %[[#N2:]] = insertvalue { double, double } %[[#M]], double %[[#L]], 1 +// LLVM-IMPROVED-NEXT: br label %[[#LC]] +// LLVM-IMPROVED: [[#LC]]: +// LLVM-IMPROVED-NEXT: %{{.+}} = phi { double, double } [ %[[#N2]], %[[#LB]] ], [ %[[#N1]], %[[#LA]] ] + +// LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-IMPROVED-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#C:]] = mul i32 %[[#RHSR]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#D:]] = mul i32 %[[#RHSI]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#E:]] = add i32 %[[#A]], %[[#B]] +// LLVM-IMPROVED-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]] +// LLVM-IMPROVED-NEXT: %[[#G:]] = sdiv i32 %[[#E]], %[[#F]] +// LLVM-IMPROVED-NEXT: %[[#H:]] = mul i32 %[[#LHSI]], %[[#RHSR]] +// LLVM-IMPROVED-NEXT: %[[#I:]] = mul i32 %[[#LHSR]], %[[#RHSI]] +// LLVM-IMPROVED-NEXT: %[[#J:]] = sub i32 %[[#H]], %[[#I]] +// LLVM-IMPROVED-NEXT: %[[#K:]] = sdiv i32 %[[#J]], %[[#F]] +// LLVM-IMPROVED-NEXT: %[[#L:]] = insertvalue { i32, i32 } undef, i32 %[[#G]], 0 +// LLVM-IMPROVED-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#L]], i32 %[[#K]], 1 + +// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex +// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex + +// CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-FULL-NEXT: %{{.+}} = cir.call @__divdc3(%[[#LHSR]], %[[#LHSI]], %[[#RHSR]], %[[#RHSI]]) : (!cir.double, !cir.double, !cir.double, !cir.double) -> !cir.complex + +// CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-FULL-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i +// CIR-FULL-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i +// CIR-FULL-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !s32i +// CIR-FULL-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !s32i +// CIR-FULL-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !s32i +// CIR-FULL-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i +// CIR-FULL-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !s32i +// CIR-FULL-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i +// CIR-FULL-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : 
!s32i +// CIR-FULL-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !s32i +// CIR-FULL-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !s32i +// CIR-FULL-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !s32i -> !cir.complex + +// LLVM-FULL: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-FULL-NEXT: %{{.+}} = call { double, double } @__divdc3(double %[[#LHSR]], double %[[#LHSI]], double %[[#RHSR]], double %[[#RHSI]]) + +// LLVM-FULL: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-FULL-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#C:]] = mul i32 %[[#RHSR]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#D:]] = mul i32 %[[#RHSI]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#E:]] = add i32 %[[#A]], %[[#B]] +// LLVM-FULL-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]] +// LLVM-FULL-NEXT: %[[#G:]] = sdiv i32 %[[#E]], %[[#F]] +// LLVM-FULL-NEXT: %[[#H:]] = mul i32 %[[#LHSI]], %[[#RHSR]] +// LLVM-FULL-NEXT: %[[#I:]] = mul i32 %[[#LHSR]], %[[#RHSI]] +// LLVM-FULL-NEXT: %[[#J:]] = sub i32 %[[#H]], %[[#I]] +// LLVM-FULL-NEXT: %[[#K:]] = sdiv i32 %[[#J]], %[[#F]] +// LLVM-FULL-NEXT: %[[#L:]] = insertvalue { i32, i32 } undef, i32 %[[#G]], 0 +// LLVM-FULL-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#L]], i32 %[[#K]], 1 + +// CHECK: } + +void add_assign() { + cd1 += cd2; + ci1 += ci2; +} + +// CLANG: @add_assign +// CPPLANG: @_Z10add_assignv + +// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex +// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex + +// CHECK: } + +void sub_assign() { + cd1 -= cd2; + ci1 -= ci2; +} + +// CLANG: @sub_assign +// CPPLANG: @_Z10sub_assignv + +// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex +// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex + +// CHECK: } + +void mul_assign() { + cd1 *= cd2; + ci1 *= ci2; +} + +// CLANG: @mul_assign +// CPPLANG: @_Z10mul_assignv + +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex + +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex + +// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex +// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex + +// CHECK: } + +void div_assign() { + cd1 /= cd2; + ci1 /= ci2; +} + +// CLANG: @div_assign +// CPPLANG: @_Z10div_assignv + +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex +// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex + +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex +// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex + +// CIRGEN-FULL: 
%{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex +// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex + +// CHECK: } + +void unary_plus() { + cd1 = +cd1; + ci1 = +ci1; +} + +// CLANG: @unary_plus +// CPPLANG: @_Z10unary_plusv + +// CIRGEN: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RESR:]] = cir.unary(plus, %[[#OPR]]) : !cir.double, !cir.double +// CIR-NEXT: %[[#RESI:]] = cir.unary(plus, %[[#OPI]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !cir.double -> !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RESR:]] = cir.unary(plus, %[[#OPR]]) : !s32i, !s32i +// CIR-NEXT: %[[#RESI:]] = cir.unary(plus, %[[#OPI]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !s32i -> !cir.complex + +// LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#OPR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#OPI]], 1 + +// LLVM: %[[#OPR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#OPR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#OPI]], 1 + +// CHECK: } + +void unary_minus() { + cd1 = -cd1; + ci1 = -ci1; +} + +// CLANG: @unary_minus +// CPPLANG: @_Z11unary_minusv + +// CIRGEN: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RESR:]] = cir.unary(minus, %[[#OPR]]) : !cir.double, !cir.double +// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !cir.double -> !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RESR:]] = cir.unary(minus, %[[#OPR]]) : !s32i, !s32i +// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !s32i -> !cir.complex + +// LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RESR:]] = fneg double %[[#OPR]] +// LLVM-NEXT: %[[#RESI:]] = fneg double %[[#OPI]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#RESR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RESI]], 1 + +// LLVM: %[[#OPR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RESR:]] = sub i32 0, %[[#OPR]] +// LLVM-NEXT: %[[#RESI:]] = sub i32 0, 
%[[#OPI]] +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#RESR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RESI]], 1 + +// CHECK: } + +void unary_not() { + cd1 = ~cd1; + ci1 = ~ci1; +} + +// CLANG: @unary_not +// CPPLANG: @_Z9unary_notv + +// CIRGEN: %{{.+}} = cir.unary(not, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(not, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#OPR]], %[[#RESI]] : !cir.double -> !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#OPR]], %[[#RESI]] : !s32i -> !cir.complex + +// LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RESI:]] = fneg double %[[#OPI]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#OPR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RESI]], 1 + +// LLVM: %[[#OPR:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RESI:]] = sub i32 0, %[[#OPI]] +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#OPR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RESI]], 1 + +// CHECK: } + +void builtin_conj() { + cd1 = __builtin_conj(cd1); +} + +// CLANG: @builtin_conj +// CPPLANG: @_Z12builtin_conjv + +// CIRGEN: %{{.+}} = cir.unary(not, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#OPR]], %[[#RESI]] : !cir.double -> !cir.complex + +// LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RESI:]] = fneg double %[[#OPI]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#OPR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RESI]], 1 + +// CHECK: } + +void pre_increment() { + ++cd1; + ++ci1; +} + +// CLANG: @pre_increment +// CPPLANG: @_Z13pre_incrementv + +// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : 
!s32i -> !cir.complex + +// LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = fadd double 1.000000e+00, %[[#R]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1 + +// LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = add i32 %[[#R]], 1 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1 + +// CHECK: } + +void post_increment() { + cd1++; + ci1++; +} + +// CLANG: @post_increment +// CPPLANG: @_Z14post_incrementv + +// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex + +// LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = fadd double 1.000000e+00, %[[#R]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1 + +// LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = add i32 %[[#R]], 1 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1 + +// CHECK: } + +void pre_decrement() { + --cd1; + --ci1; +} + +// CLANG: @pre_decrement +// CPPLANG: @_Z13pre_decrementv + +// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex + +// LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = fadd double -1.000000e+00, %[[#R]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1 + +// LLVM: 
%[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = sub i32 %[[#R]], 1 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1 + +// CHECK: } + +void post_decrement() { + cd1--; + ci1--; +} + +// CLANG: @post_decrement +// CPPLANG: @_Z14post_decrementv + +// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex, !cir.complex +// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex, !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !cir.double, !cir.double +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex + +// CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !s32i, !s32i +// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex + +// LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = fadd double -1.000000e+00, %[[#R]] +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1 + +// LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#IR:]] = sub i32 %[[#R]], 1 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1 + +// CHECK: } diff --git a/clang/test/CIR/CodeGen/complex-cast.c b/clang/test/CIR/CodeGen/complex-cast.c new file mode 100644 index 000000000000..98afabd65340 --- /dev/null +++ b/clang/test/CIR/CodeGen/complex-cast.c @@ -0,0 +1,208 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare -o %t.cir %s 2>&1 | FileCheck --check-prefixes=CIR-BEFORE,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare -o %t.cir %s 2>&1 | FileCheck --check-prefixes=CIR-AFTER,CHECK %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=LLVM,CHECK %s + +#include <stdbool.h> + +volatile double _Complex cd; +volatile float _Complex cf; +volatile int _Complex ci; +volatile short _Complex cs; +volatile double sd; +volatile int si; +volatile bool b; + +void scalar_to_complex() { + cd = sd; + ci = si; + cd = si; + ci = sd; +} + +// CHECK-LABEL: @scalar_to_complex() + +// CIR-BEFORE: %{{.+}} = cir.cast(float_to_complex, %{{.+}} : !cir.double), !cir.complex + +// CIR-AFTER: %[[#REAL:]] = cir.load volatile %{{.+}} : !cir.ptr, !cir.double +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex + +// CIR-BEFORE: %{{.+}} = cir.cast(int_to_complex, %{{.+}} : !s32i), !cir.complex + +// CIR-AFTER: %[[#REAL:]] = cir.load volatile %{{.+}} : !cir.ptr, !s32i +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i +// CIR-AFTER-NEXT: 
%{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex + +// CIR-BEFORE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.double +// CIR-BEFORE-NEXT: %{{.+}} = cir.cast(float_to_complex, %[[#A]] : !cir.double), !cir.complex + +// CIR-AFTER: %[[#A:]] = cir.load volatile %{{.+}} : !cir.ptr, !s32i +// CIR-AFTER-NEXT: %[[#REAL:]] = cir.cast(int_to_float, %[[#A]] : !s32i), !cir.double +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex + +// CIR-BEFORE: %[[#A:]] = cir.cast(float_to_int, %{{.+}} : !cir.double), !s32i +// CIR-BEFORE-NEXT: %{{.+}} = cir.cast(int_to_complex, %[[#A]] : !s32i), !cir.complex + +// CIR-AFTER: %[[#A:]] = cir.load volatile %{{.+}} : !cir.ptr, !cir.double +// CIR-AFTER-NEXT: %[[#REAL:]] = cir.cast(float_to_int, %[[#A]] : !cir.double), !s32i +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex + +// LLVM: %[[#REAL:]] = load volatile double, ptr @sd, align 8 +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double 0.000000e+00, 1 + +// LLVM: %[[#REAL:]] = load volatile i32, ptr @si, align 4 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 0, 1 + +// LLVM: %[[#A:]] = load volatile i32, ptr @si, align 4 +// LLVM-NEXT: %[[#REAL:]] = sitofp i32 %[[#A]] to double +// LLVM-NEXT: %[[#B:]] = insertvalue { double, double } undef, double %[[#REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#B]], double 0.000000e+00, 1 + +// LLVM: %[[#A:]] = load volatile double, ptr @sd, align 8 +// LLVM-NEXT: %[[#REAL:]] = fptosi double %[[#A]] to i32 +// LLVM-NEXT: %[[#B:]] = insertvalue { i32, i32 } undef, i32 %[[#REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#B]], i32 0, 1 + +// CHECK: } + +void scalar_to_complex_explicit() { + cd = (double _Complex)sd; + ci = (int _Complex)si; + cd = (double _Complex)si; + ci = (int _Complex)sd; +} + +// CHECK-LABEL: @scalar_to_complex_explicit() + +// CIR-BEFORE: %{{.+}} = cir.cast(float_to_complex, %{{.+}} : !cir.double), !cir.complex + +// CIR-AFTER: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %{{.+}}, %[[#IMAG]] : !cir.double -> !cir.complex + +// LLVM: %[[#A:]] = insertvalue { double, double } undef, double %{{.+}}, 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double 0.000000e+00, 1 + +// CIR-BEFORE: %{{.+}} = cir.cast(int_to_complex, %{{.+}} : !s32i), !cir.complex + +// CIR-AFTER: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %{{.+}}, %[[#IMAG]] : !s32i -> !cir.complex + +// LLVM: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %{{.+}}, 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 0, 1 + +// CIR-BEFORE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.double +// CIR-BEFORE-NEXT: %{{.+}} = cir.cast(float_to_complex, %[[#A]] : !cir.double), !cir.complex + +// CIR-AFTER: %[[#REAL:]] = cir.cast(int_to_float, %11 : !s32i), !cir.double +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex + +// 
LLVM: %[[#REAL:]] = sitofp i32 %{{.+}} to double +// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double 0.000000e+00, 1 + +// CIR-BEFORE: %[[#A:]] = cir.cast(float_to_int, %{{.+}} : !cir.double), !s32i +// CIR-BEFORE-NEXT: %{{.+}} = cir.cast(int_to_complex, %[[#A]] : !s32i), !cir.complex + +// CIR-AFTER: %[[#REAL:]] = cir.cast(float_to_int, %{{.+}} : !cir.double), !s32i +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i +// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex + +// LLVM: %[[#REAL:]] = fptosi double %{{.+}} to i32 +// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#REAL]], 0 +// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 0, 1 + +// CHECK: } + +void complex_to_scalar() { + sd = (double)cd; + si = (int)ci; + sd = (double)ci; + si = (int)cd; +} + +// CHECK-LABEL: @complex_to_scalar() + +// CIR-BEFORE: %{{.+}} = cir.cast(float_complex_to_real, %{{.+}} : !cir.complex), !cir.double + +// CIR-AFTER: %{{.+}} = cir.complex.real %{{.+}} : !cir.complex -> !cir.double + +// LLVM: %{{.+}} = extractvalue { double, double } %{{.+}}, 0 + +// CIR-BEFORE: %{{.+}} = cir.cast(int_complex_to_real, %{{.+}} : !cir.complex), !s32i + +// CIR-AFTER: %{{.+}} = cir.complex.real %{{.+}} : !cir.complex -> !s32i + +// LLVM: %{{.+}} = extractvalue { i32, i32 } %{{.+}}, 0 + +// CIR-BEFORE: %[[#A:]] = cir.cast(int_complex_to_real, %{{.+}} : !cir.complex), !s32i +// CIR-BEFORE-NEXT: %{{.+}} = cir.cast(int_to_float, %[[#A]] : !s32i), !cir.double + +// CIR-AFTER: %[[#A:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-AFTER-NEXT: %{{.+}} = cir.cast(int_to_float, %[[#A]] : !s32i), !cir.double + +// LLVM: %[[#A:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %{{.+}} = sitofp i32 %[[#A]] to double + +// CIR-BEFORE: %[[#A:]] = cir.cast(float_complex_to_real, %{{.+}} : !cir.complex), !cir.double +// CIR-BEFORE-NEXT: %{{.+}} = cir.cast(float_to_int, %[[#A]] : !cir.double), !s32i + +// CIR-AFTER: %[[#A:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-AFTER-NEXT: %{{.+}} = cir.cast(float_to_int, %[[#A]] : !cir.double), !s32i + +// LLVM: %[[#A:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %{{.+}} = fptosi double %[[#A]] to i32 + +// CHECK: } + +void complex_to_bool() { + b = (bool)cd; + b = (bool)ci; +} + +// CHECK-LABEL: @complex_to_bool() + +// CIR-BEFORE: %{{.+}} = cir.cast(float_complex_to_bool, %{{.+}} : !cir.complex), !cir.bool + +// CIR-AFTER: %[[#REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !cir.double +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !cir.double +// CIR-AFTER-NEXT: %[[#RB:]] = cir.cast(float_to_bool, %[[#REAL]] : !cir.double), !cir.bool +// CIR-AFTER-NEXT: %[[#IB:]] = cir.cast(float_to_bool, %[[#IMAG]] : !cir.double), !cir.bool +// CIR-AFTER-NEXT: %[[#A:]] = cir.const #true +// CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[#RB]] then %[[#A]] else %[[#IB]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + +// LLVM: %[[#REAL:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: %[[#IMAG:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: %[[#RB:]] = fcmp une double %[[#REAL]], 0.000000e+00 +// LLVM-NEXT: %[[#RB2:]] = zext i1 %[[#RB]] to i8 +// LLVM-NEXT: %[[#IB:]] = fcmp une double %[[#IMAG]], 0.000000e+00 +// LLVM-NEXT: %[[#IB2:]] = zext i1 %[[#IB]] to i8 +// LLVM-NEXT: %{{.+}} = or i8 %[[#RB2]], %[[#IB2]] 
+ +// CIR-BEFORE: %{{.+}} = cir.cast(int_complex_to_bool, %{{.+}} : !cir.complex), !cir.bool + +// CIR-AFTER: %[[#REAL:]] = cir.complex.real %{{.+}} : !cir.complex -> !s32i +// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex -> !s32i +// CIR-AFTER-NEXT: %[[#RB:]] = cir.cast(int_to_bool, %[[#REAL]] : !s32i), !cir.bool +// CIR-AFTER-NEXT: %[[#IB:]] = cir.cast(int_to_bool, %[[#IMAG]] : !s32i), !cir.bool +// CIR-AFTER-NEXT: %[[#A:]] = cir.const #true +// CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[#RB]] then %[[#A]] else %[[#IB]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + +// LLVM: %[[#REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0 +// LLVM-NEXT: %[[#IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1 +// LLVM-NEXT: %[[#RB:]] = icmp ne i32 %[[#REAL]], 0 +// LLVM-NEXT: %[[#RB2:]] = zext i1 %[[#RB]] to i8 +// LLVM-NEXT: %[[#IB:]] = icmp ne i32 %[[#IMAG]], 0 +// LLVM-NEXT: %[[#IB2:]] = zext i1 %[[#IB]] to i8 +// LLVM-NEXT: %{{.+}} = or i8 %[[#RB2]], %[[#IB2]] + +// CHECK: } + +void promotion() { + cd = cf + cf; +} diff --git a/clang/test/CIR/CodeGen/complex.c b/clang/test/CIR/CodeGen/complex.c new file mode 100644 index 000000000000..1bdf62fe9666 --- /dev/null +++ b/clang/test/CIR/CodeGen/complex.c @@ -0,0 +1,335 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-BEFORE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-BEFORE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-AFTER %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-AFTER %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefixes=LLVM %s + +double _Complex c, c2; +int _Complex ci, ci2; + +volatile double _Complex vc, vc2; +volatile int _Complex vci, vci2; + +void list_init() { + double _Complex c1 = {1.0, 2.0}; + int _Complex c2 = {1, 2}; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#REAL:]] = cir.const #cir.fp<1.000000e+00> : !cir.double +// CHECK-BEFORE-NEXT: %[[#IMAG:]] = cir.const #cir.fp<2.000000e+00> : !cir.double +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex +// CHECK-BEFORE: %[[#REAL:]] = cir.const #cir.int<1> : !s32i +// CHECK-BEFORE-NEXT: %[[#IMAG:]] = cir.const #cir.int<2> : !s32i +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %{{.+}} = cir.const #cir.complex<#cir.fp<1.000000e+00> : !cir.double, #cir.fp<2.000000e+00> : !cir.double> : !cir.complex +// CHECK-AFTER: %{{.+}} = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex +// CHECK-AFTER: } + +// LLVM: define dso_local void @list_init() +// LLVM: store { double, double } { double 1.000000e+00, double 2.000000e+00 }, ptr %{{.+}}, align 8 +// LLVM: } + +void list_init_2(double r, double i) { + double _Complex c1 = {r, i}; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#R:]] = cir.load %{{.+}} : !cir.ptr, !cir.double +// CHECK-BEFORE-NEXT: %[[#I:]] = 
cir.load %{{.+}} : !cir.ptr, !cir.double +// CHECK-BEFORE-NEXT: %[[#C:]] = cir.complex.create %[[#R]], %[[#I]] : !cir.double -> !cir.complex +// CHECK-BEFORE-NEXT: cir.store %[[#C]], %{{.+}} : !cir.complex, !cir.ptr> +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#R:]] = cir.load %{{.+}} : !cir.ptr, !cir.double +// CHECK-AFTER-NEXT: %[[#I:]] = cir.load %{{.+}} : !cir.ptr, !cir.double +// CHECK-AFTER-NEXT: %[[#C:]] = cir.complex.create %[[#R]], %[[#I]] : !cir.double -> !cir.complex +// CHECK-AFTER-NEXT: cir.store %[[#C]], %{{.+}} : !cir.complex, !cir.ptr> +// CHECK-AFTER: } + +// LLVM: define dso_local void @list_init_2(double %{{.+}}, double %{{.+}}) +// LLVM: %[[#A:]] = insertvalue { double, double } undef, double %{{.+}}, 0 +// LLVM-NEXT: %[[#B:]] = insertvalue { double, double } %[[#A]], double %{{.+}}, 1 +// LLVM-NEXT: store { double, double } %[[#B]], ptr %5, align 8 +// LLVM: } + +void builtin_init(double r, double i) { + double _Complex c = __builtin_complex(r, i); +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %{{.+}} = cir.complex.create %{{.+}}, %{{.+}} : !cir.double -> !cir.complex +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %{{.+}} = cir.complex.create %{{.+}}, %{{.+}} : !cir.double -> !cir.complex +// CHECK-AFTER: } + +// LLVM: define dso_local void @builtin_init +// LLVM: %[[#A:]] = insertvalue { double, double } undef, double %{{.+}}, 0 +// LLVM-NEXT: %[[#B:]] = insertvalue { double, double } %[[#A]], double %{{.+}}, 1 +// LLVM-NEXT: store { double, double } %[[#B]], ptr %{{.+}}, align 8 +// LLVM: } + +void imag_literal() { + c = 3.0i; + ci = 3i; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#REAL:]] = cir.const #cir.fp<0.000000e+00> : !cir.double +// CHECK-BEFORE-NEXT: %[[#IMAG:]] = cir.const #cir.fp<3.000000e+00> : !cir.double +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex +// CHECK-BEFORE: %[[#REAL:]] = cir.const #cir.int<0> : !s32i +// CHECK-BEFORE-NEXT: %[[#IMAG:]] = cir.const #cir.int<3> : !s32i +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %{{.+}} = cir.const #cir.complex<#cir.fp<0.000000e+00> : !cir.double, #cir.fp<3.000000e+00> : !cir.double> : !cir.complex +// CHECK-AFTER: %{{.+}} = cir.const #cir.complex<#cir.int<0> : !s32i, #cir.int<3> : !s32i> : !cir.complex +// CHECK-AFTER: } + +// LLVM: define dso_local void @imag_literal() +// LLVM: store { double, double } { double 0.000000e+00, double 3.000000e+00 }, ptr @c, align 8 +// LLVM: store { i32, i32 } { i32 0, i32 3 }, ptr @ci, align 4 +// LLVM: } + +void load_store() { + c = c2; + ci = ci2; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE-NEXT: %[[#C2_PTR:]] = cir.get_global @c2 : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#C2:]] = cir.load %[[#C2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-BEFORE-NEXT: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-BEFORE-NEXT: cir.store %[[#C2]], %[[#C_PTR]] : !cir.complex, !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#CI2_PTR:]] = cir.get_global @ci2 : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#CI2:]] = cir.load %[[#CI2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-BEFORE-NEXT: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-BEFORE-NEXT: cir.store %[[#CI2]], %[[#CI_PTR]] : !cir.complex, !cir.ptr> +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER-NEXT: %[[#C2_PTR:]] = cir.get_global @c2 : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#C2:]] = cir.load 
%[[#C2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-AFTER-NEXT: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: cir.store %[[#C2]], %[[#C_PTR]] : !cir.complex, !cir.ptr> +// CHECK-AFTER-NEXT: %[[#CI2_PTR:]] = cir.get_global @ci2 : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#CI2:]] = cir.load %[[#CI2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-AFTER-NEXT: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-AFTER-NEXT: cir.store %[[#CI2]], %[[#CI_PTR]] : !cir.complex, !cir.ptr> +// CHECK-AFTER: } + +// LLVM: define dso_local void @load_store() +// LLVM: %[[#A:]] = load { double, double }, ptr @c2, align 8 +// LLVM-NEXT: store { double, double } %[[#A]], ptr @c, align 8 +// LLVM-NEXT: %[[#B:]] = load { i32, i32 }, ptr @ci2, align 4 +// LLVM-NEXT: store { i32, i32 } %[[#B]], ptr @ci, align 4 +// LLVM: } + +void load_store_volatile() { + vc = vc2; + vci = vci2; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE-NEXT: %[[#VC2_PTR:]] = cir.get_global @vc2 : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#VC2:]] = cir.load volatile %[[#VC2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-BEFORE-NEXT: %[[#VC_PTR:]] = cir.get_global @vc : !cir.ptr> +// CHECK-BEFORE-NEXT: cir.store volatile %[[#VC2]], %[[#VC_PTR]] : !cir.complex, !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#VCI2_PTR:]] = cir.get_global @vci2 : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#VCI2:]] = cir.load volatile %[[#VCI2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-BEFORE-NEXT: %[[#VCI_PTR:]] = cir.get_global @vci : !cir.ptr> +// CHECK-BEFORE-NEXT: cir.store volatile %[[#VCI2]], %[[#VCI_PTR]] : !cir.complex, !cir.ptr> +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER-NEXT: %[[#VC2_PTR:]] = cir.get_global @vc2 : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#VC2:]] = cir.load volatile %[[#VC2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-AFTER-NEXT: %[[#VC_PTR:]] = cir.get_global @vc : !cir.ptr> +// CHECK-AFTER-NEXT: cir.store volatile %[[#VC2]], %[[#VC_PTR]] : !cir.complex, !cir.ptr> +// CHECK-AFTER-NEXT: %[[#VCI2_PTR:]] = cir.get_global @vci2 : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#VCI2:]] = cir.load volatile %[[#VCI2_PTR]] : !cir.ptr>, !cir.complex +// CHECK-AFTER-NEXT: %[[#VCI_PTR:]] = cir.get_global @vci : !cir.ptr> +// CHECK-AFTER-NEXT: cir.store volatile %[[#VCI2]], %[[#VCI_PTR]] : !cir.complex, !cir.ptr> +// CHECK-AFTER: } + +// LLVM: define dso_local void @load_store_volatile() +// LLVM: %[[#A:]] = load volatile { double, double }, ptr @vc2, align 8 +// LLVM-NEXT: store volatile { double, double } %[[#A]], ptr @vc, align 8 +// LLVM-NEXT: %[[#B:]] = load volatile { i32, i32 }, ptr @vci2, align 4 +// LLVM-NEXT: store volatile { i32, i32 } %[[#B]], ptr @vci, align 4 +// LLVM: } + +void real() { + double r = __builtin_creal(c); +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#A:]] = cir.get_global @c : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr>, !cir.complex +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.real %[[#B]] : !cir.complex -> !cir.double +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#A:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr>, !cir.complex +// CHECK-AFTER-NEXT: %{{.+}} = cir.complex.real %[[#B]] : !cir.complex -> !cir.double +// CHECK-AFTER: } + +// LLVM: define dso_local void @real() +// LLVM: %[[#A:]] = extractvalue { double, double } %{{.+}}, 0 +// LLVM-NEXT: store double %[[#A]], ptr %{{.+}}, align 8 +// LLVM: } + +void imag() { + double i = __builtin_cimag(c); +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#A:]] = cir.get_global 
@c : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr>, !cir.complex +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.imag %[[#B]] : !cir.complex -> !cir.double +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#A:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr>, !cir.complex +// CHECK-AFTER-NEXT: %{{.+}} = cir.complex.imag %[[#B]] : !cir.complex -> !cir.double +// CHECK-AFTER: } + +// LLVM: define dso_local void @imag() +// LLVM: %[[#A:]] = extractvalue { double, double } %{{.+}}, 1 +// LLVM-NEXT: store double %[[#A]], ptr %{{.+}}, align 8 +// LLVM: } + +void real_ptr() { + double *r1 = &__real__ c; + int *r2 = &__real__ ci; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.real_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.real_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: %{{.+}} = cir.complex.real_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-AFTER-NEXT: %{{.+}} = cir.complex.real_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER: } + +// LLVM: define dso_local void @real_ptr() +// LLVM: store ptr @c, ptr %{{.+}}, align 8 +// LLVM-NEXT: store ptr @ci, ptr %{{.+}}, align 8 +// LLVM: } + +void real_ptr_local() { + double _Complex c1 = {1.0, 2.0}; + double *r3 = &__real__ c1; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#C:]] = cir.alloca !cir.complex, !cir.ptr> +// CHECK-BEFORE: %{{.+}} = cir.complex.real_ptr %[[#C]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#C:]] = cir.alloca !cir.complex, !cir.ptr> +// CHECK-AFTER: %{{.+}} = cir.complex.real_ptr %[[#C]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER: } + +// LLVM: define dso_local void @real_ptr_local() +// LLVM: store { double, double } { double 1.000000e+00, double 2.000000e+00 }, ptr %{{.+}}, align 8 +// LLVM-NEXT: %{{.+}} = getelementptr inbounds { double, double }, ptr %{{.+}}, i32 0, i32 0 +// LLVM: } + +void extract_real() { + double r1 = __real__ c; + int r2 = __real__ ci; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#REAL_PTR:]] = cir.complex.real_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE-NEXT: %{{.+}} = cir.load %[[#REAL_PTR]] : !cir.ptr, !cir.double +// CHECK-BEFORE: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#REAL_PTR:]] = cir.complex.real_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE-NEXT: %{{.+}} = cir.load %[[#REAL_PTR]] : !cir.ptr, !s32i +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#REAL_PTR:]] = cir.complex.real_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER-NEXT: %{{.+}} = cir.load %[[#REAL_PTR]] : !cir.ptr, !cir.double +// CHECK-AFTER: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#REAL_PTR:]] = cir.complex.real_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER-NEXT: %{{.+}} = cir.load %[[#REAL_PTR]] : !cir.ptr, !s32i +// CHECK-AFTER: } + +// LLVM: define dso_local void @extract_real() +// LLVM: %{{.+}} = load double, ptr @c, align 8 
+// LLVM: %{{.+}} = load i32, ptr @ci, align 4 +// LLVM: } + +void imag_ptr() { + double *i1 = &__imag__ c; + int *i2 = &__imag__ ci; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.imag_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-BEFORE-NEXT: %{{.+}} = cir.complex.imag_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: %{{.+}} = cir.complex.imag_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-AFTER-NEXT: %{{.+}} = cir.complex.imag_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER: } + +// LLVM: define dso_local void @imag_ptr() +// LLVM: store ptr getelementptr inbounds ({ double, double }, ptr @c, i32 0, i32 1), ptr %{{.+}}, align 8 +// LLVM: store ptr getelementptr inbounds ({ i32, i32 }, ptr @ci, i32 0, i32 1), ptr %{{.+}}, align 8 +// LLVM: } + +void extract_imag() { + double i1 = __imag__ c; + int i2 = __imag__ ci; +} + +// CHECK-BEFORE: cir.func +// CHECK-BEFORE: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#IMAG_PTR:]] = cir.complex.imag_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE-NEXT: %{{.+}} = cir.load %[[#IMAG_PTR]] : !cir.ptr, !cir.double +// CHECK-BEFORE: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-BEFORE-NEXT: %[[#IMAG_PTR:]] = cir.complex.imag_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-BEFORE-NEXT: %{{.+}} = cir.load %[[#IMAG_PTR]] : !cir.ptr, !s32i +// CHECK-BEFORE: } + +// CHECK-AFTER: cir.func +// CHECK-AFTER: %[[#C_PTR:]] = cir.get_global @c : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#IMAG_PTR:]] = cir.complex.imag_ptr %[[#C_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER-NEXT: %{{.+}} = cir.load %[[#IMAG_PTR]] : !cir.ptr, !cir.double +// CHECK-AFTER: %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr> +// CHECK-AFTER-NEXT: %[[#IMAG_PTR:]] = cir.complex.imag_ptr %[[#CI_PTR]] : !cir.ptr> -> !cir.ptr +// CHECK-AFTER-NEXT: %{{.+}} = cir.load %[[#IMAG_PTR]] : !cir.ptr, !s32i +// CHECK-AFTER: } + +// LLVM: define dso_local void @extract_imag() +// LLVM: %{{.+}} = load double, ptr getelementptr inbounds ({ double, double }, ptr @c, i32 0, i32 1), align 8 +// LLVM: %{{.+}} = load i32, ptr getelementptr inbounds ({ i32, i32 }, ptr @ci, i32 0, i32 1), align 4 +// LLVM: } diff --git a/clang/test/CIR/CodeGen/compound-literal-empty.c b/clang/test/CIR/CodeGen/compound-literal-empty.c new file mode 100644 index 000000000000..b0007d96b4cb --- /dev/null +++ b/clang/test/CIR/CodeGen/compound-literal-empty.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +short b() { return (short){}; } + +// CIR-LABEL: b +// CIR: {{%.*}} = cir.alloca !s16i, !cir.ptr, [".compoundliteral"] {alignment = 2 : i64} + +// LLVM-LABEL: b +// LLVM: [[RET_P:%.*]] = alloca i16, i64 1, align 2 +// LLVM: [[LITERAL:%.*]] = alloca i16, i64 1, align 2 +// LLVM: store i16 0, ptr [[LITERAL]], align 2 +// LLVM: [[T0:%.*]] = load i16, ptr [[LITERAL]], align 2 +// LLVM: store i16 [[T0]], ptr [[RET_P]], align 2 +// LLVM: [[T1:%.*]] = load i16, ptr [[RET_P]], 
align 2 +// LLVM: ret i16 [[T1]] diff --git a/clang/test/CIR/CodeGen/compound-literal.c b/clang/test/CIR/CodeGen/compound-literal.c new file mode 100644 index 000000000000..bbd7fa4a4e75 --- /dev/null +++ b/clang/test/CIR/CodeGen/compound-literal.c @@ -0,0 +1,106 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + + +typedef struct { + int *arr; +} S; + +S a = { + .arr = (int[]){} +}; + +// CIR: cir.global "private" internal @".compoundLiteral.0" = #cir.zero : !cir.array {alignment = 4 : i64} +// CIR: cir.global external @a = #cir.const_struct<{#cir.global_view<@".compoundLiteral.0"> : !cir.ptr}> : !ty_S + +// LLVM: @.compoundLiteral.0 = internal global [0 x i32] zeroinitializer +// LLVM: @a = global %struct.S { ptr @.compoundLiteral.0 } + +S b = { + .arr = (int[]){1} +}; + +// CIR: cir.global "private" internal @".compoundLiteral.1" = #cir.const_array<[#cir.int<1> : !s32i]> : !cir.array {alignment = 4 : i64} +// CIR: cir.global external @b = #cir.const_struct<{#cir.global_view<@".compoundLiteral.1"> : !cir.ptr}> : !ty_S + +// LLVM: @.compoundLiteral.1 = internal global [1 x i32] [i32 1] +// LLVM: @b = global %struct.S { ptr @.compoundLiteral.1 } + +int foo() { + return (struct { + int i; + }){1} + .i; +} + +// CIR: cir.func no_proto @foo() -> !s32i +// CIR: [[RET_MEM:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CIR: [[COMPLITERAL_MEM:%.*]] = cir.alloca !ty_anon2E0_, !cir.ptr, [".compoundliteral"] {alignment = 4 : i64} +// CIR: [[FIELD:%.*]] = cir.get_member [[COMPLITERAL_MEM]][0] {name = "i"} : !cir.ptr -> !cir.ptr +// CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store [[ONE]], [[FIELD]] : !s32i, !cir.ptr +// CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i +// CIR: cir.store [[ONE]], [[RET_MEM]] : !s32i, !cir.ptr +// CIR: [[RET:%.*]] = cir.load [[RET_MEM]] : !cir.ptr, !s32i +// CIR: cir.return [[RET]] : !s32i + +struct G { short x, y, z; }; +struct G g(int x, int y, int z) { + return (struct G) { x, y, z }; +} + +// CIR: cir.func @g +// CIR: %[[RETVAL:.*]] = cir.alloca !ty_G, !cir.ptr, ["__retval"] {alignment = 2 : i64} +// CIR: %[[X:.*]] = cir.get_member %[[RETVAL]][0] {name = "x"} +// CIR: cir.store {{.*}}, %[[X]] : !s16i +// CIR: %[[Y:.*]] = cir.get_member %[[RETVAL]][1] {name = "y"} +// CIR: cir.store {{.*}}, %[[Y]] : !s16i +// CIR: %[[Z:.*]] = cir.get_member %[[RETVAL]][2] {name = "z"} +// CIR: cir.store {{.*}}, %[[Z]] : !s16i +// CIR: %[[RES:.*]] = cir.load %[[RETVAL]] +// CIR: cir.return %[[RES]] + +// Nothing meaningful to test for LLVM codegen here. +// FIXME: ABI note, LLVM lowering differs from traditional LLVM codegen here, +// because the former does a memcopy + i48 load. + +typedef struct { unsigned long pgprot; } pgprot_t; +void split_large_page(unsigned long addr, pgprot_t prot) +{ + (addr ? 
prot : ((pgprot_t) { 0x001 } )).pgprot; +} + +// CIR-LABEL: @split_large_page +// CIR: %[[VAL_2:.*]] = cir.alloca !u64i, !cir.ptr, ["addr", init] {alignment = 8 : i64} +// CIR: %[[VAL_3:.*]] = cir.alloca !ty_pgprot_t, !cir.ptr, ["prot", init] {alignment = 8 : i64} +// CIR: %[[VAL_4:.*]] = cir.alloca !ty_pgprot_t, !cir.ptr, ["tmp"] {alignment = 8 : i64} +// CIR: cir.store {{.*}}, %[[VAL_2]] : !u64i, !cir.ptr +// CIR: cir.store {{.*}}, %[[VAL_3]] : !ty_pgprot_t, !cir.ptr +// CIR: %[[VAL_5:.*]] = cir.load %[[VAL_2]] : !cir.ptr, !u64i +// CIR: %[[VAL_6:.*]] = cir.cast(int_to_bool, %[[VAL_5]] : !u64i), !cir.bool +// CIR: cir.if %[[VAL_6]] { +// CIR: cir.copy %[[VAL_3]] to %[[VAL_4]] : !cir.ptr +// CIR: } else { +// CIR: %[[VAL_7:.*]] = cir.get_member %[[VAL_4]][0] {name = "pgprot"} : !cir.ptr -> !cir.ptr +// CIR: %[[VAL_8:.*]] = cir.const #cir.int<1> : !s32i +// CIR: %[[VAL_9:.*]] = cir.cast(integral, %[[VAL_8]] : !s32i), !u64i +// CIR: cir.store %[[VAL_9]], %[[VAL_7]] : !u64i, !cir.ptr +// CIR: } +// CIR: %[[VAL_10:.*]] = cir.get_member %[[VAL_4]][0] {name = "pgprot"} : !cir.ptr -> !cir.ptr +// CIR: %[[VAL_11:.*]] = cir.load %[[VAL_10]] : !cir.ptr, !u64i +// CIR: cir.return +// CIR: } + +// CHECK-LABEL: @split_large_page +// CHECK: br i1 {{.*}}, label %[[TRUE:[a-z0-9]+]], label %[[FALSE:[a-z0-9]+]] +// CHECK: [[FALSE]]: +// CHECK: %[[GEP:.*]] = getelementptr {{.*}}, ptr %[[ADDR:.*]], i32 0, i32 0 +// CHECK: store i64 1, ptr %[[GEP]], align 8 +// CHECK: br label %[[EXIT:[a-z0-9]+]] +// CHECK: [[TRUE]]: +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ADDR]], ptr {{.*}}, i32 8, i1 false) +// CHECK: br label %[[EXIT]] +// CHECK: [[EXIT]]: +// CHECK: ret void diff --git a/clang/test/CIR/CodeGen/cond.cpp b/clang/test/CIR/CodeGen/cond.cpp new file mode 100644 index 000000000000..e00ee528a72d --- /dev/null +++ b/clang/test/CIR/CodeGen/cond.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct __less { + inline constexpr bool operator()(const unsigned long& __x, const unsigned long& __y) const {return __x < __y;} +}; + +const unsigned long& +min(const unsigned long& __a, const unsigned long& __b) { + return __less()(__b, __a) ? 
__b : __a; +} + +// CHECK: cir.func @_Z3minRKmS0_(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["__a", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["__b", init] {alignment = 8 : i64} +// CHECK: %2 = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg1, %1 : !cir.ptr, !cir.ptr> +// CHECK: cir.scope { +// CHECK: %4 = cir.alloca !ty___less, !cir.ptr, ["ref.tmp0"] {alignment = 1 : i64} +// CHECK: cir.call @_ZN6__lessC1Ev(%4) : (!cir.ptr) -> () +// CHECK: %5 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK: %6 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %7 = cir.call @_ZNK6__lessclERKmS1_(%4, %5, %6) : (!cir.ptr, !cir.ptr, !cir.ptr) -> !cir.bool +// CHECK: %8 = cir.ternary(%7, true { +// CHECK: %9 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK: cir.yield %9 : !cir.ptr +// CHECK: }, false { +// CHECK: %9 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: cir.yield %9 : !cir.ptr +// CHECK: }) : (!cir.bool) -> !cir.ptr +// CHECK: cir.store %8, %2 : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGen/conditional-cleanup.cpp b/clang/test/CIR/CodeGen/conditional-cleanup.cpp new file mode 100644 index 000000000000..cc82fc09b22b --- /dev/null +++ b/clang/test/CIR/CodeGen/conditional-cleanup.cpp @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +typedef __typeof(sizeof(0)) size_t; + +// Declare the reserved global placement new. +void *operator new(size_t, void*); + +namespace test7 { + struct A { A(); ~A(); }; + struct B { + static void *operator new(size_t size) throw(); + B(const A&, B*); + ~B(); + }; + + B *test() { + return new B(A(), new B(A(), 0)); + } +} + +// CIR-DAG: ![[A:.*]] = !cir.struct} +// CIR-DAG: ![[B:.*]] = !cir.struct} + +// CIR-LABEL: _ZN5test74testEv +// CIR: %[[RET_VAL:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} +// CIR: cir.scope { +// CIR: %[[TMP_A0:.*]] = cir.alloca ![[A]], !cir.ptr, ["ref.tmp0"] {alignment = 1 : i64} +// CIR: %[[CLEANUP_COND_OUTER:.*]] = cir.alloca !cir.bool, !cir.ptr, ["cleanup.cond"] {alignment = 1 : i64} +// CIR: %[[TMP_A1:.*]] = cir.alloca ![[A]], !cir.ptr, ["ref.tmp1"] {alignment = 1 : i64} +// CIR: %[[CLEANUP_COND_INNER:.*]] = cir.alloca !cir.bool, !cir.ptr, ["cleanup.cond"] {alignment = 1 : i64} +// CIR: %[[FALSE0:.*]] = cir.const #false +// CIR: %[[TRUE0:.*]] = cir.const #true +// CIR: %[[FALSE1:.*]] = cir.const #false +// CIR: %[[TRUE1:.*]] = cir.const #true + +// CIR: %[[NULL_CHECK0:.*]] = cir.cmp(ne +// CIR: %[[PTR_B0:.*]] = cir.cast(bitcast +// CIR: cir.store align(1) %[[FALSE1]], %[[CLEANUP_COND_OUTER]] : !cir.bool, !cir.ptr +// CIR: cir.store align(1) %[[FALSE0]], %[[CLEANUP_COND_INNER]] : !cir.bool, !cir.ptr +// CIR: cir.if %[[NULL_CHECK0]] { + +// Ctor call: @test7::A::A() +// CIR: cir.call @_ZN5test71AC1Ev(%[[TMP_A0]]) : (!cir.ptr) -> () +// CIR: cir.store %[[TRUE1]], %[[CLEANUP_COND_OUTER]] : !cir.bool, !cir.ptr + +// CIR: %[[NULL_CHECK1:.*]] = cir.cmp(ne +// CIR: %[[PTR_B1:.*]] = cir.cast(bitcast +// CIR: cir.if %[[NULL_CHECK1]] { + +// Ctor call: @test7::A::A() +// CIR: cir.call @_ZN5test71AC1Ev(%[[TMP_A1]]) : (!cir.ptr) -> () +// CIR: cir.store %[[TRUE0]], %[[CLEANUP_COND_INNER]] : !cir.bool, !cir.ptr +// Ctor call: @test7::B::B() +// CIR: cir.call @_ZN5test71BC1ERKNS_1AEPS0_(%[[PTR_B1]], %[[TMP_A1]], 
{{.*}}) : (!cir.ptr, !cir.ptr, !cir.ptr) -> () +// CIR: } + +// Ctor call: @test7::B::B() +// CIR: cir.call @_ZN5test71BC1ERKNS_1AEPS0_(%[[PTR_B0]], %[[TMP_A0]], %[[PTR_B1]]) : (!cir.ptr, !cir.ptr, !cir.ptr) -> () +// CIR: } +// CIR: cir.store %[[PTR_B0]], %[[RET_VAL]] : !cir.ptr, !cir.ptr> +// CIR: %[[DO_CLEANUP_INNER:.*]] = cir.load %[[CLEANUP_COND_INNER]] : !cir.ptr, !cir.bool +// CIR: cir.if %[[DO_CLEANUP_INNER]] { +// Dtor call: @test7::A::~A() +// CIR: cir.call @_ZN5test71AD1Ev(%[[TMP_A1]]) : (!cir.ptr) -> () +// CIR: } +// CIR: %[[DO_CLEANUP_OUTER:.*]] = cir.load %[[CLEANUP_COND_OUTER]] : !cir.ptr, !cir.bool +// Dtor call: @test7::A::~A() +// CIR: cir.if %[[DO_CLEANUP_OUTER]] { +// CIR: cir.call @_ZN5test71AD1Ev(%[[TMP_A0]]) : (!cir.ptr) -> () +// CIR: } +// CIR: } +// CIR: cir.return +// CIR: } \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/const-array.c b/clang/test/CIR/CodeGen/const-array.c new file mode 100644 index 000000000000..0020d47d9fc3 --- /dev/null +++ b/clang/test/CIR/CodeGen/const-array.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +void bar() { + const int arr[1] = {1}; +} + +// CHECK: cir.global "private" constant internal dsolocal @bar.arr = #cir.const_array<[#cir.int<1> : !s32i]> : !cir.array {alignment = 4 : i64} +// CHECK: cir.func no_proto @bar() +// CHECK: {{.*}} = cir.get_global @bar.arr : !cir.ptr> + +void foo() { + int a[10] = {1}; +} + +// CHECK: cir.func {{.*@foo}} +// CHECK: %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} +// CHECK: %1 = cir.const #cir.const_array<[#cir.int<1> : !s32i], trailing_zeros> : !cir.array +// CHECK: cir.store %1, %0 : !cir.array, !cir.ptr> diff --git a/clang/test/CIR/CodeGen/const-bitfields.c b/clang/test/CIR/CodeGen/const-bitfields.c new file mode 100644 index 000000000000..b58db7f193a2 --- /dev/null +++ b/clang/test/CIR/CodeGen/const-bitfields.c @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s + +struct T { + int X : 5; + int Y : 6; + int Z : 9; + int W; +}; + +struct Inner { + unsigned a : 1; + unsigned b : 1; + unsigned c : 1; + unsigned d : 30; +}; + +// CHECK: !ty_anon_struct = !cir.struct, !cir.int, !cir.int, !cir.int}> +// CHECK: !ty_T = !cir.struct x 3>, !cir.int} #cir.record.decl.ast> +// CHECK: !ty_anon_struct1 = !cir.struct, !cir.array x 3>, !cir.int, !cir.int, !cir.int, !cir.int}> +// CHECK: #bfi_Z = #cir.bitfield_info, size = 9, offset = 11, is_signed = true> + +struct T GV = { 1, 5, 26, 42 }; +// CHECK: cir.global external @GV = #cir.const_struct<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i, #cir.int<42> : !s32i}> : !ty_anon_struct + +// check padding is used (const array of zeros) +struct Inner var = { 1, 0, 1, 21}; +// CHECK: cir.global external @var = #cir.const_struct<{#cir.int<5> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array, #cir.int<21> : !u8i, #cir.int<0> : !u8i, #cir.int<0> : !u8i, #cir.int<0> : !u8i}> : !ty_anon_struct1 + + +// CHECK: cir.func {{.*@getZ()}} +// CHECK: %1 = cir.get_global @GV : !cir.ptr +// CHECK: %2 = cir.cast(bitcast, %1 : !cir.ptr), !cir.ptr +// CHECK: %3 = cir.cast(bitcast, %2 : !cir.ptr), !cir.ptr> +// CHECK: %4 = cir.get_bitfield(#bfi_Z, %3 : !cir.ptr>) -> !s32i +int getZ() { + return GV.Z; +} + +// check the type used is the type of T struct for plain field +// CHECK: cir.func {{.*@getW()}} +// CHECK: 
%1 = cir.get_global @GV : !cir.ptr +// CHECK: %2 = cir.cast(bitcast, %1 : !cir.ptr), !cir.ptr +// CHECK: %3 = cir.get_member %2[1] {name = "W"} : !cir.ptr -> !cir.ptr +int getW() { + return GV.W; +} + diff --git a/clang/test/CIR/CodeGen/constptr.c b/clang/test/CIR/CodeGen/constptr.c new file mode 100644 index 000000000000..e19f7574566b --- /dev/null +++ b/clang/test/CIR/CodeGen/constptr.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +int *p = (int*)0x1234; + + +// CIR: cir.global external @p = #cir.ptr<4660 : i64> : !cir.ptr +// LLVM: @p = global ptr inttoptr (i64 4660 to ptr) diff --git a/clang/test/CIR/CodeGen/coro-task.cpp b/clang/test/CIR/CodeGen/coro-task.cpp new file mode 100644 index 000000000000..0dd171f201e5 --- /dev/null +++ b/clang/test/CIR/CodeGen/coro-task.cpp @@ -0,0 +1,431 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +namespace std { + +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; + +template +typename remove_reference::type &&move(T &&t) noexcept; + +template +struct coroutine_traits { using promise_type = typename Ret::promise_type; }; + +template +struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; +}; +template <> +struct coroutine_handle { + template + coroutine_handle(coroutine_handle) noexcept; + static coroutine_handle from_address(void *); +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +struct string { + int size() const; + string(); + string(char const *s); +}; + +template +struct optional { + optional(); + optional(const T&); + T &operator*() &; + T &&operator*() &&; + T &value() &; + T &&value() &&; +}; +} // namespace std + +namespace folly { +namespace coro { + +using std::suspend_always; +using std::suspend_never; +using std::coroutine_handle; + +using SemiFuture = int; + +template +struct Task { + struct promise_type { + Task get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_value(T); + void unhandled_exception(); + auto yield_value(Task) noexcept { return final_suspend(); } + }; + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + T await_resume(); +}; + +template<> +struct Task { + struct promise_type { + Task get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + void unhandled_exception() noexcept; + auto yield_value(Task) noexcept { return final_suspend(); } + }; + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} + SemiFuture semi(); +}; + +// FIXME: add CIRGen support here. 
+// struct blocking_wait_fn { +// template +// T operator()(Task&& awaitable) const { +// return T(); +// } +// }; + +// inline constexpr blocking_wait_fn blocking_wait{}; +// static constexpr blocking_wait_fn const& blockingWait = blocking_wait; + +template +T blockingWait(Task&& awaitable) { + return T(); +} + +template +Task collectAllRange(Task* awaitable); + +template +Task collectAll(SemiAwaitables&&... awaitables); + +struct co_invoke_fn { + template + Task operator()(F&& f, A&&... a) const { + return Task(); + } +}; + +co_invoke_fn co_invoke; + +}} // namespace folly::coro + +// CHECK-DAG: ![[IntTask:.*]] = !cir.struct" {!cir.int}> +// CHECK-DAG: ![[VoidTask:.*]] = !cir.struct" {!cir.int}> +// CHECK-DAG: ![[VoidPromisse:.*]] = !cir.struct::promise_type" {!cir.int}> +// CHECK-DAG: ![[CoroHandleVoid:.*]] = !cir.struct" {!cir.int}> +// CHECK-DAG: ![[CoroHandlePromise:ty_.*]] = !cir.struct::promise_type>" {!cir.int}> +// CHECK-DAG: ![[StdString:.*]] = !cir.struct}> +// CHECK-DAG: ![[SuspendAlways:.*]] = !cir.struct}> + +// CHECK: module {{.*}} { +// CHECK-NEXT: cir.global external @_ZN5folly4coro9co_invokeE = #cir.zero : !ty_folly3A3Acoro3A3Aco_invoke_fn + +// CHECK: cir.func builtin private @__builtin_coro_id(!u32i, !cir.ptr, !cir.ptr, !cir.ptr) -> !u32i +// CHECK: cir.func builtin private @__builtin_coro_alloc(!u32i) -> !cir.bool +// CHECK: cir.func builtin private @__builtin_coro_size() -> !u64i +// CHECK: cir.func builtin private @__builtin_coro_begin(!u32i, !cir.ptr) -> !cir.ptr + +using VoidTask = folly::coro::Task; + +VoidTask silly_task() { + co_await std::suspend_always(); +} + +// CHECK: cir.func coroutine @_Z10silly_taskv() -> ![[VoidTask]] extra{{.*}}{ + +// Allocate promise. + +// CHECK: %[[#VoidTaskAddr:]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"] +// CHECK: %[[#SavedFrameAddr:]] = cir.alloca !cir.ptr, !cir.ptr>, ["__coro_frame_addr"] {alignment = 8 : i64} +// CHECK: %[[#VoidPromisseAddr:]] = cir.alloca ![[VoidPromisse]], {{.*}}, ["__promise"] + +// Get coroutine id with __builtin_coro_id. + +// CHECK: %[[#NullPtr:]] = cir.const #cir.ptr : !cir.ptr +// CHECK: %[[#Align:]] = cir.const #cir.int<16> : !u32i +// CHECK: %[[#CoroId:]] = cir.call @__builtin_coro_id(%[[#Align]], %[[#NullPtr]], %[[#NullPtr]], %[[#NullPtr]]) + +// Perform allocation calling operator 'new' depending on __builtin_coro_alloc and +// call __builtin_coro_begin for the final coroutine frame address. + +// CHECK: %[[#ShouldAlloc:]] = cir.call @__builtin_coro_alloc(%[[#CoroId]]) : (!u32i) -> !cir.bool +// CHECK: cir.store %[[#NullPtr]], %[[#SavedFrameAddr]] : !cir.ptr, !cir.ptr> +// CHECK: cir.if %[[#ShouldAlloc]] { +// CHECK: %[[#CoroSize:]] = cir.call @__builtin_coro_size() : () -> !u64i +// CHECK: %[[#AllocAddr:]] = cir.call @_Znwm(%[[#CoroSize]]) : (!u64i) -> !cir.ptr +// CHECK: cir.store %[[#AllocAddr]], %[[#SavedFrameAddr]] : !cir.ptr, !cir.ptr> +// CHECK: } +// CHECK: %[[#Load0:]] = cir.load %[[#SavedFrameAddr]] : !cir.ptr>, !cir.ptr +// CHECK: %[[#CoroFrameAddr:]] = cir.call @__builtin_coro_begin(%[[#CoroId]], %[[#Load0]]) + +// Call promise.get_return_object() to retrieve the task object. + +// CHECK: %[[#RetObj:]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type17get_return_objectEv(%[[#VoidPromisseAddr]]) : {{.*}} -> ![[VoidTask]] +// CHECK: cir.store %[[#RetObj]], %[[#VoidTaskAddr]] : ![[VoidTask]] + +// Start a new scope for the actual codegen for co_await, create temporary allocas for +// holding coroutine handle and the suspend_always struct. 
+ +// CHECK: cir.scope { +// CHECK: %[[#SuspendAlwaysAddr:]] = cir.alloca ![[SuspendAlways]], {{.*}} ["ref.tmp0"] {alignment = 1 : i64} +// CHECK: %[[#CoroHandleVoidAddr:]] = cir.alloca ![[CoroHandleVoid]], {{.*}} ["agg.tmp0"] {alignment = 1 : i64} +// CHECK: %[[#CoroHandlePromiseAddr:]] = cir.alloca ![[CoroHandlePromise]], {{.*}} ["agg.tmp1"] {alignment = 1 : i64} + +// Effectively execute `co_await promise_type::initial_suspend()` by calling initial_suspend() and getting +// the suspend_always struct to use for cir.await. Note that we return by value since we defer ABI lowering +// to later passes, as is done elsewhere. + +// CHECK: %[[#Tmp0:]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type15initial_suspendEv(%[[#VoidPromisseAddr]]) +// CHECK: cir.store %[[#Tmp0]], %[[#SuspendAlwaysAddr]] + +// +// Here we start mapping co_await to cir.await. +// + +// The first region `ready` has a special cir.yield code to veto suspension. + +// CHECK: cir.await(init, ready : { +// CHECK: %[[#ReadyVeto:]] = cir.scope { +// CHECK: %[[#TmpCallRes:]] = cir.call @_ZNSt14suspend_always11await_readyEv(%[[#SuspendAlwaysAddr]]) +// CHECK: cir.yield %[[#TmpCallRes]] : !cir.bool +// CHECK: } +// CHECK: cir.condition(%[[#ReadyVeto]]) + +// The second region `suspend` contains the actual suspend logic. +// +// - Start by getting the coroutine handle using from_address(). +// - Implicitly convert the coroutine handle from the task-specific promise +// specialization to a void one. +// - Call suspend_always::await_suspend() passing the handle. +// +// FIXME: add veto support for non-void await_suspends. + +// CHECK: }, suspend : { +// CHECK: %[[#FromAddrRes:]] = cir.call @_ZNSt16coroutine_handleIN5folly4coro4TaskIvE12promise_typeEE12from_addressEPv(%[[#CoroFrameAddr]]) +// CHECK: cir.store %[[#FromAddrRes]], %[[#CoroHandlePromiseAddr]] : ![[CoroHandlePromise]] +// CHECK: %[[#CoroHandlePromiseReload:]] = cir.load %[[#CoroHandlePromiseAddr]] +// CHECK: cir.call @_ZNSt16coroutine_handleIvEC1IN5folly4coro4TaskIvE12promise_typeEEES_IT_E(%[[#CoroHandleVoidAddr]], %[[#CoroHandlePromiseReload]]) +// CHECK: %[[#CoroHandleVoidReload:]] = cir.load %[[#CoroHandleVoidAddr]] : !cir.ptr, ![[CoroHandleVoid]] +// CHECK: cir.call @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE(%[[#SuspendAlwaysAddr]], %[[#CoroHandleVoidReload]]) +// CHECK: cir.yield + +// The third region `resume` handles the coroutine resume logic.
+ +// CHECK: }, resume : { +// CHECK: cir.call @_ZNSt14suspend_always12await_resumeEv(%[[#SuspendAlwaysAddr]]) +// CHECK: cir.yield +// CHECK: },) +// CHECK: } + +// Since we already tested cir.await guts above, the remaining checks for: +// - The actual user written co_await +// - The promise call +// - The final suspend co_await +// - Return + +// The actual user written co_await +// CHECK: cir.scope { +// CHECK: cir.await(user, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) +// CHECK: } + +// The promise call +// CHECK: cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv(%[[#VoidPromisseAddr]]) + +// The final suspend co_await +// CHECK: cir.scope { +// CHECK: cir.await(final, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) +// CHECK: } + +// Call builtin coro end and return + +// CHECK-NEXT: %[[#CoroEndArg0:]] = cir.const #cir.ptr : !cir.ptr +// CHECK-NEXT: %[[#CoroEndArg1:]] = cir.const #false +// CHECK-NEXT: = cir.call @__builtin_coro_end(%[[#CoroEndArg0]], %[[#CoroEndArg1]]) + +// CHECK: %[[#Tmp1:]] = cir.load %[[#VoidTaskAddr]] +// CHECK-NEXT: cir.return %[[#Tmp1]] +// CHECK-NEXT: } + +folly::coro::Task byRef(const std::string& s) { + co_return s.size(); +} + +// FIXME: this could be less redundant than two allocas + reloads +// CHECK: cir.func coroutine @_Z5byRefRKSt6string(%arg0: !cir.ptr {{.*}} ![[IntTask]] extra{{.*}}{ +// CHECK: %[[#AllocaParam:]] = cir.alloca !cir.ptr, {{.*}} ["s", init] +// CHECK: %[[#AllocaFnUse:]] = cir.alloca !cir.ptr, {{.*}} ["s", init] + +folly::coro::Task silly_coro() { + std::optional> task; + { + std::string s = "yolo"; + task = byRef(s); + } + folly::coro::blockingWait(std::move(task.value())); + co_return; +} + +// Make sure we properly handle OnFallthrough coro body sub stmt and +// check there are not multiple co_returns emitted. + +// CHECK: cir.func coroutine @_Z10silly_corov() {{.*}} ![[VoidTask]] extra{{.*}}{ +// CHECK: cir.await(init, ready : { +// CHECK: cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv +// CHECK-NOT: cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv +// CHECK: cir.await(final, ready : { + +folly::coro::Task go(int const& val); +folly::coro::Task go1() { + auto task = go(1); + co_return co_await task; +} + +// CHECK: cir.func coroutine @_Z3go1v() {{.*}} ![[IntTask]] extra{{.*}}{ +// CHECK: %[[#IntTaskAddr:]] = cir.alloca ![[IntTask]], !cir.ptr, ["task", init] + +// CHECK: cir.await(init, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) +// CHECK: } + +// The call to go(1) has its own scope due to full-expression rules. 
+// CHECK: cir.scope { +// CHECK: %[[#OneAddr:]] = cir.alloca !s32i, !cir.ptr, ["ref.tmp1", init] {alignment = 4 : i64} +// CHECK: %[[#One:]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store %[[#One]], %[[#OneAddr]] : !s32i, !cir.ptr +// CHECK: %[[#IntTaskTmp:]] = cir.call @_Z2goRKi(%[[#OneAddr]]) : (!cir.ptr) -> ![[IntTask]] +// CHECK: cir.store %[[#IntTaskTmp]], %[[#IntTaskAddr]] : ![[IntTask]], !cir.ptr +// CHECK: } + +// CHECK: %[[#CoReturnValAddr:]] = cir.alloca !s32i, !cir.ptr, ["__coawait_resume_rval"] {alignment = 1 : i64} +// CHECK: cir.await(user, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: %[[#ResumeVal:]] = cir.call @_ZN5folly4coro4TaskIiE12await_resumeEv(%3) +// CHECK: cir.store %[[#ResumeVal]], %[[#CoReturnValAddr]] : !s32i, !cir.ptr +// CHECK: },) +// CHECK: %[[#V:]] = cir.load %[[#CoReturnValAddr]] : !cir.ptr, !s32i +// CHECK: cir.call @_ZN5folly4coro4TaskIiE12promise_type12return_valueEi({{.*}}, %[[#V]]) + +folly::coro::Task go1_lambda() { + auto task = []() -> folly::coro::Task { + co_return 1; + }(); + co_return co_await task; +} + +// CHECK: cir.func coroutine lambda internal private @_ZZ10go1_lambdavENK3$_0clEv{{.*}} ![[IntTask]] extra{{.*}}{ +// CHECK: cir.func coroutine @_Z10go1_lambdav() {{.*}} ![[IntTask]] extra{{.*}}{ + +folly::coro::Task go4() { + auto* fn = +[](int const& i) -> folly::coro::Task { co_return i; }; + auto task = fn(3); + co_return co_await std::move(task); +} + +// CHECK: cir.func coroutine @_Z3go4v() {{.*}} ![[IntTask]] extra{{.*}}{ + +// CHECK: cir.await(init, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) +// CHECK: } + +// CHECK: %12 = cir.scope { +// CHECK: %17 = cir.alloca !ty_anon2E2_, !cir.ptr, ["ref.tmp1"] {alignment = 1 : i64} + +// Get the lambda invoker ptr via `lambda operator folly::coro::Task (*)(int const&)()` +// CHECK: %18 = cir.call @_ZZ3go4vENK3$_0cvPFN5folly4coro4TaskIiEERKiEEv(%17) : (!cir.ptr) -> !cir.ptr)>> +// CHECK: %19 = cir.unary(plus, %18) : !cir.ptr)>>, !cir.ptr)>> +// CHECK: cir.yield %19 : !cir.ptr)>> +// CHECK: } +// CHECK: cir.store %12, %3 : !cir.ptr)>>, !cir.ptr)>>> +// CHECK: cir.scope { +// CHECK: %17 = cir.alloca !s32i, !cir.ptr, ["ref.tmp2", init] {alignment = 4 : i64} +// CHECK: %18 = cir.load %3 : !cir.ptr)>>>, !cir.ptr)>> +// CHECK: %19 = cir.const #cir.int<3> : !s32i +// CHECK: cir.store %19, %17 : !s32i, !cir.ptr + +// Call invoker, which calls operator() indirectly. 
+// CHECK: %20 = cir.call %18(%17) : (!cir.ptr)>>, !cir.ptr) -> ![[IntTask]] +// CHECK: cir.store %20, %4 : ![[IntTask]], !cir.ptr +// CHECK: } + +// CHECK: cir.await(user, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) +// CHECK: } + +folly::coro::Task yield(); +folly::coro::Task yield1() { + auto t = yield(); + co_yield t; +} + +// CHECK: cir.func coroutine @_Z6yield1v() -> !ty_folly3A3Acoro3A3ATask3Cvoid3E + +// CHECK: cir.await(init, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) + +// CHECK: cir.scope { +// CHECK-NEXT: %[[#SUSPEND_PTR:]] = cir.alloca !ty_std3A3Asuspend_always, !cir.ptr +// CHECK-NEXT: %[[#AWAITER_PTR:]] = cir.alloca !ty_folly3A3Acoro3A3ATask3Cvoid3E, !cir.ptr +// CHECK-NEXT: %[[#CORO_PTR:]] = cir.alloca !ty_std3A3Acoroutine_handle3Cvoid3E, !cir.ptr +// CHECK-NEXT: %[[#CORO2_PTR:]] = cir.alloca !ty_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E, !cir.ptr +// CHECK-NEXT: cir.call @_ZN5folly4coro4TaskIvEC1ERKS2_(%[[#AWAITER_PTR]], %{{.+}}) : (!cir.ptr, !cir.ptr) -> () +// CHECK-NEXT: %[[#AWAITER:]] = cir.load %[[#AWAITER_PTR]] : !cir.ptr, !ty_folly3A3Acoro3A3ATask3Cvoid3E +// CHECK-NEXT: %[[#SUSPEND:]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type11yield_valueES2_(%{{.+}}, %[[#AWAITER]]) : (!cir.ptr, !ty_folly3A3Acoro3A3ATask3Cvoid3E) -> !ty_std3A3Asuspend_always +// CHECK-NEXT: cir.store %[[#SUSPEND]], %[[#SUSPEND_PTR]] : !ty_std3A3Asuspend_always, !cir.ptr +// CHECK-NEXT: cir.await(yield, ready : { +// CHECK-NEXT: %[[#READY:]] = cir.scope { +// CHECK-NEXT: %[[#A:]] = cir.call @_ZNSt14suspend_always11await_readyEv(%[[#SUSPEND_PTR]]) : (!cir.ptr) -> !cir.bool +// CHECK-NEXT: cir.yield %[[#A]] : !cir.bool +// CHECK-NEXT: } : !cir.bool +// CHECK-NEXT: cir.condition(%[[#READY]]) +// CHECK-NEXT: }, suspend : { +// CHECK-NEXT: %[[#CORO2:]] = cir.call @_ZNSt16coroutine_handleIN5folly4coro4TaskIvE12promise_typeEE12from_addressEPv(%9) : (!cir.ptr) -> !ty_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E +// CHECK-NEXT: cir.store %[[#CORO2]], %[[#CORO2_PTR]] : !ty_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E, !cir.ptr +// CHECK-NEXT: %[[#B:]] = cir.load %[[#CORO2_PTR]] : !cir.ptr, !ty_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E +// CHECK-NEXT: cir.call @_ZNSt16coroutine_handleIvEC1IN5folly4coro4TaskIvE12promise_typeEEES_IT_E(%[[#CORO_PTR]], %[[#B]]) : (!cir.ptr, !ty_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E) -> () +// CHECK-NEXT: %[[#C:]] = cir.load %[[#CORO_PTR]] : !cir.ptr, !ty_std3A3Acoroutine_handle3Cvoid3E +// CHECK-NEXT: cir.call @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE(%[[#SUSPEND_PTR]], %[[#C]]) : (!cir.ptr, !ty_std3A3Acoroutine_handle3Cvoid3E) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: }, resume : { +// CHECK-NEXT: cir.call @_ZNSt14suspend_always12await_resumeEv(%[[#SUSPEND_PTR]]) : (!cir.ptr) -> () +// CHECK-NEXT: cir.yield +// CHECK-NEXT: },) +// CHECK-NEXT: } + +// CHECK: cir.await(final, ready : { +// CHECK: }, suspend : { +// CHECK: }, resume : { +// CHECK: },) + +// CHECK: } diff --git a/clang/test/CIR/CodeGen/ctor-alias.cpp b/clang/test/CIR/CodeGen/ctor-alias.cpp new file mode 100644 index 000000000000..3739ecef1cce --- /dev/null +++ b/clang/test/CIR/CodeGen/ctor-alias.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o - | FileCheck %s + +struct DummyString { + 
DummyString(const char *s) {} +}; + +void t() { + DummyString s4 = "yolo"; +} + +// CHECK: cir.func linkonce_odr @_ZN11DummyStringC2EPKc +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: cir.store %arg1, %1 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return + +// CHECK-NOT: cir.fun @_ZN11DummyStringC1EPKc + +// CHECK: cir.func @_Z1tv +// CHECK-NEXT: %0 = cir.alloca !ty_DummyString, !cir.ptr, ["s4", init] {alignment = 1 : i64} +// CHECK-NEXT: %1 = cir.get_global @".str" : !cir.ptr> +// CHECK-NEXT: %2 = cir.cast(array_to_ptrdecay, %1 : !cir.ptr>), !cir.ptr +// CHECK-NEXT: cir.call @_ZN11DummyStringC2EPKc(%0, %2) : (!cir.ptr, !cir.ptr) -> () +// CHECK-NEXT: cir.return + +struct B { + B(); +}; +B::B() { +} + +// CHECK: cir.func @_ZN1BC2Ev(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: cir.return +// CHECK: } +// CHECK: cir.func @_ZN1BC1Ev(!cir.ptr) alias(@_ZN1BC2Ev) \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/ctor-member-lvalue-to-rvalue.cpp b/clang/test/CIR/CodeGen/ctor-member-lvalue-to-rvalue.cpp new file mode 100644 index 000000000000..d8e42f46429f --- /dev/null +++ b/clang/test/CIR/CodeGen/ctor-member-lvalue-to-rvalue.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -std=c++17 -mconstructor-aliases -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +// TODO: support -mno-constructor-aliases + +struct String { + long size; + String(const String &s) : size{s.size} {} +// CHECK: cir.func linkonce_odr @_ZN6StringC2ERKS_ +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 +// CHECK: cir.store %arg1, %1 +// CHECK: %2 = cir.load %0 +// CHECK: %3 = cir.get_member %2[0] {name = "size"} +// CHECK: %4 = cir.load %1 +// CHECK: %5 = cir.get_member %4[0] {name = "size"} +// CHECK: %6 = cir.load %5 : !cir.ptr, !s64i +// CHECK: cir.store %6, %3 : !s64i, !cir.ptr +// CHECK: cir.return +// CHECK: } + + String() {} +}; + +void foo() { + String s; + String s1{s}; +} +// CHECK: cir.func @_Z3foov() {{.*}} { +// CHECK: %0 = cir.alloca !ty_String, !cir.ptr, ["s", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !ty_String, !cir.ptr, ["s1", init] {alignment = 8 : i64} +// CHECK: cir.call @_ZN6StringC2Ev(%0) : (!cir.ptr) -> () +// CHECK: cir.call @_ZN6StringC2ERKS_(%1, %0) : (!cir.ptr, !cir.ptr) -> () +// CHECK: cir.return +// } diff --git a/clang/test/CIR/CodeGen/ctor.cpp b/clang/test/CIR/CodeGen/ctor.cpp new file mode 100644 index 000000000000..9f8217532ac5 --- /dev/null +++ b/clang/test/CIR/CodeGen/ctor.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct Struk { + int a; + Struk() {} + void test() {} +}; + +void baz() { + Struk s; +} + +// CHECK: !ty_Struk = !cir.struct}> + +// CHECK: cir.func linkonce_odr @_ZN5StrukC2Ev(%arg0: !cir.ptr +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = 
cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return + +// CHECK: cir.func linkonce_odr @_ZN5StrukC1Ev(%arg0: !cir.ptr +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.call @_ZN5StrukC2Ev(%1) : (!cir.ptr) -> () +// CHECK-NEXT: cir.return + +// CHECK: cir.func @_Z3bazv() +// CHECK-NEXT: %0 = cir.alloca !ty_Struk, !cir.ptr, ["s", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.call @_ZN5StrukC1Ev(%0) : (!cir.ptr) -> () +// CHECK-NEXT: cir.return diff --git a/clang/test/CIR/CodeGen/cxx-default-arg.cpp b/clang/test/CIR/CodeGen/cxx-default-arg.cpp new file mode 100644 index 000000000000..c5665337608b --- /dev/null +++ b/clang/test/CIR/CodeGen/cxx-default-arg.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: cir.func private @_ZN12MyIntPointerC1EPi + +struct MyIntPointer { + MyIntPointer(int *p = nullptr); +}; + +void foo() { + MyIntPointer p; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/cxx1z-inline-variables.cpp b/clang/test/CIR/CodeGen/cxx1z-inline-variables.cpp new file mode 100644 index 000000000000..68cddd578767 --- /dev/null +++ b/clang/test/CIR/CodeGen/cxx1z-inline-variables.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck -check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// For compatibility with C++11 and C++14, an out-of-line declaration of a +// static constexpr local variable promotes the variable to weak_odr. 
+struct compat { + static constexpr int a = 1; + static constexpr int b = 2; + static constexpr int c = 3; + static inline constexpr int d = 4; + static const int e = 5; + static const int f = 6; + static const int g = 7; +}; +const int &compat_use_before_redecl = compat::b; +const int compat::a; +const int compat::b; +const int compat::c; +const int compat::d; +const int compat::e; +constexpr int compat::f; +constexpr inline int compat::g; +const int &compat_use_after_redecl1 = compat::c; +const int &compat_use_after_redecl2 = compat::d; +const int &compat_use_after_redecl3 = compat::g; + +// CIR: cir.global weak_odr comdat @_ZN6compat1bE = #cir.int<2> : !s32i {alignment = 4 : i64} +// CIR: cir.global weak_odr comdat @_ZN6compat1aE = #cir.int<1> : !s32i {alignment = 4 : i64} +// CIR: cir.global weak_odr comdat @_ZN6compat1cE = #cir.int<3> : !s32i {alignment = 4 : i64} +// CIR: cir.global external @_ZN6compat1eE = #cir.int<5> : !s32i {alignment = 4 : i64} +// CIR: cir.global weak_odr comdat @_ZN6compat1fE = #cir.int<6> : !s32i {alignment = 4 : i64} +// CIR: cir.global linkonce_odr comdat @_ZN6compat1dE = #cir.int<4> : !s32i {alignment = 4 : i64} +// CIR: cir.global linkonce_odr comdat @_ZN6compat1gE = #cir.int<7> : !s32i {alignment = 4 : i64} + +// LLVM: $_ZN6compat1bE = comdat any +// LLVM: $_ZN6compat1aE = comdat any +// LLVM: $_ZN6compat1cE = comdat any +// LLVM: $_ZN6compat1fE = comdat any +// LLVM: $_ZN6compat1dE = comdat any +// LLVM: $_ZN6compat1gE = comdat any + +// LLVM: @_ZN6compat1bE = weak_odr global i32 2, comdat, align 4 +// LLVM: @_ZN6compat1aE = weak_odr global i32 1, comdat, align 4 +// LLVM: @_ZN6compat1cE = weak_odr global i32 3, comdat, align 4 +// LLVM: @_ZN6compat1eE = global i32 5, align 4 +// LLVM: @_ZN6compat1fE = weak_odr global i32 6, comdat, align 4 +// LLVM: @_ZN6compat1dE = linkonce_odr global i32 4, comdat, align 4 +// LLVM: @_ZN6compat1gE = linkonce_odr global i32 7, comdat, align 4 diff --git a/clang/test/CIR/CodeGen/defined-pure-virtual-func.cpp b/clang/test/CIR/CodeGen/defined-pure-virtual-func.cpp new file mode 100644 index 000000000000..86e46ee503bb --- /dev/null +++ b/clang/test/CIR/CodeGen/defined-pure-virtual-func.cpp @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Pure virtual functions are allowed to be defined, but the vtable should still +// point to __cxa_pure_virtual instead of the definition. For destructors, the +// base object destructor (which is not included in the vtable) should be +// defined as usual. The complete object destructors and deleting destructors +// should contain a trap, and the vtable entries for them should point to +// __cxa_pure_virtual. +class C { + C(); + virtual ~C() = 0; + virtual void pure() = 0; +}; + +C::C() = default; +C::~C() = default; +void C::pure() {} + +// CHECK: @_ZTV1C = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI1C> : !cir.ptr +// complete object destructor (D1) +// CHECK-SAME: #cir.global_view<@__cxa_pure_virtual> : !cir.ptr, +// deleting destructor (D0) +// CHECK-SAME: #cir.global_view<@__cxa_pure_virtual> : !cir.ptr, +// C::pure +// CHECK-SAME: #cir.global_view<@__cxa_pure_virtual> : !cir.ptr]> + +// The base object destructor should be emitted as normal. 
+// CHECK-LABEL: cir.func @_ZN1CD2Ev(%arg0: !cir.ptr loc({{[^)]+}})) {{.*}} { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// The complete object destructor should trap. +// CHECK-LABEL: cir.func @_ZN1CD1Ev(%arg0: !cir.ptr loc({{[^)]+}})) {{.*}} { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.trap +// CHECK-NEXT: } + +// The deleting destructor should trap. +// CHECK-LABEL: cir.func @_ZN1CD0Ev(%arg0: !cir.ptr loc({{[^)]+}})) {{.*}} { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.trap +// CHECK-NEXT: } + +// C::pure should be emitted as normal. +// CHECK-LABEL: cir.func @_ZN1C4pureEv(%arg0: !cir.ptr loc({{[^)]+}})) {{.*}} { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } diff --git a/clang/test/CIR/CodeGen/delegating-ctor.cpp b/clang/test/CIR/CodeGen/delegating-ctor.cpp new file mode 100644 index 000000000000..3c64d76df2cd --- /dev/null +++ b/clang/test/CIR/CodeGen/delegating-ctor.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fexceptions -fcxx-exceptions %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct Delegating { + Delegating(); + Delegating(int); +}; + +// Check that the constructor being delegated to is called with the correct +// arguments. +Delegating::Delegating() : Delegating(0) {} + +// CHECK-LABEL: cir.func @_ZN10DelegatingC2Ev(%arg0: !cir.ptr {{.*}}) {{.*}} { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %2 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.call @_ZN10DelegatingC2Ei(%1, %2) : (!cir.ptr, !s32i) -> () +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +struct DelegatingWithZeroing { + int i; + DelegatingWithZeroing() = default; + DelegatingWithZeroing(int); +}; + +// Check that the delegating constructor performs zero-initialization here. +// FIXME: we should either emit the trivial default constructor or remove the +// call to it in a lowering pass. 
+DelegatingWithZeroing::DelegatingWithZeroing(int) : DelegatingWithZeroing() {} + +// CHECK-LABEL: cir.func @_ZN21DelegatingWithZeroingC2Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i {{.*}}) {{.*}} { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: cir.store %arg1, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %3 = cir.const #cir.zero : !ty_DelegatingWithZeroing +// CHECK-NEXT: cir.store %3, %2 : !ty_DelegatingWithZeroing, !cir.ptr +// CHECK-NEXT: cir.call @_ZN21DelegatingWithZeroingC2Ev(%2) : (!cir.ptr) -> () extra(#fn_attr{{[0-9]*}}) +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +void canThrow(); +struct HasNonTrivialDestructor { + HasNonTrivialDestructor(); + HasNonTrivialDestructor(int); + ~HasNonTrivialDestructor(); +}; + +// Check that we call the destructor whenever a cleanup is needed. +// FIXME: enable and check this when exceptions are fully supported. +#if 0 +HasNonTrivialDestructor::HasNonTrivialDestructor(int) + : HasNonTrivialDestructor() { + canThrow(); +} +#endif + +// From clang/test/CodeGenCXX/cxx0x-delegating-ctors.cpp, check that virtual +// inheritance and delegating constructors interact correctly. +// FIXME: enable and check this when virtual inheritance is fully supported. +#if 0 +namespace PR14588 { +void other(); + +class Base { +public: + Base() { squawk(); } + virtual ~Base() {} + + virtual void squawk() { other(); } +}; + +class Foo : public virtual Base { +public: + Foo(); + Foo(const void *inVoid); + virtual ~Foo() {} + + virtual void squawk() { other(); } +}; + +Foo::Foo() : Foo(nullptr) { other(); } +Foo::Foo(const void *inVoid) { squawk(); } +} // namespace PR14588 +#endif diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp new file mode 100644 index 000000000000..b02641ff87b0 --- /dev/null +++ b/clang/test/CIR/CodeGen/delete.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef __typeof(sizeof(int)) size_t; + +namespace test1 { + struct A { void operator delete(void*,size_t); int x; }; + void a(A *x) { + delete x; + } + // CHECK: cir.func @_ZN5test11aEPNS_1AE + + // CHECK: %[[CONST:.*]] = cir.const #cir.int<4> : !u64i + // CHECK: cir.call @_ZN5test11AdlEPvm({{.*}}, %[[CONST]]) +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/derived-to-base.cpp b/clang/test/CIR/CodeGen/derived-to-base.cpp new file mode 100644 index 000000000000..ad8bf4b2a736 --- /dev/null +++ b/clang/test/CIR/CodeGen/derived-to-base.cpp @@ -0,0 +1,174 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef enum { + RequestFailed = -2004, +} enumy; + +typedef struct { + const void* samples; + int cound; +} buffy; + +class C1 { + public: + virtual ~C1(); + C1(int i); + + struct IE { + bool supported = false; + unsigned version = 0; + }; + + struct IEs { + IE chain; + }; + + static IEs availableIEs; + class Layer { + public: + Layer(int d); + virtual ~Layer() {} + }; + + virtual enumy SetStuff(enumy e, buffy b); + virtual enumy Initialize() = 0; +}; + +class C2 : public C1 { + public: + C2( 
+ void* p, + int i + ); + + ~C2() override; + + class Layer : public C1::Layer { + public: + Layer(int d, const C2* C1); + virtual ~Layer(); + + protected: + const C2* m_C1; + }; + + virtual enumy SetStuff(enumy e, buffy b) override; + virtual enumy Initialize() override; +}; + +class C3 : public C2 { + struct Layer : public C2::Layer { + public: + Layer(int d, const C2* C1); + void Initialize(); + }; + + virtual enumy Initialize() override; +}; + +void C3::Layer::Initialize() { + if (m_C1 == nullptr) { + return; + } + if (m_C1->availableIEs.chain.supported) { + } +} + +// CHECK-DAG: !ty_C23A3ALayer = !cir.struct) -> !cir.ptr +// CHECK: %3 = cir.get_member %2[1] {name = "m_C1"} : !cir.ptr -> !cir.ptr> +// CHECK: %4 = cir.load %3 : !cir.ptr>, !cir.ptr +// CHECK: %5 = cir.const #cir.ptr : !cir.ptr +// CHECK: %6 = cir.cmp(eq, %4, %5) : !cir.ptr, !cir.bool + +enumy C3::Initialize() { + return C2::Initialize(); +} + +// CHECK: cir.func @_ZN2C310InitializeEv(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} + +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %3 = cir.base_class_addr(%2 : !cir.ptr) -> !cir.ptr +// CHECK: %4 = cir.call @_ZN2C210InitializeEv(%3) : (!cir.ptr) -> !s32i + +void vcall(C1 &c1) { + buffy b; + enumy e; + c1.SetStuff(e, b); +} + +// CHECK: cir.func @_Z5vcallR2C1(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["c1", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !ty_buffy, !cir.ptr, ["b"] {alignment = 8 : i64} +// CHECK: %2 = cir.alloca !s32i, !cir.ptr, ["e"] {alignment = 4 : i64} +// CHECK: %3 = cir.alloca !ty_buffy, !cir.ptr, ["agg.tmp0"] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %4 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %5 = cir.load %2 : !cir.ptr, !s32i +// CHECK: cir.call @_ZN5buffyC2ERKS_(%3, %1) : (!cir.ptr, !cir.ptr) -> () +// CHECK: %6 = cir.load %3 : !cir.ptr, !ty_buffy +// CHECK: %7 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr, !s32i, !ty_buffy)>>>> +// CHECK: %8 = cir.load %7 : !cir.ptr, !s32i, !ty_buffy)>>>>, !cir.ptr, !s32i, !ty_buffy)>>> +// CHECK: %9 = cir.vtable.address_point( %8 : !cir.ptr, !s32i, !ty_buffy)>>>, vtable_index = 0, address_point_index = 2) : !cir.ptr, !s32i, !ty_buffy)>>> +// CHECK: %10 = cir.load align(8) %9 : !cir.ptr, !s32i, !ty_buffy)>>>, !cir.ptr, !s32i, !ty_buffy)>> +// CHECK: %11 = cir.call %10(%4, %5, %6) : (!cir.ptr, !s32i, !ty_buffy)>>, !cir.ptr, !s32i, !ty_buffy) -> !s32i +// CHECK: cir.return +// CHECK: } + +class A { +public: + int a; + virtual void foo() {a++;} +}; + +class B : public A { +public: + int b; + void foo () { static_cast(*this).foo();} +}; + +// CHECK: cir.func linkonce_odr @_ZN1B3fooEv(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load deref %0 : !cir.ptr>, !cir.ptr +// CHECK: cir.scope { +// CHECK: %2 = cir.alloca !ty_A, !cir.ptr, ["ref.tmp0"] {alignment = 8 : i64} +// CHECK: %3 = cir.base_class_addr(%1 : !cir.ptr) -> !cir.ptr + +// Call @A::A(A const&) +// CHECK: cir.call @_ZN1AC2ERKS_(%2, %3) : (!cir.ptr, !cir.ptr) -> () + +// Call @A::foo() +// CHECK: cir.call @_ZN1A3fooEv(%2) : (!cir.ptr) -> () +// CHECK: } +// CHECK: cir.return +// CHECK: } + +void t() { + B b; + b.foo(); +} + +struct C : public A { + int& ref; + C(int& x) : ref(x) {} +}; + +// CHECK: cir.func @_Z8test_refv() +// CHECK: 
cir.get_member %2[1] {name = "ref"} +int test_ref() { + int x = 42; + C c(x); + return c.ref; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/dlti.c b/clang/test/CIR/CodeGen/dlti.c new file mode 100644 index 000000000000..4ea8f5ca6359 --- /dev/null +++ b/clang/test/CIR/CodeGen/dlti.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void foo() {} + +// CHECK: module @"{{.*}}dlti.c" attributes { +// CHECK-DAG: cir.sob = #cir.signed_overflow_behavior, +// CHECK-DAG: dlti.dl_spec = +// CHECK-DAG: #dlti.dl_spec< +// CHECK-DAG: #dlti.dl_entry<"dlti.endianness", "little"> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry, dense<32> : vector<4xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry, dense<64> : vector<4xi64>> +// CHECK-DAG: #dlti.dl_entry, dense<32> : vector<4xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<4xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry : vector<2xi64>> +// CHECK-DAG: #dlti.dl_entry<"dlti.stack_alignment", 128 : i64> +// CHECK-DAG: >, +// CHECK-DAG: llvm.data_layout = +// CHECK-DAG: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + diff --git a/clang/test/CIR/CodeGen/dtors-scopes.cpp b/clang/test/CIR/CodeGen/dtors-scopes.cpp new file mode 100644 index 000000000000..6d363f0254bf --- /dev/null +++ b/clang/test/CIR/CodeGen/dtors-scopes.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -triple arm64-apple-macosx14.0.0 -std=c++20 -fclangir -emit-cir %s -o %t2.cir +// RUN: FileCheck --input-file=%t2.cir %s --check-prefix=DTOR_BODY + +extern "C" int printf(char const*, ...); +struct C { + C() { printf("++A\n"); } + ~C() { printf("--A\n"); } +}; +void dtor1() { + { + C c; + } + printf("Done\n"); +} + +// CHECK: cir.func @_Z5dtor1v() +// CHECK: cir.scope { +// CHECK: %4 = cir.alloca !ty_C, !cir.ptr, ["c", init] {alignment = 1 : i64} +// CHECK: cir.call @_ZN1CC2Ev(%4) : (!cir.ptr) -> () +// CHECK: cir.call @_ZN1CD2Ev(%4) : (!cir.ptr) -> () +// CHECK: } + +// DTOR_BODY: cir.func linkonce_odr @_ZN1CD2Ev{{.*}}{ +// DTOR_BODY: %2 = cir.get_global @printf +// DTOR_BODY: %3 = cir.get_global @".str2" +// DTOR_BODY: %4 = cir.cast(array_to_ptrdecay, %3 +// DTOR_BODY: %5 = cir.call @printf(%4) +// DTOR_BODY: cir.return + +// DTOR_BODY: cir.func linkonce_odr @_ZN1CD1Ev(%arg0: !cir.ptr + +// DTOR_BODY: cir.call @_ZN1CD2Ev +// DTOR_BODY: cir.return +// DTOR_BODY: } \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/dtors.cpp b/clang/test/CIR/CodeGen/dtors.cpp new file mode 100644 index 000000000000..fe07c0ff1245 --- /dev/null +++ b/clang/test/CIR/CodeGen/dtors.cpp @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + + +enum class EFMode { Always, Verbose }; + +class PSEvent { + public: + PSEvent( + EFMode m, + const char* n); + 
~PSEvent(); + + private: + const char* n; + EFMode m; +}; + +void blue() { + PSEvent p(EFMode::Verbose, __FUNCTION__); +} + +class A +{ +public: + A() noexcept {} + A(const A&) noexcept = default; + + virtual ~A() noexcept; + virtual const char* quack() const noexcept; +}; + +class B : public A +{ +public: + virtual ~B() noexcept {} +}; + +// Class A +// CHECK: ![[ClassA:ty_.*]] = !cir.struct ()>>>} #cir.record.decl.ast> + +// Class B +// CHECK: ![[ClassB:ty_.*]] = !cir.struct ()>>>} #cir.record.decl.ast>}> + +// CHECK: cir.func @_Z4bluev() +// CHECK: %0 = cir.alloca !ty_PSEvent, !cir.ptr, ["p", init] {alignment = 8 : i64} +// CHECK: %1 = cir.const #cir.int<1> : !s32i +// CHECK: %2 = cir.get_global @".str" : !cir.ptr> +// CHECK: %3 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr +// CHECK: cir.call @_ZN7PSEventC1E6EFModePKc(%0, %1, %3) : (!cir.ptr, !s32i, !cir.ptr) -> () +// CHECK: cir.return +// CHECK: } + +// @B::~B() #1 definition call into base @A::~A() +// CHECK: cir.func linkonce_odr @_ZN1BD2Ev{{.*}}{ +// CHECK: cir.call @_ZN1AD2Ev( + +// void foo() +// CHECK: cir.func @_Z3foov() +// CHECK: cir.scope { +// CHECK: cir.call @_ZN1BC2Ev(%0) : (!cir.ptr) -> () +// CHECK: cir.call @_ZN1BD2Ev(%0) : (!cir.ptr) -> () + +// operator delete(void*) declaration +// CHECK: cir.func private @_ZdlPvm(!cir.ptr, !u64i) + +// B dtor => @B::~B() #2 +// Calls dtor #1 +// Calls operator delete +// +// CHECK: cir.func linkonce_odr @_ZN1BD0Ev(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: cir.call @_ZN1BD2Ev(%1) : (!cir.ptr) -> () +// CHECK: %2 = cir.cast(bitcast, %1 : !cir.ptr), !cir.ptr +// CHECK: cir.call @_ZdlPvm(%2, %3) : (!cir.ptr, !u64i) -> () +// CHECK: cir.return +// CHECK: } + +void foo() { B(); } diff --git a/clang/test/CIR/CodeGen/dynamic-cast-exact.cpp b/clang/test/CIR/CodeGen/dynamic-cast-exact.cpp new file mode 100644 index 000000000000..6ff93c998927 --- /dev/null +++ b/clang/test/CIR/CodeGen/dynamic-cast-exact.cpp @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O1 -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O1 -fclangir -emit-llvm -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s + +struct Base1 { + virtual ~Base1(); +}; + +struct Base2 { + virtual ~Base2(); +}; + +struct Derived final : Base1 {}; + +Derived *ptr_cast(Base1 *ptr) { + return dynamic_cast(ptr); + // CHECK: %[[#SRC:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr + // CHECK-NEXT: %[[#EXPECTED_VPTR:]] = cir.vtable.address_point(@_ZTV7Derived, vtable_index = 0, address_point_index = 2) : !cir.ptr>> + // CHECK-NEXT: %[[#SRC_VPTR_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr>>> + // CHECK-NEXT: %[[#SRC_VPTR:]] = cir.load %[[#SRC_VPTR_PTR]] : !cir.ptr>>>, !cir.ptr>> + // CHECK-NEXT: %[[#SUCCESS:]] = cir.cmp(eq, %[[#SRC_VPTR]], %[[#EXPECTED_VPTR]]) : !cir.ptr>>, !cir.bool + // CHECK-NEXT: %{{.+}} = cir.ternary(%[[#SUCCESS]], true { + // CHECK-NEXT: %[[#RES:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr + // CHECK-NEXT: cir.yield %[[#RES]] : !cir.ptr + // CHECK-NEXT: }, false { + // CHECK-NEXT: %[[#NULL:]] = cir.const #cir.ptr : !cir.ptr + // CHECK-NEXT: cir.yield %[[#NULL]] : !cir.ptr + // CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr +} + +// LLVM: define dso_local ptr 
@_Z8ptr_castP5Base1(ptr readonly %[[#SRC:]]) +// LLVM-NEXT: %[[#VPTR:]] = load ptr, ptr %[[#SRC]], align 8 +// LLVM-NEXT: %[[#SUCCESS:]] = icmp eq ptr %[[#VPTR]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i64 16) +// LLVM-NEXT: %[[RESULT:.+]] = select i1 %[[#SUCCESS]], ptr %[[#SRC]], ptr null +// LLVM-NEXT: ret ptr %[[RESULT]] +// LLVM-NEXT: } + +Derived &ref_cast(Base1 &ref) { + return dynamic_cast(ref); + // CHECK: %[[#SRC:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr + // CHECK-NEXT: %[[#EXPECTED_VPTR:]] = cir.vtable.address_point(@_ZTV7Derived, vtable_index = 0, address_point_index = 2) : !cir.ptr>> + // CHECK-NEXT: %[[#SRC_VPTR_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr>>> + // CHECK-NEXT: %[[#SRC_VPTR:]] = cir.load %[[#SRC_VPTR_PTR]] : !cir.ptr>>>, !cir.ptr>> + // CHECK-NEXT: %[[#SUCCESS:]] = cir.cmp(eq, %[[#SRC_VPTR]], %[[#EXPECTED_VPTR]]) : !cir.ptr>>, !cir.bool + // CHECK-NEXT: %[[#FAILED:]] = cir.unary(not, %[[#SUCCESS]]) : !cir.bool, !cir.bool + // CHECK-NEXT: cir.if %[[#FAILED]] { + // CHECK-NEXT: cir.call @__cxa_bad_cast() : () -> () + // CHECK-NEXT: cir.unreachable + // CHECK-NEXT: } + // CHECK-NEXT: %{{.+}} = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr +} + +// LLVM: define dso_local noundef ptr @_Z8ref_castR5Base1(ptr readonly returned %[[#SRC:]]) +// LLVM-NEXT: %[[#VPTR:]] = load ptr, ptr %[[#SRC]], align 8 +// LLVM-NEXT: %[[OK:.+]] = icmp eq ptr %[[#VPTR]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i64 16) +// LLVM-NEXT: br i1 %[[OK]], label %[[#LABEL_OK:]], label %[[#LABEL_FAIL:]] +// LLVM: [[#LABEL_FAIL]]: +// LLVM-NEXT: tail call void @__cxa_bad_cast() +// LLVM-NEXT: unreachable +// LLVM: [[#LABEL_OK]]: +// LLVM-NEXT: ret ptr %[[#SRC]] +// LLVM-NEXT: } + +Derived *ptr_cast_always_fail(Base2 *ptr) { + return dynamic_cast(ptr); + // CHECK: %{{.+}} = cir.load %{{.+}} : !cir.ptr>, !cir.ptr + // CHECK-NEXT: %[[#RESULT:]] = cir.const #cir.ptr : !cir.ptr + // CHECK-NEXT: cir.store %[[#RESULT]], %{{.+}} : !cir.ptr, !cir.ptr> +} + +// LLVM: define dso_local noalias noundef ptr @_Z20ptr_cast_always_failP5Base2(ptr nocapture readnone %{{.+}}) +// LLVM-NEXT: ret ptr null +// LLVM-NEXT: } + +Derived &ref_cast_always_fail(Base2 &ref) { + return dynamic_cast(ref); + // CHECK: %{{.+}} = cir.load %{{.+}} : !cir.ptr>, !cir.ptr + // CHECK-NEXT: %{{.+}} = cir.const #cir.ptr : !cir.ptr + // CHECK-NEXT: cir.call @__cxa_bad_cast() : () -> () + // CHECK-NEXT: cir.unreachable +} + +// LLVM: define dso_local noalias noundef nonnull ptr @_Z20ref_cast_always_failR5Base2(ptr nocapture readnone %{{.+}}) +// LLVM-NEXT: tail call void @__cxa_bad_cast() +// LLVM-NEXT: unreachable +// LLVM-NEXT: } diff --git a/clang/test/CIR/CodeGen/dynamic-cast-relative-layout.cpp b/clang/test/CIR/CodeGen/dynamic-cast-relative-layout.cpp new file mode 100644 index 000000000000..27cff8b2d172 --- /dev/null +++ b/clang/test/CIR/CodeGen/dynamic-cast-relative-layout.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fexperimental-relative-c++-abi-vtables -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fexperimental-relative-c++-abi-vtables -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER + +struct Base { + virtual ~Base(); +}; + +// BEFORE: !ty_Base = !cir.struct(ptr); +} + +// BEFORE: cir.func @_Z20ptr_cast_to_completeP4Base +// BEFORE: %{{.+}} = 
cir.dyn_cast(ptr, %{{.+}} : !cir.ptr relative_layout) -> !cir.ptr +// BEFORE: } + +// AFTER: cir.func @_Z20ptr_cast_to_completeP4Base +// AFTER: %[[#SRC:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// AFTER-NEXT: %[[#SRC_IS_NOT_NULL:]] = cir.cast(ptr_to_bool, %[[#SRC]] : !cir.ptr), !cir.bool +// AFTER-NEXT: %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true { +// AFTER-NEXT: %[[#VPTR_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr> +// AFTER-NEXT: %[[#VPTR:]] = cir.load %[[#VPTR_PTR]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: %[[#OFFSET_TO_TOP_PTR:]] = cir.vtable.address_point( %[[#VPTR]] : !cir.ptr, vtable_index = 0, address_point_index = -2) : !cir.ptr +// AFTER-NEXT: %[[#OFFSET_TO_TOP:]] = cir.load align(4) %[[#OFFSET_TO_TOP_PTR]] : !cir.ptr, !s32i +// AFTER-NEXT: %[[#SRC_BYTES_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: %[[#DST_BYTES_PTR:]] = cir.ptr_stride(%[[#SRC_BYTES_PTR]] : !cir.ptr, %[[#OFFSET_TO_TOP]] : !s32i), !cir.ptr +// AFTER-NEXT: %[[#DST:]] = cir.cast(bitcast, %[[#DST_BYTES_PTR]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.yield %[[#DST]] : !cir.ptr +// AFTER-NEXT: }, false { +// AFTER-NEXT: %[[#NULL:]] = cir.const #cir.ptr : !cir.ptr +// AFTER-NEXT: cir.yield %[[#NULL]] : !cir.ptr +// AFTER-NEXT: }) : (!cir.bool) -> !cir.ptr +// AFTER: } diff --git a/clang/test/CIR/CodeGen/dynamic-cast.cpp b/clang/test/CIR/CodeGen/dynamic-cast.cpp new file mode 100644 index 000000000000..2d1393b4a582 --- /dev/null +++ b/clang/test/CIR/CodeGen/dynamic-cast.cpp @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER + +struct Base { + virtual ~Base(); +}; + +struct Derived : Base {}; + +// BEFORE: #dyn_cast_info__ZTI4Base__ZTI7Derived = #cir.dyn_cast_info<#cir.global_view<@_ZTI4Base> : !cir.ptr, #cir.global_view<@_ZTI7Derived> : !cir.ptr, @__dynamic_cast, @__cxa_bad_cast, #cir.int<0> : !s64i> +// BEFORE: !ty_Base = !cir.struct +// BEFORE: !ty_Derived = !cir.struct + +Derived *ptr_cast(Base *b) { + return dynamic_cast(b); +} + +// BEFORE: cir.func @_Z8ptr_castP4Base +// BEFORE: %{{.+}} = cir.dyn_cast(ptr, %{{.+}} : !cir.ptr, #dyn_cast_info__ZTI4Base__ZTI7Derived) -> !cir.ptr +// BEFORE: } + +// AFTER: cir.func @_Z8ptr_castP4Base +// AFTER: %[[#SRC:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// AFTER-NEXT: %[[#SRC_IS_NOT_NULL:]] = cir.cast(ptr_to_bool, %[[#SRC]] : !cir.ptr), !cir.bool +// AFTER-NEXT: %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true { +// AFTER-NEXT: %[[#SRC_VOID_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: %[[#BASE_RTTI:]] = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr +// AFTER-NEXT: %[[#DERIVED_RTTI:]] = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr +// AFTER-NEXT: %[[#HINT:]] = cir.const #cir.int<0> : !s64i +// AFTER-NEXT: %[[#RT_CALL_RET:]] = cir.call @__dynamic_cast(%[[#SRC_VOID_PTR]], %[[#BASE_RTTI]], %[[#DERIVED_RTTI]], %[[#HINT]]) : (!cir.ptr, !cir.ptr, !cir.ptr, !s64i) -> !cir.ptr +// AFTER-NEXT: %[[#CASTED:]] = cir.cast(bitcast, %[[#RT_CALL_RET]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.yield %[[#CASTED]] : !cir.ptr +// AFTER-NEXT: }, false { +// AFTER-NEXT: %[[#NULL_PTR:]] = cir.const #cir.ptr : !cir.ptr +// AFTER-NEXT: cir.yield %[[#NULL_PTR]] : !cir.ptr 
+// AFTER-NEXT: }) : (!cir.bool) -> !cir.ptr +// AFTER: } + +Derived &ref_cast(Base &b) { + return dynamic_cast(b); +} + +// BEFORE: cir.func @_Z8ref_castR4Base +// BEFORE: %{{.+}} = cir.dyn_cast(ref, %{{.+}} : !cir.ptr, #dyn_cast_info__ZTI4Base__ZTI7Derived) -> !cir.ptr +// BEFORE: } + +// AFTER: cir.func @_Z8ref_castR4Base +// AFTER: %[[#SRC_VOID_PTR:]] = cir.cast(bitcast, %{{.+}} : !cir.ptr), !cir.ptr +// AFTER-NEXT: %[[#SRC_RTTI:]] = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr +// AFTER-NEXT: %[[#DEST_RTTI:]] = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr +// AFTER-NEXT: %[[#OFFSET_HINT:]] = cir.const #cir.int<0> : !s64i +// AFTER-NEXT: %[[#CASTED_PTR:]] = cir.call @__dynamic_cast(%[[#SRC_VOID_PTR]], %[[#SRC_RTTI]], %[[#DEST_RTTI]], %[[#OFFSET_HINT]]) : (!cir.ptr, !cir.ptr, !cir.ptr, !s64i) -> !cir.ptr +// AFTER-NEXT: %[[#CASTED_PTR_IS_NOT_NULL:]] = cir.cast(ptr_to_bool, %[[#CASTED_PTR]] : !cir.ptr), !cir.bool +// AFTER-NEXT: %[[#CASTED_PTR_IS_NULL:]] = cir.unary(not, %[[#CASTED_PTR_IS_NOT_NULL]]) : !cir.bool, !cir.bool +// AFTER-NEXT: cir.if %[[#CASTED_PTR_IS_NULL]] { +// AFTER-NEXT: cir.call @__cxa_bad_cast() : () -> () +// AFTER-NEXT: cir.unreachable +// AFTER-NEXT: } +// AFTER-NEXT: %{{.+}} = cir.cast(bitcast, %[[#CASTED_PTR]] : !cir.ptr), !cir.ptr +// AFTER: } + +void *ptr_cast_to_complete(Base *ptr) { + return dynamic_cast(ptr); +} + +// BEFORE: cir.func @_Z20ptr_cast_to_completeP4Base +// BEFORE: %{{.+}} = cir.dyn_cast(ptr, %{{.+}} : !cir.ptr) -> !cir.ptr +// BEFORE: } + +// AFTER: cir.func @_Z20ptr_cast_to_completeP4Base +// AFTER: %[[#SRC:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr +// AFTER-NEXT: %[[#SRC_IS_NOT_NULL:]] = cir.cast(ptr_to_bool, %[[#SRC]] : !cir.ptr), !cir.bool +// AFTER-NEXT: %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true { +// AFTER-NEXT: %[[#VPTR_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr> +// AFTER-NEXT: %[[#VPTR:]] = cir.load %[[#VPTR_PTR]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: %[[#BASE_OFFSET_PTR:]] = cir.vtable.address_point( %[[#VPTR]] : !cir.ptr, vtable_index = 0, address_point_index = -2) : !cir.ptr +// AFTER-NEXT: %[[#BASE_OFFSET:]] = cir.load align(8) %[[#BASE_OFFSET_PTR]] : !cir.ptr, !s64i +// AFTER-NEXT: %[[#SRC_BYTES_PTR:]] = cir.cast(bitcast, %[[#SRC]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: %[[#DST_BYTES_PTR:]] = cir.ptr_stride(%[[#SRC_BYTES_PTR]] : !cir.ptr, %[[#BASE_OFFSET]] : !s64i), !cir.ptr +// AFTER-NEXT: %[[#CASTED_PTR:]] = cir.cast(bitcast, %[[#DST_BYTES_PTR]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.yield %[[#CASTED_PTR]] : !cir.ptr +// AFTER-NEXT: }, false { +// AFTER-NEXT: %[[#NULL_PTR:]] = cir.const #cir.ptr : !cir.ptr +// AFTER-NEXT: cir.yield %[[#NULL_PTR]] : !cir.ptr +// AFTER-NEXT: }) : (!cir.bool) -> !cir.ptr +// AFTER: } diff --git a/clang/test/CIR/CodeGen/evaluate-expr.c b/clang/test/CIR/CodeGen/evaluate-expr.c new file mode 100644 index 000000000000..4b7146622dbf --- /dev/null +++ b/clang/test/CIR/CodeGen/evaluate-expr.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +static const int g = 1; +void foo() { + if ((g != 1) && (g != 1)) + return; + if ((g == 1) || (g == 1)) + return; +} +// CHECK: cir.func no_proto @foo() +// CHECK: cir.scope { +// CHECK: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i +// CHECK: [[FALSE:%.*]] = cir.cast(int_to_bool, [[ZERO:%.*]] : !s32i), !cir.bool +// CHECK: cir.if [[FALSE]] { +// CHECK: cir.return +// CHECK: } +// CHECK: } +// CHECK: cir.return + +typedef struct { 
int x; } S; +static const S s = {0}; +void bar() { + int a = s.x; +} +// CHECK: cir.func no_proto @bar() +// CHECK: [[ALLOC:%.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK: {{%.*}} = cir.get_global @s : !cir.ptr +// CHECK: [[CONST:%.*]] = cir.const #cir.int<0> : !s32i +// CHECK: cir.store [[CONST]], [[ALLOC]] : !s32i, !cir.ptr +// CHECK: cir.return + diff --git a/clang/test/CIR/CodeGen/expressions.cpp b/clang/test/CIR/CodeGen/expressions.cpp new file mode 100644 index 000000000000..fb29394fbe2d --- /dev/null +++ b/clang/test/CIR/CodeGen/expressions.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void test(int a) { +// CHECK: cir.func @{{.+}}test + + // Should generate LValue parenthesis expression. + (a) = 1; + // CHECK: %[[#C:]] = cir.const #cir.int<1> : !s32i + // CHECK: cir.store %[[#C]], %{{.+}} : !s32i, !cir.ptr +} diff --git a/clang/test/CIR/CodeGen/float16-ops.c b/clang/test/CIR/CodeGen/float16-ops.c new file mode 100644 index 000000000000..5b3b7127476b --- /dev/null +++ b/clang/test/CIR/CodeGen/float16-ops.c @@ -0,0 +1,1636 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir %s +// FileCheck --input-file=%t.cir --check-prefix=NONATIVE %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fnative-half-type -fclangir -emit-cir -o %t.cir %s +// FileCheck --input-file=%t.cir --check-prefix=NATIVE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s +// FileCheck --input-file=%t.ll --check-prefix=NONATIVE-LLVM %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fnative-half-type -fclangir -emit-llvm -o %t.ll %s +// FileCheck --input-file=%t.ll --check-prefix=NATIVE-LLVM %s + +volatile unsigned test; +volatile int i0; +volatile _Float16 h0 = 0.0, h1 = 1.0, h2; +volatile float f0, f1, f2; +volatile double d0; +short s0; + +void foo(void) { + test = (h0); + // NONATIVE: %{{.+}} = cir.cast(float_to_int, %{{.+}} : !cir.f16), !u32i + // NATIVE: %{{.+}} = cir.cast(float_to_int, %{{.+}} : !cir.f16), !u32i + + // NONATIVE-LLVM: %{{.+}} = fptoui half %{{.+}} to i32 + // NATIVE-LLVM: %{{.+}} = fptoui half %{{.+}} to i32 + + h0 = (test); + // NONATIVE: %{{.+}} = cir.cast(int_to_float, %{{.+}} : !u32i), !cir.f16 + // NATIVE: %{{.+}} = cir.cast(int_to_float, %{{.+}} : !u32i), !cir.f16 + + // NONATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to half + + test = (!h1); + // NONATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.f16), !cir.bool + // NONATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool + // NONATIVE-NEXT: %[[#C:]] = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.f16), !cir.bool + // NATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool + // NATIVE-NEXT: %[[#C:]] = cir.cast(bool_to_int, %[[#B]] : !cir.bool), !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000 + // NONATIVE-LLVM-NEXT: %[[#B:]] = zext i1 %[[#A]] to i8 + // NONATIVE-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1 + // NONATIVE-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32 + + // NATIVE-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000 + // NATIVE-LLVM-NEXT: %[[#B:]] = zext i1 %[[#A]] to i8 + // 
NATIVE-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1 + // NATIVE-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32 + + h1 = -h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.float), !cir.f16 + // NATIVE: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.f16, !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fneg float %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fneg half %{{.+}} + + h1 = +h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(plus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NOT: %{{.+}} = cir.cast(floating, %{{.+}} : !cir.float), !cir.f16 + // NATIVE: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.f16, !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half + + // NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2 + // NATIVE-LLVM-NEXT: store volatile half %[[#A]], ptr @h1, align 2 + + h1++; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %{.+} = fadd half %{.+}, 0xH3C00 + + // NATIVE-LLVM: %{.+} = fadd half %{.+}, 0xH3C00 + + ++h1; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %{.+} = fadd half %{.+}, 0xH3C00 + + // NATIVE-LLVM: %{.+} = fadd half %{.+}, 0xH3C00 + + --h1; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %{.+} = fadd half %{.+}, 0xHBC00 + + // NATIVE-LLVM: %{.+} = fadd half %{.+}, 0xHBC00 + + h1--; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16 + // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %{.+} = fadd half %{.+}, 0xHBC00 + + // NATIVE-LLVM: %{.+} = fadd half %{.+}, 0xHBC00 + + h1 = h0 * h2; + // NONATIVE: %[[#LHS:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#RHS:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHS]], %[[#RHS]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#A]] : !cir.float), !cir.f16 + + // NATIVE: %{{.+}} = cir.binop(mul, 
%{{.+}}, %{{.+}}) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#SUM:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#SUM]] to half + + // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, %{{.+}} + + h1 = h0 * (_Float16) -2.0f; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(mul, %[[#A]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#C]]) : !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], -2.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, 0xHC000 + + h1 = h0 * f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = f0 * h2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = h0 * i0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#A]], %[[#C]]) : !cir.float + // 
NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fmul half %{{.+}}, %[[#A]] + + h1 = (h0 / h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#A]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + + // NATIVE: %{{.+}} = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, %{{.+}} + + h1 = (h0 / (_Float16) -2.0f); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(div, %[[#A]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#C]]) : !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], -2.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, 0xHC000 + + h1 = (h0 / f2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = (f0 / h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : 
!cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = (h0 / i0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fdiv half %{{.+}}, %[[#A]] + + h1 = (h2 + h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#A]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + + // NATIVE: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, %{{.+}} + + h1 = ((_Float16)-2.0 + h0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.f16), !cir.float + // NONATIVE: %[[#E:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(add, %[[#D]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 + // NATIVE: %{{.+}} = cir.binop(add, %[[#C]], %{{.+}}) : !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float -2.000000e+00, %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = 
fadd half 0xHC000, %{{.+}} + + h1 = (h2 + f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = (f2 + h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %{{.+}}, %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.=}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = (h0 + i0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.=}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fadd half %{{.+}}, %[[#A]] + + h1 = (h2 - h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + + // NATIVE: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, %{{.+}} + + h1 = ((_Float16)-2.0f - h0); + // NONATIVE: %[[#A:]] = cir.const 
#cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.f16), !cir.float + // NONATIVE: %[[#E:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.binop(sub, %[[#D]], %[[#E]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#F]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE: %{{.+}} = cir.binop(sub, %[[#C]], %{{.+}}) : !cir.f16 + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float -2.000000e+00, %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fsub half 0xHC000, %{{.+}} + + h1 = (h2 - f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = (f2 - h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float + // NATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.=}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]] + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + h1 = (h0 - i0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#A]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %{{.+}} = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %{{.+}} = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.f16 + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float + // 
NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#A]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fsub half %{{.+}}, %[[#A]] + + test = (h2 < h0); + // NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}} + + test = (h2 < (_Float16)42.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140 + + // NATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140 + + test = (h2 < f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp olt float %[[#A]], %{{.+}} + + // NATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp olt float %[[#A]], %{{.+}} + + test = (f2 < h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.=}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#A]] + + // NATIVE-LLVM: %[[#A:]] = fpext half %{{.=}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#A]] + + test = (i0 < h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM: %{{.+}} = fcmp olt half %[[#A]], %{{.+}} + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // 
NATIVE-LLVM: %{{.+}} = fcmp olt half %[[#A]], %{{.+}} + + test = (h0 < i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]] + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]] + + test = (h0 > h2); + // NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}} + + test = ((_Float16)42.0 > h2); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}} + + test = (h0 > f2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.=}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.=}} to float + // NATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}} + + test = (f0 > h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]] + + test = (i0 > h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = 
cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM: %{{.+}} = fcmp ogt half %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fcmp ogt half %[[#LHS]], %{{.+}} + + test = (h0 > i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]] + + test = (h2 <= h0); + // NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}} + + test = (h2 <= (_Float16)42.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140 + + // NATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140 + + test = (h2 <= f0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}} + + test = (f2 <= h0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : 
!cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]] + + test = (i0 <= h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM: %{{.+}} = fcmp ole half %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fcmp ole half %[[#LHS]], %{{.+}} + + test = (h0 <= i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]] + + test = (h0 >= h2); + // NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + // NONATIVE-NEXT: %{{.+}} = cir.get_global @test : !cir.ptr + + // NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}} + + test = (h0 >= (_Float16)-2.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 + // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#D]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000 + + // NATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000 + + test = (h0 >= f2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), 
!u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}} + + test = (f0 >= h2); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]] + + test = (i0 >= h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM: %{{.+}} = fcmp oge half %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fcmp oge half %[[#LHS]], %{{.+}} + + test = (h0 >= i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]] + + test = (h1 == h2); + // NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}} + + test = (h1 == (_Float16)1.0); + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = 
cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00 + + // NATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00 + + test = (h1 == f1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#LHS]], %{{.+}} + + test = (f1 == h1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]] + + test = (i0 == h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fcmp oeq half %[[#LHS]], %{{.+}} + + test = (h0 == i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %{{.=}} = fcmp oeq half %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.=}} = fcmp oeq half %{{.+}}, %[[#RHS]] + + test = (h1 != h2); + // NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#A]] : !s32i), !u32i + + // 
NONATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}} + + // NATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}} + + test = (h1 != (_Float16)1.0); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#C]] : !s32i), !u32i + + // NONATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00 + + // NATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00 + + test = (h1 != f1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.=}} to float + // NATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}} + + test = (f1 != h1); + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#A]] + + // NATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#A]] + + test = (i0 != h0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM: %{{.+}} = fcmp une half %[[#LHS]], %{{.+}} + + // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fcmp une half %[[#LHS]], %{{.+}} + + test = (h0 != i0); + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NONATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !s32i + // NATIVE-NEXT: %{{.+}} = cir.cast(integral, %[[#B]] : !s32i), !u32i + + // NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une 
half %{{.+}}, %[[#RHS]] + + // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une half %{{.+}}, %[[#RHS]] + + h1 = (h1 ? h2 : h0); + // NONATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.f16), !cir.bool + // NONATIVE-NEXT: %{{.+}} = cir.ternary(%[[#A]], true { + // NONATIVE: cir.yield %{{.+}} : !cir.f16 + // NONATIVE-NEXT: }, false { + // NONATIVE: cir.yield %{{.+}} : !cir.f16 + // NONATIVE-NEXT: }) : (!cir.bool) -> !cir.f16 + // NONATIVE: %{{.+}} = cir.get_global @h1 : !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(float_to_bool, %{{.+}} : !cir.f16), !cir.bool + // NATIVE-NEXT: %[[#B:]] = cir.ternary(%[[#A]], true { + // NATIVE: cir.yield %{{.+}} : !cir.f16 + // NATIVE-NEXT: }, false { + // NATIVE: cir.yield %{{.+}} : !cir.f16 + // NATIVE-NEXT: }) : (!cir.bool) -> !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.get_global @h1 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000 + // NONATIVE-LLVM-NEXT: br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]] + // NONATIVE-LLVM: [[#LABEL_A]]: + // NONATIVE-LLVM-NEXT: %[[#B:]] = load volatile half, ptr @h2, align 2 + // NONATIVE-LLVM-NEXT: br label %[[#LABEL_C:]] + // NONATIVE-LLVM: [[#LABEL_B]]: + // NONATIVE-LLVM-NEXT: %[[#C:]] = load volatile half, ptr @h0, align 2 + // NONATIVE-LLVM-NEXT: br label %[[#LABEL_C]] + // NONATIVE-LLVM: [[#LABEL_C]]: + // NONATIVE-LLVM-NEXT: %8 = phi half [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ] + + // NATIVE-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000 + // NATIVE-LLVM-NEXT: br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]] + // NATIVE-LLVM: [[#LABEL_A]]: + // NATIVE-LLVM-NEXT: %[[#B:]] = load volatile half, ptr @h2, align 2 + // NATIVE-LLVM-NEXT: br label %[[#LABEL_C:]] + // NATIVE-LLVM: [[#LABEL_B]]: + // NATIVE-LLVM-NEXT: %[[#C:]] = load volatile half, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: br label %[[#LABEL_C]] + // NATIVE-LLVM: [[#LABEL_C]]: + // NATIVE-LLVM-NEXT: %8 = phi half [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ] + + h0 = h1; + // NONATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2 + // NONATIVE-LLVM-NEXT: store volatile half %[[#A]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2 + // NATIVE-LLVM-NEXT: store volatile half %[[#A]], ptr @h0, align 2 + + h0 = (_Float16)-2.0f; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, 
%[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: store volatile half 0xHC000, ptr @h0, align 2 + + // NATIVE-LLVM: store volatile half 0xHC000, ptr @h0, align 2 + + h0 = f0; + // NONATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half + // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half + // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + h0 = i0; + // NONATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !s32i + // NONATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !s32i + // NATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4 + // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to half + // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4 + // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to half + // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + i0 = h0; + // NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.f16), !s32i + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.f16), !s32i + // NATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptosi half %[[#A]] to i32 + // NONATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4 + + // NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptosi half 
%[[#A]] to i32 + // NATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4 + + h0 += h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, %{{.+}} + + h0 += (_Float16)1.0f; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE: %[[#D:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.binop(add, %[[#D]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.cast(floating, %[[#E]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#F]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.float), !cir.f16 + // NATIVE: %[[#C:]] = cir.binop(add, %{{.+}}, %[[#B]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00 + + h0 += f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + i0 += h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} 
: !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.f16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATVE-LLVM: %[[#LHS:]] = sitofp i32 %3 to float + // NONATVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]] + // NONATVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %[[#B:]] = fadd half %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32 + + h0 += i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.f16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float + // NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#D:]] = fadd float %[[#C]], %[[#B]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, %[[#A]] + + h0 -= h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, %{{.+}} + + h0 -= (_Float16)1.0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.f16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE: %[[#C:]] = cir.binop(sub, %{{.+}}, %[[#B]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: 
%[[#B:]] = fsub float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, 0xH3C00 + + h0 -= f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + i0 -= h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.f16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATVE-LLVM: %[[#LHS:]] = sitofp i32 %3 to float + // NONATVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]] + // NONATVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %[[#B:]] = fsub half %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32 + + h0 -= i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.f16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float + // NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#D:]] = fsub float %[[#C]], %[[#B]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, %[[#A]] + + h0 *= h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // 
NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, %{{.+}} + + h0 *= (_Float16)1.0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.f16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE: %[[#C:]] = cir.binop(mul, %{{.+}}, %[[#B]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, 0xH3C00 + + h0 *= f2; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + i0 *= h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.f16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // 
NONATVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATVE-LLVM: %[[#LHS:]] = sitofp i32 %3 to float + // NONATVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]] + // NONATVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %[[#B:]] = fmul half %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32 + + h0 *= i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.f16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float + // NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#D:]] = fmul float %[[#C]], %[[#B]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, %[[#A]] + + h0 /= h1; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, %{{.+}} + + h0 /= (_Float16)1.0; + // NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE: %[[#D:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.binop(div, %[[#D]], %[[#C]]) : !cir.float + // NONATIVE-NEXT: %[[#F:]] = cir.cast(floating, %[[#E]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#F]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double + // NATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.double), !cir.f16 + // NATIVE: %[[#C:]] = cir.binop(div, %{{.+}}, %[[#B]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], 1.000000e+00 + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half + + // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, 0xH3C00 + + h0 /= f2; 
+ // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.float), !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}} + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float + // NATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half + + i0 /= h0; + // NONATIVE: %[[#A:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE: %[[#B:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.float + // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(float_to_int, %[[#C]] : !cir.float), !s32i + // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(float_to_int, %[[#B]] : !cir.f16), !s32i + // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr + + // NONATVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float + // NONATVE-LLVM: %[[#LHS:]] = sitofp i32 %3 to float + // NONATVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]] + // NONATVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32 + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM-NEXT: %[[#B:]] = fdiv half %[[#A]], %{{.+}} + // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32 + + h0 /= i0; + // NONATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NONATIVE-NEXT: %[[#B:]] = cir.cast(floating, %[[#A]] : !cir.f16), !cir.float + // NONATIVE: %[[#C:]] = cir.cast(floating, %{{.+}} : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#C]], %[[#B]]) : !cir.float + // NONATIVE-NEXT: %[[#E:]] = cir.cast(floating, %[[#D]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.cast(int_to_float, %{{.+}} : !s32i), !cir.f16 + // NATIVE: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.f16 + // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float + // NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float + // NONATIVE-LLVM-NEXT: %[[#D:]] = fdiv float %[[#C]], %[[#B]] + // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half + + // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half + // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, %[[#A]] + + h0 = d0; + // NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 
: !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.f16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to half + // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to half + // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + h0 = (float)d0; + // NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NONATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.double + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.double), !cir.float + // NATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.f16 + // NATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float + // NONATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to half + // NONATIVE-LLVM-NEXT: store volatile half %[[#C]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8 + // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float + // NATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to half + // NATIVE-LLVM-NEXT: store volatile half %[[#C]], ptr @h0, align 2 + + d0 = h0; + // NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.double + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.double + // NATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr + + // NONATVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2 + // NONATVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to double + // NONATVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8 + + // NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to double + // NATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8 + + d0 = (float)h0; + // NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : 
!cir.ptr, !cir.f16 + // NONATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NONATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.double + // NONATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr, !cir.f16 + // NATIVE-NEXT: %[[#C:]] = cir.cast(floating, %[[#B]] : !cir.f16), !cir.float + // NATIVE-NEXT: %[[#D:]] = cir.cast(floating, %[[#C]] : !cir.float), !cir.double + // NATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr + + // NONATVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2 + // NONATVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float + // NONATVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double + // NONATVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8 + + // NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float + // NATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double + // NATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8 + + h0 = s0; + // NONATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr + // NONATIVE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr, !s16i + // NONATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s16i), !cir.f16 + // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr + // NATIVE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr, !s16i + // NATIVE-NEXT: %[[#C:]] = cir.cast(int_to_float, %[[#B]] : !s16i), !cir.f16 + // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr + // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr + + // NONATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2 + // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to half + // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 + + // NATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2 + // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to half + // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2 +} diff --git a/clang/test/CIR/CodeGen/forward-decls.cpp b/clang/test/CIR/CodeGen/forward-decls.cpp new file mode 100644 index 000000000000..0fc00c000e9a --- /dev/null +++ b/clang/test/CIR/CodeGen/forward-decls.cpp @@ -0,0 +1,124 @@ +// RUN: split-file %s %t + + +//--- incomplete_struct + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/incomplete_struct -o %t/incomplete_struct.cir +// RUN: FileCheck %s --input-file=%t/incomplete_struct.cir --check-prefix=CHECK1 + +// Forward declaration of the record is never defined, so it is created as +// an incomplete struct in CIR and will remain as such. + +// CHECK1: ![[INC_STRUCT:.+]] = !cir.struct +struct IncompleteStruct; +// CHECK1: testIncompleteStruct(%arg0: !cir.ptr +void testIncompleteStruct(struct IncompleteStruct *s) {}; + + + +//--- mutated_struct + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/mutated_struct -o %t/mutated_struct.cir +// RUN: FileCheck %s --input-file=%t/mutated_struct.cir --check-prefix=CHECK2 + +// Forward declaration of the struct is followed by usage, then definition. +// This means it will initially be created as incomplete, then completed. 
+ +// CHECK2: ![[COMPLETE:.+]] = !cir.struct} #cir.record.decl.ast> +// CHECK2: testForwardDeclaredStruct(%arg0: !cir.ptr +struct ForwardDeclaredStruct; +void testForwardDeclaredStruct(struct ForwardDeclaredStruct *fds) {}; +struct ForwardDeclaredStruct { + int testVal; +}; + + + +//--- recursive_struct + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/recursive_struct -o %t/recursive_struct.cir +// RUN: FileCheck --check-prefix=CHECK3 --input-file=%t/recursive_struct.cir %s + +// Struct is initially forward declared since the self-reference is generated +// first. Then, once the type is fully generated, it is completed. + +// CHECK3: ![[STRUCT:.+]] = !cir.struct, !cir.ptr>} #cir.record.decl.ast> +struct RecursiveStruct { + int value; + struct RecursiveStruct *next; +}; +// CHECK3: testRecursiveStruct(%arg0: !cir.ptr +void testRecursiveStruct(struct RecursiveStruct *arg) { + // CHECK3: %[[#NEXT:]] = cir.get_member %{{.+}}[1] {name = "next"} : !cir.ptr -> !cir.ptr> + // CHECK3: %[[#DEREF:]] = cir.load %[[#NEXT]] : !cir.ptr>, !cir.ptr + // CHECK3: cir.get_member %[[#DEREF]][0] {name = "value"} : !cir.ptr -> !cir.ptr + arg->next->value; +} + + + +//--- indirect_recursive_struct + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/indirect_recursive_struct -o %t/indirect_recursive_struct.cir +// RUN: FileCheck --check-prefix=CHECK4 --input-file=%t/indirect_recursive_struct.cir %s + +// Node B refers to A, and vice-versa, so a forward declaration is used to +// ensure the classes can be defined. Since type aliases are not yet supported +// in recursive types, each struct is expanded until there are no more recursive +// types, or all the recursive types are self-references. + +// CHECK4: ![[B:.+]] = !cir.struct, !cir.ptr, !cir.ptr>} +// CHECK4: ![[A:.+]] = !cir.struct, !cir.ptr, !cir.ptr>} +struct StructNodeB; +struct StructNodeA { + int value; + struct StructNodeB *next; +}; +struct StructNodeB { + int value; + struct StructNodeA *next; +}; + +void testIndirectSelfReference(struct StructNodeA arg) { + // CHECK4: %[[#V1:]] = cir.get_member %{{.+}}[1] {name = "next"} : !cir.ptr -> !cir.ptr> + // CHECK4: %[[#V2:]] = cir.load %[[#V1]] : !cir.ptr>, !cir.ptr + // CHECK4: %[[#V3:]] = cir.get_member %[[#V2]][1] {name = "next"} : !cir.ptr -> !cir.ptr> + // CHECK4: %[[#V4:]] = cir.load %[[#V3]] : !cir.ptr>, !cir.ptr + // CHECK4: cir.get_member %[[#V4]][0] {name = "value"} : !cir.ptr -> !cir.ptr + arg.next->next->value; +} + + + +//--- complex_struct + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/complex_struct -o %t/complex_struct.cir +// RUN: FileCheck --check-prefix=CHECK5 --input-file=%t/complex_struct.cir %s + +// A sizeable complex struct just to double check that stuff is working. 
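(A quick illustration of the expansion rule described in the indirect_recursive_struct comment above. This is an editorial sketch in plain C, not part of the patch; the NodeA/NodeB/NodeC names are made up and have no check lines attached.)

struct NodeA; struct NodeB; struct NodeC;          /* forward declarations, as in the test above */
struct NodeA { int value; struct NodeB *next; };   /* A -> B -> C -> A forms a three-node cycle  */
struct NodeB { int value; struct NodeC *next; };
struct NodeC { int value; struct NodeA *next; };
/* Applying the rule stated above: when the CIR record type for NodeA is printed, NodeB and
   NodeC would be expanded inline, and the expansion stops once the only remaining recursive
   reference points back at NodeA itself (a self-reference). The CHECK4 and CHECK5 lines in
   this test show the real two-node and larger cases the patch actually verifies. */
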
+// CHECK5: !cir.struct, !cir.struct>, !cir.struct>, !cir.ptr>, !cir.ptr>} #cir.record.decl.ast>, !cir.struct>, !cir.struct>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>>} #cir.record.decl.ast> +// CHECK5: !cir.struct>} #cir.record.decl.ast>, !cir.struct>, !cir.struct, !cir.struct>, !cir.struct>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>>, !cir.ptr>, !cir.struct, !cir.struct>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>, !cir.struct>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>>, !cir.ptr>} #cir.record.decl.ast> +// CHECK5: !cir.struct>, !cir.struct>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>, !cir.ptr>, !cir.ptr>} #cir.record.decl.ast>, !cir.struct>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>, !cir.struct} #cir.record.decl.ast>} #cir.record.decl.ast>>} #cir.record.decl.ast> +// CHECK5: !cir.struct>} #cir.record.decl.ast>, !cir.struct>, !cir.struct>, !cir.ptr>, !cir.ptr>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>} #cir.record.decl.ast>>, !cir.struct>, !cir.struct>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>, !cir.ptr>, !cir.ptr>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>} #cir.record.decl.ast>} #cir.record.decl.ast> +// CHECK5: !cir.struct>, !cir.struct>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>, !cir.ptr>, !cir.ptr>} #cir.record.decl.ast>, !cir.struct>} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>>, !cir.struct>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast> +// CHECK5: !cir.struct>} #cir.record.decl.ast>, !cir.struct>, !cir.struct>, !cir.ptr>, !cir.ptr>} #cir.record.decl.ast>, !cir.struct>, !cir.struct>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast> +struct A { + struct { + struct A *a1; + }; + struct B { + struct B *b1; + struct C { + struct A *a2; + struct B *b2; + struct C *c1; + } c; + union { + struct A *a2; + struct { + struct B *b3; + }; + } u; + } b; +}; +void test(struct A *a){}; diff --git a/clang/test/CIR/CodeGen/fullexpr.cpp b/clang/test/CIR/CodeGen/fullexpr.cpp new file mode 100644 index 000000000000..a83ce7d530cc --- /dev/null +++ b/clang/test/CIR/CodeGen/fullexpr.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int go(int const& val); + +int go1() { + auto x = go(1); + return x; +} + +// CHECK: cir.func @_Z3go1v() -> !s32i +// CHECK: %[[#XAddr:]] = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: %[[#RVal:]] = cir.scope { +// CHECK-NEXT: %[[#TmpAddr:]] = cir.alloca !s32i, !cir.ptr, ["ref.tmp0", init] {alignment = 4 : i64} +// CHECK-NEXT: %[[#One:]] = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.store %[[#One]], %[[#TmpAddr]] : !s32i, !cir.ptr +// CHECK-NEXT: %[[#RValTmp:]] = cir.call @_Z2goRKi(%[[#TmpAddr]]) : (!cir.ptr) -> !s32i +// CHECK-NEXT: cir.yield %[[#RValTmp]] : !s32i +// CHECK-NEXT: } +// CHECK-NEXT: cir.store %[[#RVal]], %[[#XAddr]] : !s32i, !cir.ptr diff --git a/clang/test/CIR/CodeGen/fun-ptr.c b/clang/test/CIR/CodeGen/fun-ptr.c new file mode 100644 index 000000000000..e66b7dd0f0b1 --- /dev/null +++ b/clang/test/CIR/CodeGen/fun-ptr.c @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -x c++ -std=c++20 -triple 
x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM +// RUN: %clang_cc1 -x c++ -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +typedef struct { + int a; + int b; +} Data; + +typedef int (*fun_t)(Data* d); + +struct A; +typedef int (*fun_typ)(struct A*); + +typedef struct A { + fun_typ fun; +} A; + +// CIR: !ty_A = !cir.struct (!cir.ptr>)>>} #cir.record.decl.ast> +A a = {(fun_typ)0}; + +int extract_a(Data* d) { + return d->a; +} + +// CIR: cir.func {{@.*foo.*}}(%arg0: !cir.ptr +// CIR: [[TMP0:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["d", init] +// CIR: [[TMP1:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: [[TMP2:%.*]] = cir.alloca !cir.ptr)>>, !cir.ptr)>>>, ["f", init] +// CIR: cir.store %arg0, [[TMP0]] : !cir.ptr, !cir.ptr> +// CIR: [[TMP3:%.*]] = cir.const #cir.ptr : !cir.ptr)>> +// CIR: cir.store [[TMP3]], [[TMP2]] : !cir.ptr)>>, !cir.ptr)>>> +// CIR: [[TMP4:%.*]] = cir.get_global {{@.*extract_a.*}} : !cir.ptr)>> +// CIR: cir.store [[TMP4]], [[TMP2]] : !cir.ptr)>>, !cir.ptr)>>> +// CIR: [[TMP5:%.*]] = cir.load [[TMP2]] : !cir.ptr)>>>, !cir.ptr)>> +// CIR: [[TMP6:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// CIR: [[TMP7:%.*]] = cir.call [[TMP5]]([[TMP6]]) : (!cir.ptr)>>, !cir.ptr) -> !s32i +// CIR: cir.store [[TMP7]], [[TMP1]] : !s32i, !cir.ptr + +// LLVM: define dso_local i32 {{@.*foo.*}}(ptr %0) +// LLVM: [[TMP1:%.*]] = alloca ptr, i64 1 +// LLVM: [[TMP2:%.*]] = alloca i32, i64 1 +// LLVM: [[TMP3:%.*]] = alloca ptr, i64 1 +// LLVM: store ptr %0, ptr [[TMP1]] +// LLVM: store ptr null, ptr [[TMP3]] +// LLVM: store ptr {{@.*extract_a.*}}, ptr [[TMP3]] +// LLVM: [[TMP4:%.*]] = load ptr, ptr [[TMP3]] +// LLVM: [[TMP5:%.*]] = load ptr, ptr [[TMP1]] +// LLVM: [[TMP6:%.*]] = call i32 [[TMP4]](ptr [[TMP5]]) +// LLVM: store i32 [[TMP6]], ptr [[TMP2]] +int foo(Data* d) { + fun_t f = 0; + f = extract_a; + return f(d); +} + +// CIR: cir.func private {{@.*test.*}}() -> !cir.ptr> +// CIR: cir.func {{@.*bar.*}}() +// CIR: [[RET:%.*]] = cir.call {{@.*test.*}}() : () -> !cir.ptr> +// CIR: cir.call [[RET]]() : (!cir.ptr>) -> () +// CIR: cir.return + +// LLVM: declare {{.*}} ptr {{@.*test.*}}() +// LLVM: define dso_local void {{@.*bar.*}}() +// LLVM: [[RET:%.*]] = call ptr {{@.*test.*}}() +// LLVM: call void [[RET]]() +// LLVM: ret void +void (*test(void))(void); +void bar(void) { + test()(); +} diff --git a/clang/test/CIR/CodeGen/func_dsolocal_pie.c b/clang/test/CIR/CodeGen/func_dsolocal_pie.c new file mode 100644 index 000000000000..94f0dda5392f --- /dev/null +++ b/clang/test/CIR/CodeGen/func_dsolocal_pie.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -pic-is-pie -pic-level 1 %s -o %t1.cir +// RUN: FileCheck --input-file=%t1.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -pic-is-pie -pic-level 1 %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +void foo(int i) { + +} + +int main() { + foo(2); + return 0; +} + +// CIR: cir.func @foo(%arg0: !s32i +// CIR-NEXT: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CIR-NEXT: cir.store %arg0, [[TMP0]] : !s32i, !cir.ptr +// CIR-NEXT: cir.return + +// CIR: cir.func no_proto @main() -> !s32i +// CIR: [[TMP1:%.*]] = cir.const #cir.int<2> : !s32i +// CIR: cir.call @foo([[TMP1]]) : 
(!s32i) -> () + +// LLVM: define dso_local void @foo(i32 [[TMP3:%.*]]) +// LLVM: [[ARG_STACK:%.*]] = alloca i32, i64 1, align 4, +// LLVM: store i32 [[TMP3]], ptr [[ARG_STACK]], align 4 +// LLVM: ret void, + +// LLVM: define dso_local i32 @main() +// LLVM: [[TMP4:%.*]] = alloca i32, i64 1, align 4, +// LLVM: call void @foo(i32 2), +// LLVM: store i32 0, ptr [[TMP4]], align 4 +// LLVM: [[RET_VAL:%.*]] = load i32, ptr [[TMP4]], align 4 +// LLVM: ret i32 [[RET_VAL]], diff --git a/clang/test/CIR/CodeGen/function-attrs.cpp b/clang/test/CIR/CodeGen/function-attrs.cpp new file mode 100644 index 000000000000..8ded0a7d9730 --- /dev/null +++ b/clang/test/CIR/CodeGen/function-attrs.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + + +inline int s0(int a, int b) { + int x = a + b; + return x; +} + +__attribute__((noinline)) +int s1(int a, int b) { + return s0(a,b); +} + +__attribute__((always_inline)) +int s2(int a, int b) { + return s0(a,b); +} + +int s3(int a, int b) { + int x = a + b; + return x; +} + +// CIR: #fn_attr = #cir, nothrow = #cir.nothrow})> +// CIR: #fn_attr1 = #cir, nothrow = #cir.nothrow})> +// CIR: #fn_attr2 = #cir, nothrow = #cir.nothrow})> + +// CIR: cir.func linkonce_odr @_Z2s0ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr) +// CIR: cir.func @_Z2s1ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr1) +// CIR: cir.func @_Z2s2ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr2) +// CIR: cir.func @_Z2s3ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} { + +// LLVM: define dso_local i32 @_Z2s1ii(i32 %0, i32 %1) {{.*}} #[[#ATTR1:]] +// LLVM: define dso_local i32 @_Z2s2ii(i32 %0, i32 %1) {{.*}} #[[#ATTR2:]] +// LLVM: attributes #[[#ATTR1]] = {{.*}} noinline +// LLVM: attributes #[[#ATTR2]] = {{.*}} alwaysinline diff --git a/clang/test/CIR/CodeGen/global-ctor-dtor.cpp b/clang/test/CIR/CodeGen/global-ctor-dtor.cpp new file mode 100644 index 000000000000..230b223b0040 --- /dev/null +++ b/clang/test/CIR/CodeGen/global-ctor-dtor.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=BEFORE --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t2.cir 2>&1 +// RUN: FileCheck --check-prefix=AFTER --input-file=%t2.cir %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +extern int bar(); +void foo(void) __attribute__((constructor)); +void foo(void) { + bar(); +} + +// BEFORE: cir.func @_Z3foov() global_ctor(65535) + +void foo2(void) __attribute__((constructor(777))); +void foo2(void) { + bar(); +} + +// BEFORE: cir.func @_Z4foo2v() global_ctor(777) + +void foo3(void) __attribute__((destructor)); +void foo3(void) { + bar(); +} + +// BEFORE: cir.func @_Z4foo3v() global_dtor(65535) + +void foo4(void) __attribute__((destructor(789))); +void foo4(void) { + bar(); +} + +// BEFORE: cir.func @_Z4foo4v() global_dtor(789) + +// AFTER: module @{{.*}} attributes {cir.global_ctors = [#cir.global_ctor<"_Z3foov", 65535>, #cir.global_ctor<"_Z4foo2v", 777>], cir.global_dtors = [#cir.global_dtor<"_Z4foo3v", 65535>, 
#cir.global_dtor<"_Z4foo4v", 789>] +// LLVM: @llvm.global_ctors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z3foov, ptr null }, { i32, ptr, ptr } { i32 777, ptr @_Z4foo2v, ptr null }] +// LLVM-NEXT: @llvm.global_dtors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z4foo3v, ptr null }, { i32, ptr, ptr } { i32 789, ptr @_Z4foo4v, ptr null }] \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/global-new.cpp b/clang/test/CIR/CodeGen/global-new.cpp new file mode 100644 index 000000000000..bc9e792aaa7b --- /dev/null +++ b/clang/test/CIR/CodeGen/global-new.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIR_BEFORE +// RUN: FileCheck %s -check-prefix=CIR_AFTER --input-file=%t.cir +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck %s -check-prefix=LLVM --input-file=%t.ll +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir -fexceptions -fcxx-exceptions %s -o %t.eh.cir +// RUN: FileCheck %s -check-prefix=CIR_EH --input-file=%t.eh.cir +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir-flat -fexceptions -fcxx-exceptions %s -o %t.eh.flat.cir +// RUN: FileCheck %s -check-prefix=CIR_FLAT_EH --input-file=%t.eh.flat.cir +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fexceptions -fcxx-exceptions %s -o %t.eh.ll +// RUN: FileCheck %s -check-prefix=LLVM_EH --input-file=%t.eh.ll + +struct e { e(int); }; +e *g = new e(0); + +// CIR_BEFORE: ![[ty:.*]] = !cir.struct} + +// CIR_BEFORE: cir.global external @g = ctor : !cir.ptr { +// CIR_BEFORE: %[[GlobalAddr:.*]] = cir.get_global @g : !cir.ptr> +// CIR_BEFORE: %[[Size:.*]] = cir.const #cir.int<1> : !u64i +// CIR_BEFORE: %[[NewAlloc:.*]] = cir.call @_Znwm(%[[Size]]) : (!u64i) -> !cir.ptr +// CIR_BEFORE: %[[NewCasted:.*]] = cir.cast(bitcast, %[[NewAlloc]] : !cir.ptr), !cir.ptr +// CIR_BEFORE: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i +// CIR_BEFORE: cir.call @_ZN1eC1Ei(%[[NewCasted]], %[[ZERO]]) : (!cir.ptr, !s32i) -> () +// CIR_BEFORE: cir.store %3, %[[GlobalAddr]] : !cir.ptr, !cir.ptr> +// CIR_BEFORE: } + +// CIR_AFTER: {{%.*}} = cir.const #cir.int<1> : !u64i +// CIR_AFTER: {{%.*}} = cir.call @_Znwm(%1) : (!u64i) -> !cir.ptr + +// CIR_EH: cir.try synthetic cleanup { +// CIR_EH: cir.call exception @_ZN1eC1Ei{{.*}} cleanup { +// CIR_EH: cir.call @_ZdlPvm +// CIR_EH: cir.yield +// CIR_EH: } +// CIR_EH: cir.yield +// CIR_EH: } catch [#cir.unwind { +// CIR_EH: cir.resume +// CIR_EH: }] + +// CIR_FLAT_EH: cir.func internal private @__cxx_global_var_init() +// CIR_FLAT_EH: ^bb3: +// CIR_FLAT_EH: %exception_ptr, %type_id = cir.eh.inflight_exception +// CIR_FLAT_EH: cir.call @_ZdlPvm({{.*}}) : (!cir.ptr, !u64i) -> () +// CIR_FLAT_EH: cir.br ^bb4(%exception_ptr, %type_id : !cir.ptr, !u32i) + +// LLVM_EH: define internal void @__cxx_global_var_init() personality ptr @__gxx_personality_v0 +// LLVM_EH: call ptr @_Znwm(i64 1) +// LLVM_EH: br label %[[L2:.*]], + +// LLVM_EH: [[L2]]: +// LLVM_EH: invoke void @_ZN1eC1Ei +// LLVM_EH: to label %[[CONT:.*]] unwind label %[[PAD:.*]], + +// LLVM_EH: [[CONT]]: +// LLVM_EH: br label %[[END:.*]], + +// LLVM_EH: [[PAD]]: +// LLVM_EH: landingpad { ptr, i32 } +// LLVM_EH: cleanup +// LLVM_EH: call void @_ZdlPvm +// 
LLVM_EH: br label %[[RESUME:.*]], + +// LLVM_EH: [[RESUME]]: +// LLVM_EH: resume { ptr, i32 } + +// LLVM_EH: [[END]]: +// LLVM_EH: store ptr {{.*}}, ptr @g, align 8 +// LLVM_EH: ret void +// LLVM_EH: } + +// LLVM-DAG: @llvm.global_ctors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65536, ptr @__cxx_global_var_init, ptr null }, { i32, ptr, ptr } { i32 65536, ptr @__cxx_global_var_init.1, ptr null }] +// LLVM: define internal void @__cxx_global_var_init() +// LLVM: call ptr @_Znwm(i64 1) + +// LLVM: define internal void @__cxx_global_var_init.1() +// LLVM: call ptr @_Znwm(i64 1) + +// LLVM: define void @_GLOBAL__sub_I_global_new.cpp() +// LLVM: call void @__cxx_global_var_init() +// LLVM: call void @__cxx_global_var_init.1() + +struct PackedStruct { +}; +PackedStruct*const packed_2 = new PackedStruct(); \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/globals-neg-index-array.c b/clang/test/CIR/CodeGen/globals-neg-index-array.c new file mode 100644 index 000000000000..7f7a80ea2c9e --- /dev/null +++ b/clang/test/CIR/CodeGen/globals-neg-index-array.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +struct __attribute__((packed)) PackedStruct { + char a1; + char a2; + char a3; +}; +struct PackedStruct packed[10]; +char *packed_element = &(packed[-2].a3); +// CHECK: cir.global external @packed = #cir.zero : !cir.array {alignment = 16 : i64} loc(#loc5) +// CHECK: cir.global external @packed_element = #cir.global_view<@packed, [-2 : i32, 2 : i32]> +// LLVM: @packed = global [10 x %struct.PackedStruct] zeroinitializer +// LLVM: @packed_element = global ptr getelementptr inbounds ([10 x %struct.PackedStruct], ptr @packed, i32 -2, i32 2) diff --git a/clang/test/CIR/CodeGen/globals.c b/clang/test/CIR/CodeGen/globals.c new file mode 100644 index 000000000000..48a4db18bb63 --- /dev/null +++ b/clang/test/CIR/CodeGen/globals.c @@ -0,0 +1,107 @@ +// There seem to be some differences in how constant expressions are evaluated +// in C vs C++. This causes the code gen for C initialized globals to be a +// bit different from the C++ version. This test ensures that these differences +// are accounted for. + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +char string[] = "whatnow"; +// CHECK: cir.global external @string = #cir.const_array<"whatnow\00" : !cir.array> : !cir.array +char big_string[100000] = "123"; +// CHECK: cir.global external @big_string = #cir.const_array<"123" : !cir.array, trailing_zeros> : !cir.array +int sint[] = {123, 456, 789}; +// CHECK: cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array +int filler_sint[4] = {1, 2}; // Ensure missing elements are zero-initialized.
+// CHECK: cir.global external @filler_sint = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.array +int excess_sint[2] = {1, 2, 3, 4}; // Ensure excess elements are ignored. +// CHECK: cir.global external @excess_sint = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array +float flt[] = {1.0, 2.0}; +// CHECK: cir.global external @flt = #cir.const_array<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float]> : !cir.array + +// Tentative definition is just a declaration. +int tentativeB; +int tentativeB = 1; +// CHECK: cir.global external @tentativeB = #cir.int<1> : !s32i + +// Tentative incomplete definition is just a declaration. +int tentativeE[]; +int tentativeE[2] = {1, 2}; +// CHECK: cir.global external @tentativeE = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array + +int twoDim[2][2] = {{1, 2}, {3, 4}}; +// CHECK: cir.global external @twoDim = #cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array, #cir.const_array<[#cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array]> : !cir.array x 2> + +struct { + int x; + int y[2][2]; +} nestedTwoDim = {1, {{2, 3}, {4, 5}}}; +// CHECK: cir.global external @nestedTwoDim = #cir.const_struct<{#cir.int<1> : !s32i, #cir.const_array<[#cir.const_array<[#cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array, #cir.const_array<[#cir.int<4> : !s32i, #cir.int<5> : !s32i]> : !cir.array]> : !cir.array x 2>}> + +struct { + char x[3]; + char y[3]; + char z[3]; +} nestedString = {"1", "", "\0"}; +// CHECK: cir.global external @nestedString = #cir.const_struct<{#cir.const_array<"1" : !cir.array, trailing_zeros> : !cir.array, #cir.zero : !cir.array, #cir.zero : !cir.array}> + +struct { + char *name; +} nestedStringPtr = {"1"}; +// CHECK: cir.global external @nestedStringPtr = #cir.const_struct<{#cir.global_view<@".str"> : !cir.ptr}> + +int *globalPtr = &nestedString.y[1]; +// CHECK: cir.global external @globalPtr = #cir.global_view<@nestedString, [1 : i32, 1 : i32]> : !cir.ptr + +const int i = 12; +int i2 = i; +struct { int i; } i3 = {i}; +// CHECK: cir.global external @i2 = #cir.int<12> : !s32i +// CHECK: cir.global external @i3 = #cir.const_struct<{#cir.int<12> : !s32i}> : !ty_anon2E3_ + +int a[10][10][10]; +int *a2 = &a[3][0][8]; +struct { int *p; } a3 = {&a[3][0][8]}; +// CHECK: cir.global external @a2 = #cir.global_view<@a, [3 : i32, 0 : i32, 8 : i32]> : !cir.ptr +// CHECK: cir.global external @a3 = #cir.const_struct<{#cir.global_view<@a, [3 : i32, 0 : i32, 8 : i32]> : !cir.ptr}> : !ty_anon2E4_ + +int p[10]; +int *p1 = &p[0]; +struct { int *x; } p2 = {&p[0]}; +// CHECK: cir.global external @p1 = #cir.global_view<@p> : !cir.ptr +// CHECK: cir.global external @p2 = #cir.const_struct<{#cir.global_view<@p> : !cir.ptr}> : !ty_anon2E5_ + +int q[10]; +int *q1 = q; +struct { int *x; } q2 = {q}; +// CHECK: cir.global external @q1 = #cir.global_view<@q> : !cir.ptr +// CHECK: cir.global external @q2 = #cir.const_struct<{#cir.global_view<@q> : !cir.ptr}> : !ty_anon2E6_ + +int foo() { + extern int optind; + return optind; +} +// CHECK: cir.global "private" external @optind : !s32i +// CHECK: cir.func {{.*@foo}} +// CHECK: {{.*}} = cir.get_global @optind : !cir.ptr + +struct Glob { + double a[42]; + int pad1[3]; + double b[42]; +} glob; + +double *const glob_ptr = &glob.b[1]; +// CHECK: cir.global external @glob_ptr = #cir.global_view<@glob, [2 : i32, 1 : i32]> : !cir.ptr + +// TODO: test tentatives with 
internal linkage. + +// Tentative definition is THE definition. Should be zero-initialized. +int tentativeA; +float tentativeC; +int tentativeD[]; +float zeroInitFlt[2]; +// CHECK: cir.global external @tentativeA = #cir.int<0> : !s32i +// CHECK: cir.global external @tentativeC = #cir.fp<0.000000e+00> : !cir.float +// CHECK: cir.global external @tentativeD = #cir.zero : !cir.array +// CHECK: cir.global external @zeroInitFlt = #cir.zero : !cir.array diff --git a/clang/test/CIR/CodeGen/globals.cpp b/clang/test/CIR/CodeGen/globals.cpp new file mode 100644 index 000000000000..4df6dface2c6 --- /dev/null +++ b/clang/test/CIR/CodeGen/globals.cpp @@ -0,0 +1,133 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int a = 3; +const int b = 4; // unless used wont be generated + +unsigned long int c = 2; +int d = a; +bool e; +float y = 3.4; +double w = 4.3; +char x = '3'; +unsigned char rgb[3] = {0, 233, 33}; +char alpha[4] = "abc"; +const char *s = "example"; +const char *s1 = "example1"; +const char *s2 = "example"; + +void use_global() { + int li = a; +} + +void use_global_string() { + unsigned char c = s2[0]; +} + +template +T func() { + return T(); +} + +int use_func() { return func(); } + +// CHECK: module {{.*}} { +// CHECK-NEXT: cir.global external @a = #cir.int<3> : !s32i +// CHECK-NEXT: cir.global external @c = #cir.int<2> : !u64i +// CHECK-NEXT: cir.global external @d = #cir.int<0> : !s32i + +// CHECK-NEXT: cir.func internal private @__cxx_global_var_init() +// CHECK-NEXT: [[TMP0:%.*]] = cir.get_global @d : !cir.ptr +// CHECK-NEXT: [[TMP1:%.*]] = cir.get_global @a : !cir.ptr +// CHECK-NEXT: [[TMP2:%.*]] = cir.load [[TMP1]] : !cir.ptr, !s32i +// CHECK-NEXT: cir.store [[TMP2]], [[TMP0]] : !s32i, !cir.ptr + +// CHECK: cir.global external @e = #false +// CHECK-NEXT: cir.global external @y = #cir.fp<3.400000e+00> : !cir.float +// CHECK-NEXT: cir.global external @w = #cir.fp<4.300000e+00> : !cir.double +// CHECK-NEXT: cir.global external @x = #cir.int<51> : !s8i +// CHECK-NEXT: cir.global external @rgb = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<233> : !u8i, #cir.int<33> : !u8i]> : !cir.array +// CHECK-NEXT: cir.global external @alpha = #cir.const_array<"abc\00" : !cir.array> : !cir.array + +// CHECK-NEXT: cir.global "private" constant internal dsolocal @".str" = #cir.const_array<"example\00" : !cir.array> : !cir.array {alignment = 1 : i64} +// CHECK-NEXT: cir.global external @s = #cir.global_view<@".str"> : !cir.ptr + +// CHECK-NEXT: cir.global "private" constant internal dsolocal @".str1" = #cir.const_array<"example1\00" : !cir.array> : !cir.array {alignment = 1 : i64} +// CHECK-NEXT: cir.global external @s1 = #cir.global_view<@".str1"> : !cir.ptr + +// CHECK-NEXT: cir.global external @s2 = #cir.global_view<@".str"> : !cir.ptr + +// CHECK: cir.func @_Z10use_globalv() +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["li", init] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.get_global @a : !cir.ptr +// CHECK-NEXT: %2 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: cir.store %2, %0 : !s32i, !cir.ptr + +// CHECK: cir.func @_Z17use_global_stringv() +// CHECK-NEXT: %0 = cir.alloca !u8i, !cir.ptr, ["c", init] {alignment = 1 : i64} +// CHECK-NEXT: %1 = cir.get_global @s2 : !cir.ptr> +// CHECK-NEXT: %2 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %3 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: %4 = cir.ptr_stride(%2 : !cir.ptr, %3 : !s32i), !cir.ptr +// CHECK-NEXT: %5 = cir.load %4 : !cir.ptr, 
!s8i +// CHECK-NEXT: %6 = cir.cast(integral, %5 : !s8i), !u8i +// CHECK-NEXT: cir.store %6, %0 : !u8i, !cir.ptr +// CHECK-NEXT: cir.return + +// CHECK: cir.func linkonce_odr @_Z4funcIiET_v() -> !s32i +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %2 : !s32i +// CHECK-NEXT: } +// CHECK-NEXT: cir.func @_Z8use_funcv() -> !s32i +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.call @_Z4funcIiET_v() : () -> !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %2 : !s32i +// CHECK-NEXT: } + + +char string[] = "whatnow"; +// CHECK: cir.global external @string = #cir.const_array<"whatnow\00" : !cir.array> : !cir.array +unsigned uint[] = {255}; +// CHECK: cir.global external @uint = #cir.const_array<[#cir.int<255> : !u32i]> : !cir.array +short sshort[] = {11111, 22222}; +// CHECK: cir.global external @sshort = #cir.const_array<[#cir.int<11111> : !s16i, #cir.int<22222> : !s16i]> : !cir.array +int sint[] = {123, 456, 789}; +// CHECK: cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array +long long ll[] = {999999999, 0, 0, 0}; +// CHECK: cir.global external @ll = #cir.const_array<[#cir.int<999999999> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i]> : !cir.array + +void get_globals() { + // CHECK: cir.func @_Z11get_globalsv() + char *s = string; + // CHECK: %[[RES:[0-9]+]] = cir.get_global @string : !cir.ptr> + // CHECK: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %[[RES]] : !cir.ptr>), !cir.ptr + unsigned *u = uint; + // CHECK: %[[RES:[0-9]+]] = cir.get_global @uint : !cir.ptr> + // CHECK: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %[[RES]] : !cir.ptr>), !cir.ptr + short *ss = sshort; + // CHECK: %[[RES:[0-9]+]] = cir.get_global @sshort : !cir.ptr> + // CHECK: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %[[RES]] : !cir.ptr>), !cir.ptr + int *si = sint; + // CHECK: %[[RES:[0-9]+]] = cir.get_global @sint : !cir.ptr> + // CHECK: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %[[RES]] : !cir.ptr>), !cir.ptr + long long *l = ll; + // CHECK: %[[RES:[0-9]+]] = cir.get_global @ll : !cir.ptr> + // CHECK: %{{[0-9]+}} = cir.cast(array_to_ptrdecay, %[[RES]] : !cir.ptr>), !cir.ptr +} + +// Should generate extern global variables. +extern int externVar; +int testExternVar(void) { return externVar; } +// CHECK: cir.global "private" external @externVar : !s32i +// CHECK: cir.func @{{.+}}testExternVar +// CHECK: cir.get_global @externVar : !cir.ptr + +// Should constant initialize global with constant address. 
+int var = 1; +int *constAddr = &var; +// CHECK-DAG: cir.global external @constAddr = #cir.global_view<@var> : !cir.ptr diff --git a/clang/test/CIR/CodeGen/gnu-extension.c b/clang/test/CIR/CodeGen/gnu-extension.c new file mode 100644 index 000000000000..7386de78176f --- /dev/null +++ b/clang/test/CIR/CodeGen/gnu-extension.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int foo(void) { return __extension__ 0b101010; } + +//CHECK: cir.func @foo() +//CHECK-NEXT: [[ADDR:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +//CHECK-NEXT: [[VAL:%.*]] = cir.const #cir.int<42> : !s32i +//CHECK-NEXT: cir.store [[VAL]], [[ADDR]] : !s32i, !cir.ptr +//CHECK-NEXT: [[LOAD_VAL:%.*]] = cir.load [[ADDR]] : !cir.ptr, !s32i +//CHECK-NEXT: cir.return [[LOAD_VAL]] : !s32i + +void bar(void) { + __extension__ bar; +} + +//CHECK: cir.func @bar() +//CHECK: {{.*}} = cir.get_global @bar : !cir.ptr> +//CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/gnu89.c b/clang/test/CIR/CodeGen/gnu89.c new file mode 100644 index 000000000000..5254576779aa --- /dev/null +++ b/clang/test/CIR/CodeGen/gnu89.c @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -std=gnu89 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void foo() {} +//CHECK: cir.func {{.*@foo}} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/goto.cpp b/clang/test/CIR/CodeGen/goto.cpp new file mode 100644 index 000000000000..2200fc98cfac --- /dev/null +++ b/clang/test/CIR/CodeGen/goto.cpp @@ -0,0 +1,360 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat %s -o %t1.cir +// RUN: FileCheck --input-file=%t1.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t2.cir +// RUN: FileCheck --input-file=%t2.cir %s -check-prefix=NOFLAT + + +void g0(int a) { + int b = a; + goto end; + b = b + 1; +end: + b = b + 2; +} + +// CHECK: cir.func @_Z2g0i +// CHECK-NEXT %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK-NEXT %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} +// CHECK-NEXT cir.store %arg0, %0 : !s32i, !cir.ptr +// CHECK-NEXT %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT cir.store %2, %1 : !s32i, !cir.ptr +// CHECK-NEXT cir.br ^bb2 +// CHECK-NEXT ^bb1: // no predecessors +// CHECK-NEXT %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT %4 = cir.const 1 : !s32i +// CHECK-NEXT %5 = cir.binop(add, %3, %4) : !s32i +// CHECK-NEXT cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT cir.br ^bb2 +// CHECK-NEXT ^bb2: // 2 preds: ^bb0, ^bb1 +// CHECK-NEXT %6 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT %7 = cir.const 2 : !s32i +// CHECK-NEXT %8 = cir.binop(add, %6, %7) : !s32i +// CHECK-NEXT cir.store %8, %1 : !s32i, !cir.ptr +// CHECK-NEXT cir.return + +void g1(int a) { + int x = 0; + goto end; +end: + int y = a + 2; +} + +// Make sure alloca for "y" shows up in the entry block +// CHECK: cir.func @_Z2g1i(%arg0: !s32i +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !s32i, !cir.ptr + +int g2() { + int b = 1; + goto end; + b = b + 1; +end: + b = b + 2; + return 1; +} + +// Make sure (1) we don't get dangling unused cleanup blocks +// (2) generated returns 
consider the function type + +// CHECK: cir.func @_Z2g2v() -> !s32i + +// CHECK: cir.br ^bb2 +// CHECK-NEXT: ^bb1: // no predecessors +// CHECK: ^bb2: // 2 preds: ^bb0, ^bb1 + +// CHECK: [[R:%[0-9]+]] = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: [[R]] : !s32i +// CHECK-NEXT: } + + +int shouldNotGenBranchRet(int x) { + if (x > 5) + goto err; + return 0; +err: + return -1; +} +// NOFLAT: cir.func @_Z21shouldNotGenBranchReti +// NOFLAT: cir.if %8 { +// NOFLAT: cir.goto "err" +// NOFLAT: } +// NOFLAT: ^bb1: +// NOFLAT: %3 = cir.load %1 : !cir.ptr, !s32i +// NOFLAT: cir.return %3 : !s32i +// NOFLAT: ^bb2: // no predecessors +// NOFLAT: cir.label "err" + +int shouldGenBranch(int x) { + if (x > 5) + goto err; + x++; +err: + return -1; +} +// NOFLAT: cir.func @_Z15shouldGenBranchi +// NOFLAT: cir.if %9 { +// NOFLAT: cir.goto "err" +// NOFLAT: } +// NOFLAT: cir.br ^bb1 +// NOFLAT: ^bb1: +// NOFLAT: cir.label "err" + +void severalLabelsInARow(int a) { + int b = a; + goto end1; + b = b + 1; + goto end2; +end1: +end2: + b = b + 2; +} +// NOFLAT: cir.func @_Z19severalLabelsInARowi +// NOFLAT: ^bb[[#BLK1:]]: +// NOFLAT: cir.label "end1" +// NOFLAT: cir.br ^bb[[#BLK2:]] +// NOFLAT: ^bb[[#BLK2]]: +// NOFLAT: cir.label "end2" + +void severalGotosInARow(int a) { + int b = a; + goto end; + goto end; +end: + b = b + 2; +} +// NOFLAT: cir.func @_Z18severalGotosInARowi +// NOFLAT: cir.goto "end" +// NOFLAT: ^bb[[#BLK1:]]: +// NOFLAT: cir.goto "end" +// NOFLAT: ^bb[[#BLK2:]]: +// NOFLAT: cir.label "end" + + +void labelWithoutMatch() { +end: + return; +} +// NOFLAT: cir.func @_Z17labelWithoutMatchv() +// NOFLAT: cir.label "end" +// NOFLAT: cir.return +// NOFLAT: } + + +int jumpIntoLoop(int* ar) { + + if (ar) + goto label; + return -1; + + while (ar) { + label: + ++ar; + } + + return 0; +} + +// CHECK: cir.func @_Z12jumpIntoLoopPi +// CHECK: cir.brcond {{.*}} ^bb[[#BLK2:]], ^bb[[#BLK3:]] +// CHECK: ^bb[[#BLK2]]: +// CHECK: cir.br ^bb[[#BODY:]] +// CHECK: ^bb[[#BLK3]]: +// CHECK: cir.br ^bb[[#BLK4:]] +// CHECK: ^bb[[#BLK4]]: +// CHECK: cir.br ^bb[[#RETURN:]] +// CHECK: ^bb[[#RETURN]]: +// CHECK: cir.return +// CHECK: ^bb[[#BLK5:]]: +// CHECK: cir.br ^bb[[#BLK6:]] +// CHECK: ^bb[[#BLK6]]: +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond {{.*}} ^bb[[#BODY]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.br ^bb[[#BLK7:]] +// CHECK: ^bb[[#BLK7]]: +// CHECK: cir.br ^bb[[#RETURN]] + + + +int jumpFromLoop(int* ar) { + + if (!ar) { +err: + return -1; +} + + while (ar) { + if (*ar == 42) + goto err; + ++ar; + } + + return 0; +} +// CHECK: cir.func @_Z12jumpFromLoopPi +// CHECK: cir.brcond {{.*}} ^bb[[#RETURN1:]], ^bb[[#BLK3:]] +// CHECK: ^bb[[#RETURN1]]: +// CHECK: cir.return +// CHECK: ^bb[[#BLK3]]: +// CHECK: cir.br ^bb[[#BLK4:]] +// CHECK: ^bb[[#BLK4]]: +// CHECK: cir.br ^bb[[#BLK5:]] +// CHECK: ^bb[[#BLK5]]: +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond {{.*}} ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#IF42:]] +// CHECK: ^bb[[#IF42]]: +// CHECK: cir.brcond {{.*}} ^bb[[#IF42TRUE:]], ^bb[[#IF42FALSE:]] +// CHECK: ^bb[[#IF42TRUE]]: +// CHECK: cir.br ^bb[[#RETURN1]] +// CHECK: ^bb[[#IF42FALSE]]: +// CHECK: cir.br ^bb[[#BLK11:]] +// CHECK: ^bb[[#BLK11]]: +// CHECK: cir.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.br ^bb[[#RETURN2:]] +// CHECK: ^bb[[#RETURN2]]: +// CHECK: cir.return + + +void flatLoopWithNoTerminatorInFront(int* ptr) { + + if (ptr) + 
goto loop; + + do { + if (!ptr) + goto end; + loop: + ptr++; + } while(ptr); + + end: + ; +} + +// CHECK: cir.func @_Z31flatLoopWithNoTerminatorInFrontPi +// CHECK: cir.brcond {{.*}} ^bb[[#BLK2:]], ^bb[[#BLK3:]] +// CHECK: ^bb[[#BLK2]]: +// CHECK: cir.br ^bb[[#LABEL_LOOP:]] +// CHECK: ^bb[[#BLK3]]: +// CHECK: cir.br ^bb[[#BLK4:]] +// CHECK: ^bb[[#BLK4]]: +// CHECK: cir.br ^bb[[#BLK5:]] +// CHECK: ^bb[[#BLK5]]: +// CHECK: cir.br ^bb[[#BODY:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond {{.*}} ^bb[[#BODY]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#BLK8:]] +// CHECK: ^bb[[#BLK8]]: +// CHECK: cir.brcond {{.*}} ^bb[[#BLK9:]], ^bb[[#BLK10:]] +// CHECK: ^bb[[#BLK9]]: +// CHECK: cir.br ^bb[[#RETURN:]] +// CHECK: ^bb[[#BLK10]]: +// CHECK: cir.br ^bb[[#BLK11:]] +// CHECK: ^bb[[#BLK11]]: +// CHECK: cir.br ^bb[[#LABEL_LOOP]] +// CHECK: ^bb[[#LABEL_LOOP]]: +// CHECK: cir.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.br ^bb[[#BLK14:]] +// CHECK: ^bb[[#BLK14]]: +// CHECK: cir.br ^bb[[#RETURN]] +// CHECK: ^bb[[#RETURN]]: +// CHECK: cir.return +// CHECK: } +// CHECK:} + +struct S {}; +struct S get(); +void bar(struct S); + +void foo() { + { + label: + bar(get()); + } +} + +// NOFLAT: cir.func @_Z3foov() +// NOFLAT: cir.scope { +// NOFLAT: cir.label "label" +// NOFLAT: %0 = cir.alloca !ty_S, !cir.ptr, ["agg.tmp0"] + +extern "C" void action1(); +extern "C" void action2(); +extern "C" void multiple_non_case(int v) { + switch (v) { + default: + action1(); + l2: + action2(); + break; + } +} + +// NOFLAT: cir.func @multiple_non_case +// NOFLAT: cir.switch +// NOFLAT: case (default) +// NOFLAT: cir.call @action1() +// NOFLAT: cir.br ^[[BB1:[a-zA-Z0-9]+]] +// NOFLAT: ^[[BB1]]: +// NOFLAT: cir.label +// NOFLAT: cir.call @action2() +// NOFLAT: cir.break + +extern "C" void case_follow_label(int v) { + switch (v) { + case 1: + label: + case 2: + action1(); + break; + default: + action2(); + goto label; + } +} + +// NOFLAT: cir.func @case_follow_label +// NOFLAT: cir.switch +// NOFLAT: case (equal, 1) +// NOFLAT: cir.label "label" +// NOFLAT: cir.yield +// NOFLAT: case (equal, 2) +// NOFLAT: cir.call @action1() +// NOFLAT: cir.break +// NOFLAT: case (default) +// NOFLAT: cir.call @action2() +// NOFLAT: cir.goto "label" + +extern "C" void default_follow_label(int v) { + switch (v) { + case 1: + case 2: + action1(); + break; + label: + default: + action2(); + goto label; + } +} + +// NOFLAT: cir.func @default_follow_label +// NOFLAT: cir.switch +// NOFLAT: case (anyof, [1, 2] : !s32i) +// NOFLAT: cir.call @action1() +// NOFLAT: cir.break +// NOFLAT: cir.label "label" +// NOFLAT: case (default) +// NOFLAT: cir.call @action2() +// NOFLAT: cir.goto "label" diff --git a/clang/test/CIR/CodeGen/hello.c b/clang/test/CIR/CodeGen/hello.c new file mode 100644 index 000000000000..3eff7227943c --- /dev/null +++ b/clang/test/CIR/CodeGen/hello.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +int printf(const char *restrict, ...); + +int main (void) { + printf ("Hello, world!\n"); + return 0; +} + +// CHECK: cir.func private @printf(!cir.ptr, ...) 
-> !s32i +// CHECK: cir.global "private" constant internal dsolocal @".str" = #cir.const_array<"Hello, world!\0A\00" : !cir.array> : !cir.array {alignment = 1 : i64} +// CHECK: cir.func @main() -> !s32i +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: %1 = cir.get_global @printf : !cir.ptr, ...)>> +// CHECK: %2 = cir.get_global @".str" : !cir.ptr> +// CHECK: %3 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr +// CHECK: %4 = cir.call @printf(%3) : (!cir.ptr) -> !s32i +// CHECK: %5 = cir.const #cir.int<0> : !s32i +// CHECK: cir.store %5, %0 : !s32i, !cir.ptr +// CHECK: %6 = cir.load %0 : !cir.ptr, !s32i +// CHECK: cir.return %6 : !s32i +// CHECK: } diff --git a/clang/test/CIR/CodeGen/if-consteval.cpp b/clang/test/CIR/CodeGen/if-consteval.cpp new file mode 100644 index 000000000000..97468beb0ac5 --- /dev/null +++ b/clang/test/CIR/CodeGen/if-consteval.cpp @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void should_be_used_1(); +void should_be_used_2(); +void should_be_used_3(); +constexpr void should_not_be_used() {} + +constexpr void f() { + if consteval { + should_not_be_used(); // CHECK-NOT: call {{.*}}should_not_be_used + } else { + should_be_used_1(); // CHECK: call {{.*}}should_be_used_1 + } + + if !consteval { + should_be_used_2(); // CHECK: call {{.*}}should_be_used_2 + } else { + should_not_be_used(); // CHECK-NOT: call {{.*}}should_not_be_used + } + + if consteval { + should_not_be_used(); // CHECK-NOT: call {{.*}}should_not_be_used + } + + if !consteval { + should_be_used_3(); // CHECK: call {{.*}}should_be_used_3 + } +} + +void g() { + f(); +} diff --git a/clang/test/CIR/CodeGen/if-constexpr.cpp b/clang/test/CIR/CodeGen/if-constexpr.cpp new file mode 100644 index 000000000000..f980f3100841 --- /dev/null +++ b/clang/test/CIR/CodeGen/if-constexpr.cpp @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void if0() { + int x = 0; + if constexpr (0 == 0) { + // Declare a variable with same name to be sure we handle the + // scopes correctly + int x = 2; + } else { + int x = 3; + } + if constexpr (0 == 1) { + int x = 4; + } else { + int x = 5; + } + if constexpr (int x = 7; 8 == 8) { + int y = x; + } else { + int y = 2*x; + } + if constexpr (int x = 9; 8 == 10) { + int y = x; + } else { + int y = 3*x; + } + if constexpr (10 == 10) { + int x = 20; + } + if constexpr (10 == 11) { + int x = 30; + } + if constexpr (int x = 70; 80 == 80) { + int y = 10*x; + } + if constexpr (int x = 90; 80 == 100) { + int y = 11*x; + } +} + +// CHECK: cir.func @_Z3if0v() {{.*}} +// CHECK: cir.store %1, %0 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.const #cir.int<2> : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %3, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.const #cir.int<5> : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %3, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.alloca !s32i, !cir.ptr, ["y", init] {{.*}} +// CHECK-NEXT: %4 = cir.const #cir.int<7> : !s32i 
loc({{.*}}) +// CHECK-NEXT: cir.store %4, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: %5 = cir.load %2 : !cir.ptr, !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %5, %3 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.alloca !s32i, !cir.ptr, ["y", init] {{.*}} +// CHECK-NEXT: %4 = cir.const #cir.int<9> : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %4, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: %5 = cir.const #cir.int<3> : !s32i loc({{.*}}) +// CHECK-NEXT: %6 = cir.load %2 : !cir.ptr, !s32i loc({{.*}}) +// CHECK-NEXT: %7 = cir.binop(mul, %5, %6) nsw : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %7, %3 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.const #cir.int<20> : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %3, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// Note that Clang does not even emit a block in this case +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.alloca !s32i, !cir.ptr, ["y", init] {{.*}} +// CHECK-NEXT: %4 = cir.const #cir.int<70> : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %4, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: %5 = cir.const #cir.int<10> : !s32i loc({{.*}}) +// CHECK-NEXT: %6 = cir.load %2 : !cir.ptr, !s32i loc({{.*}}) +// CHECK-NEXT: %7 = cir.binop(mul, %5, %6) nsw : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %7, %3 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {{.*}} +// CHECK-NEXT: %3 = cir.const #cir.int<90> : !s32i loc({{.*}}) +// CHECK-NEXT: cir.store %3, %2 : !s32i, !cir.ptr loc({{.*}}) +// CHECK-NEXT: } loc({{.*}}) +// CHECK-NEXT: cir.return loc({{.*}}) diff --git a/clang/test/CIR/CodeGen/implicit-return.cpp b/clang/test/CIR/CodeGen/implicit-return.cpp new file mode 100644 index 000000000000..fa64d244957d --- /dev/null +++ b/clang/test/CIR/CodeGen/implicit-return.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CHECK-O0 +// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CHECK-O2 + +void ret_void() {} + +// CHECK-O0: cir.func @_Z8ret_voidv() +// CHECK-O0-NEXT: cir.return +// CHECK-O0-NEXT: } + +// CHECK-O2: cir.func @_Z8ret_voidv() +// CHECK-O2-NEXT: cir.return +// CHECK-O2-NEXT: } + +int ret_non_void() {} + +// CHECK-O0: cir.func @_Z12ret_non_voidv() -> !s32i +// CHECK-O0-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK-O0-NEXT: cir.trap +// CHECK-O0-NEXT: } + +// CHECK-O2: cir.func @_Z12ret_non_voidv() -> !s32i +// CHECK-O2-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK-O2-NEXT: cir.unreachable +// CHECK-O2-NEXT: } diff --git a/clang/test/CIR/CodeGen/inc-bool.cpp b/clang/test/CIR/CodeGen/inc-bool.cpp new file mode 100644 index 000000000000..193d63314960 --- /dev/null +++ b/clang/test/CIR/CodeGen/inc-bool.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++14 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void foo(bool x) { + x++; 
+} + +// CHECK: cir.func @_Z3foob(%arg0: !cir.bool loc({{.*}})) +// CHECK: [[ALLOC_X:%.*]] = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} +// CHECK: cir.store %arg0, [[ALLOC_X]] : !cir.bool, !cir.ptr +// CHECK: {{.*}} = cir.load [[ALLOC_X]] : !cir.ptr, !cir.bool +// CHECK: [[TRUE:%.*]] = cir.const #true +// CHECK: cir.store [[TRUE]], [[ALLOC_X]] : !cir.bool, !cir.ptr +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/inc-dec.cpp b/clang/test/CIR/CodeGen/inc-dec.cpp new file mode 100644 index 000000000000..5207db364ed4 --- /dev/null +++ b/clang/test/CIR/CodeGen/inc-dec.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +unsigned id0() { + unsigned a = 1; + return ++a; +} + +// CHECK: cir.func @_Z3id0v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#BEFORE_A:]] = cir.load %[[#A]] +// CHECK: %[[#AFTER_A:]] = cir.unary(inc, %[[#BEFORE_A]]) +// CHECK: cir.store %[[#AFTER_A]], %[[#A]] +// CHECK: cir.store %[[#AFTER_A]], %[[#RET]] + + +unsigned id1() { + unsigned a = 1; + return --a; +} + +// CHECK: cir.func @_Z3id1v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#BEFORE_A:]] = cir.load %[[#A]] +// CHECK: %[[#AFTER_A:]] = cir.unary(dec, %[[#BEFORE_A]]) +// CHECK: cir.store %[[#AFTER_A]], %[[#A]] +// CHECK: cir.store %[[#AFTER_A]], %[[#RET]] + +unsigned id2() { + unsigned a = 1; + return a++; +} + +// CHECK: cir.func @_Z3id2v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#BEFORE_A:]] = cir.load %[[#A]] +// CHECK: %[[#AFTER_A:]] = cir.unary(inc, %[[#BEFORE_A]]) +// CHECK: cir.store %[[#AFTER_A]], %[[#A]] +// CHECK: cir.store %[[#BEFORE_A]], %[[#RET]] + +unsigned id3() { + unsigned a = 1; + return a--; +} + +// CHECK: cir.func @_Z3id3v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#BEFORE_A:]] = cir.load %[[#A]] +// CHECK: %[[#AFTER_A:]] = cir.unary(dec, %[[#BEFORE_A]]) +// CHECK: cir.store %[[#AFTER_A]], %[[#A]] +// CHECK: cir.store %[[#BEFORE_A]], %[[#RET]] diff --git a/clang/test/CIR/CodeGen/initlist-ptr-ptr.cpp b/clang/test/CIR/CodeGen/initlist-ptr-ptr.cpp new file mode 100644 index 000000000000..bad49923e895 --- /dev/null +++ b/clang/test/CIR/CodeGen/initlist-ptr-ptr.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +namespace std { +template class initializer_list { + const b *array_start; + const b *array_end; +}; +template +void f(initializer_list) {;} +void test() { + f({"xy","uv"}); +} +} // namespace std + +// CIR: [[INITLIST_TYPE:!.*]] = !cir.struct" {!cir.ptr>>, !cir.ptr>>}> +// CIR: cir.func linkonce_odr @_ZSt1fIPKcEvSt16initializer_listIT_E(%arg0: [[INITLIST_TYPE]] +// CIR: [[LOCAL:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>, +// CIR: cir.store %arg0, [[LOCAL]] : [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]> +// CIR: cir.return + +// 
CIR: cir.global "private" constant internal dsolocal [[STR_XY:@.*]] = #cir.const_array<"xy\00" : !cir.array> : !cir.array +// CIR: cir.global "private" constant internal dsolocal [[STR_UV:@.*]] = #cir.const_array<"uv\00" : !cir.array> : !cir.array + +// CIR: cir.func @_ZSt4testv() +// CIR: cir.scope { +// CIR: [[INITLIST_LOCAL:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>, +// CIR: [[LOCAL_ELEM_ARRAY:%.*]] = cir.alloca !cir.array x 2>, !cir.ptr x 2>>, +// CIR: [[FIRST_ELEM_PTR:%.*]] = cir.cast(array_to_ptrdecay, [[LOCAL_ELEM_ARRAY]] : !cir.ptr x 2>>), !cir.ptr> +// CIR: [[XY_CHAR_ARRAY:%.*]] = cir.get_global [[STR_XY]] : !cir.ptr> +// CIR: [[STR_XY_PTR:%.*]] = cir.cast(array_to_ptrdecay, [[XY_CHAR_ARRAY]] : !cir.ptr>), !cir.ptr +// CIR: cir.store [[STR_XY_PTR]], [[FIRST_ELEM_PTR]] : !cir.ptr, !cir.ptr> +// CIR: [[ONE:%.*]] = cir.const #cir.int<1> +// CIR: [[NEXT_ELEM_PTR:%.*]] = cir.ptr_stride([[FIRST_ELEM_PTR]] : !cir.ptr>, [[ONE]] : !s64i), !cir.ptr> +// CIR: [[UV_CHAR_ARRAY:%.*]] = cir.get_global [[STR_UV]] : !cir.ptr> +// CIR: [[STR_UV_PTR:%.*]] = cir.cast(array_to_ptrdecay, [[UV_CHAR_ARRAY]] : !cir.ptr>), !cir.ptr +// CIR: cir.store [[STR_UV_PTR]], [[NEXT_ELEM_PTR]] : !cir.ptr, !cir.ptr> +// CIR: [[START_FLD_PTR:%.*]] = cir.get_member [[INITLIST_LOCAL]][0] {name = "array_start"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr>> +// CIR: [[START_FLD_PTR_AS_PTR_2_CHAR_ARRAY:%.*]] = cir.cast(bitcast, [[START_FLD_PTR]] : !cir.ptr>>), !cir.ptr x 2>>> +// CIR: cir.store [[LOCAL_ELEM_ARRAY]], [[START_FLD_PTR_AS_PTR_2_CHAR_ARRAY]] : !cir.ptr x 2>>, !cir.ptr x 2>>> +// CIR: [[ELEM_ARRAY_LEN:%.*]] = cir.const #cir.int<2> +// CIR: [[END_FLD_PTR:%.*]] = cir.get_member [[INITLIST_LOCAL]][1] {name = "array_end"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr>> +// CIR: [[LOCAL_ELEM_ARRAY_END:%.*]] = cir.ptr_stride([[LOCAL_ELEM_ARRAY]] : !cir.ptr x 2>>, [[ELEM_ARRAY_LEN]] : !u64i), !cir.ptr x 2>> +// CIR: [[END_FLD_PTR_AS_PTR_2_CHAR_ARRAY:%.*]] = cir.cast(bitcast, [[END_FLD_PTR]] : !cir.ptr>>), !cir.ptr x 2>>> +// CIR: cir.store [[LOCAL_ELEM_ARRAY_END]], [[END_FLD_PTR_AS_PTR_2_CHAR_ARRAY]] : !cir.ptr x 2>>, !cir.ptr x 2>>> +// CIR: [[ARG:%.*]] = cir.load [[INITLIST_LOCAL]] : !cir.ptr<[[INITLIST_TYPE]]>, [[INITLIST_TYPE]] +// CIR: cir.call @_ZSt1fIPKcEvSt16initializer_listIT_E([[ARG]]) : ([[INITLIST_TYPE]]) -> () +// CIR: } +// CIR: cir.return +// CIR: } + +// LLVM: %"class.std::initializer_list" = type { ptr, ptr } + +// LLVM: @.str = internal constant [3 x i8] c"xy\00" +// LLVM: @.str1 = internal constant [3 x i8] c"uv\00" + +// LLVM: define linkonce_odr void @_ZSt1fIPKcEvSt16initializer_listIT_E(%"class.std::initializer_list" [[ARG0:%.*]]) +// LLVM: [[LOCAL_PTR:%.*]] = alloca %"class.std::initializer_list", i64 1, align 8, +// LLVM: store %"class.std::initializer_list" [[ARG0]], ptr [[LOCAL_PTR]], align 8, +// LLVM: ret void, +// LLVM: } + +// LLVM: define dso_local void @_ZSt4testv() +// LLVM: br label %[[SCOPE_START:.*]], +// LLVM: [[SCOPE_START]]: ; preds = %0 +// LLVM: [[INIT_STRUCT:%.*]] = alloca %"class.std::initializer_list", i64 1, align 8, +// LLVM: [[ELEM_ARRAY_PTR:%.*]] = alloca [2 x ptr], i64 1, align 8, +// LLVM: [[PTR_FIRST_ELEM:%.*]] = getelementptr ptr, ptr [[ELEM_ARRAY_PTR]], i32 0, +// LLVM: store ptr @.str, ptr [[PTR_FIRST_ELEM]], align 8, +// LLVM: [[PTR_SECOND_ELEM:%.*]] = getelementptr ptr, ptr [[PTR_FIRST_ELEM]], i64 1, +// LLVM: store ptr @.str1, ptr [[PTR_SECOND_ELEM]], align 8, +// LLVM: [[INIT_START_FLD_PTR:%.*]] = getelementptr %"class.std::initializer_list", 
ptr [[INIT_STRUCT]], i32 0, i32 0, +// LLVM: [[INIT_END_FLD_PTR:%.*]] = getelementptr %"class.std::initializer_list", ptr [[INIT_STRUCT]], i32 0, i32 1, +// LLVM: [[ELEM_ARRAY_END:%.*]] = getelementptr [2 x ptr], ptr [[ELEM_ARRAY_PTR]], i64 2, +// LLVM: store ptr [[ELEM_ARRAY_END]], ptr [[INIT_END_FLD_PTR]], align 8, +// LLVM: [[ARG2PASS:%.*]] = load %"class.std::initializer_list", ptr [[INIT_STRUCT]], align 8, +// LLVM: call void @_ZSt1fIPKcEvSt16initializer_listIT_E(%"class.std::initializer_list" [[ARG2PASS]]), +// LLVM: br label %[[SCOPE_END:.*]], +// LLVM: [[SCOPE_END]]: ; preds = %[[SCOPE_START]] +// LLVM: ret void diff --git a/clang/test/CIR/CodeGen/initlist-ptr-unsigned.cpp b/clang/test/CIR/CodeGen/initlist-ptr-unsigned.cpp new file mode 100644 index 000000000000..8703ca391fde --- /dev/null +++ b/clang/test/CIR/CodeGen/initlist-ptr-unsigned.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +namespace std { +template class initializer_list { + const b *c; + unsigned long len; +}; +template +void f(initializer_list) {;} +void test() { + f({7}); +} +} // namespace std + +// CIR: [[INITLIST_TYPE:!.*]] = !cir.struct" {!cir.ptr>, !cir.int}> + +// CIR: cir.func linkonce_odr @_ZSt1fIiEvSt16initializer_listIT_E(%arg0: [[INITLIST_TYPE]] +// CIR: [[REG0:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>, +// CIR: cir.store %arg0, [[REG0]] : [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]> +// CIR: cir.return + +// CIR: cir.func @_ZSt4testv() +// CIR: cir.scope { +// CIR: [[LIST_PTR:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>, +// CIR: [[ARRAY:%.*]] = cir.alloca !cir.array, !cir.ptr>, +// CIR: [[DECAY_PTR:%.*]] = cir.cast(array_to_ptrdecay, [[ARRAY]] : !cir.ptr>), !cir.ptr +// CIR: [[SEVEN:%.*]] = cir.const #cir.int<7> : !s32i +// CIR: cir.store [[SEVEN]], [[DECAY_PTR]] : !s32i, !cir.ptr +// CIR: [[FLD_C:%.*]] = cir.get_member [[LIST_PTR]][0] {name = "c"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr> +// CIR: [[ARRAY_PTR:%.*]] = cir.cast(bitcast, [[FLD_C]] : !cir.ptr>), !cir.ptr>> +// CIR: cir.store [[ARRAY]], [[ARRAY_PTR]] : !cir.ptr>, !cir.ptr>> +// CIR: [[LENGTH_ONE:%.*]] = cir.const #cir.int<1> +// CIR: [[FLD_LEN:%.*]] = cir.get_member [[LIST_PTR]][1] {name = "len"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr +// CIR: cir.store [[LENGTH_ONE]], [[FLD_LEN]] : !u64i, !cir.ptr +// CIR: [[ARG2PASS:%.*]] = cir.load [[LIST_PTR]] : !cir.ptr<[[INITLIST_TYPE]]>, [[INITLIST_TYPE]] +// CIR: cir.call @_ZSt1fIiEvSt16initializer_listIT_E([[ARG2PASS]]) : ([[INITLIST_TYPE]]) -> () +// CIR: } +// CIR: cir.return +// CIR: } + +// LLVM: %"class.std::initializer_list" = type { ptr, i64 } +// LLVM: define linkonce_odr void @_ZSt1fIiEvSt16initializer_listIT_E(%"class.std::initializer_list" [[ARG:%.*]]) +// LLVM: [[LOCAL:%.*]] = alloca %"class.std::initializer_list", i64 1, align 8, +// LLVM: store %"class.std::initializer_list" [[ARG]], ptr [[LOCAL]], align 8, + +// LLVM: define dso_local void @_ZSt4testv() +// LLVM: br label %[[SCOPE_START:.*]], +// LLVM: [[SCOPE_START]]: ; preds = %0 +// LLVM: [[INIT_STRUCT:%.*]] = alloca %"class.std::initializer_list", i64 1, align 8, +// LLVM: [[ELEM_ARRAY:%.*]] = alloca [1 x i32], i64 1, align 4, +// LLVM: [[PTR_FIRST_ELEM:%.*]] = getelementptr i32, ptr [[ELEM_ARRAY]], i32 0, 
+// LLVM: store i32 7, ptr [[PTR_FIRST_ELEM]], align 4, +// LLVM: [[ELEM_ARRAY_PTR:%.*]] = getelementptr %"class.std::initializer_list", ptr [[INIT_STRUCT]], i32 0, i32 0, +// LLVM: store ptr [[ELEM_ARRAY]], ptr [[ELEM_ARRAY_PTR]], align 8, +// LLVM: [[INIT_LEN_FLD:%.*]] = getelementptr %"class.std::initializer_list", ptr [[INIT_STRUCT]], i32 0, i32 1, +// LLVM: store i64 1, ptr [[INIT_LEN_FLD]], align 8, +// LLVM: [[ARG2PASS:%.*]] = load %"class.std::initializer_list", ptr [[INIT_STRUCT]], align 8, +// LLVM: call void @_ZSt1fIiEvSt16initializer_listIT_E(%"class.std::initializer_list" [[ARG2PASS]]) +// LLVM: br label %[[SCOPE_END:.*]], +// LLVM: [[SCOPE_END]]: ; preds = %[[SCOPE_START]] +// LLVM: ret void diff --git a/clang/test/CIR/CodeGen/int-wrap.c b/clang/test/CIR/CodeGen/int-wrap.c new file mode 100644 index 000000000000..f23e216143fc --- /dev/null +++ b/clang/test/CIR/CodeGen/int-wrap.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fwrapv -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=WRAP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=NOWRAP + +#define N 42 + +typedef struct { + const char* ptr; +} A; + +// WRAP: cir.binop(sub, {{.*}}, {{.*}}) : !s32i +// NOWRAP: cir.binop(sub, {{.*}}, {{.*}}) nsw : !s32i +void foo(int* ar, int len) { + int x = ar[len - N]; +} + +// check that the ptr_stride is generated in both cases (i.e. no NYI fails) + +// WRAP: cir.ptr_stride +// NOWRAP: cir.ptr_stride +void bar(A* a, unsigned n) { + a->ptr = a->ptr + n; +} + +// WRAP cir.ptr_stride +// NOWRAP: cir.ptr_stride +void baz(A* a) { + a->ptr--; +} + + diff --git a/clang/test/CIR/CodeGen/lalg.c b/clang/test/CIR/CodeGen/lalg.c new file mode 100644 index 000000000000..26b41591d7dd --- /dev/null +++ b/clang/test/CIR/CodeGen/lalg.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o - | FileCheck %s + +double dot() { + double x = 0.0; + double y = 0.0f; + double result = x * y; + return result; +} + +// CHECK: %1 = cir.alloca !cir.double, !cir.ptr, ["x", init] +// CHECK-NEXT: %2 = cir.alloca !cir.double, !cir.ptr, ["y", init] +// CHECK-NEXT: %3 = cir.alloca !cir.double, !cir.ptr, ["result", init] +// CHECK-NEXT: %4 = cir.const #cir.fp<0.000000e+00> : !cir.double +// CHECK-NEXT: cir.store %4, %1 : !cir.double, !cir.ptr +// CHECK-NEXT: %5 = cir.const #cir.fp<0.000000e+00> : !cir.float +// CHECK-NEXT: %6 = cir.cast(floating, %5 : !cir.float), !cir.double +// CHECK-NEXT: cir.store %6, %2 : !cir.double, !cir.ptr +// CHECK-NEXT: %7 = cir.load %1 : !cir.ptr, !cir.double +// CHECK-NEXT: %8 = cir.load %2 : !cir.ptr, !cir.double +// CHECK-NEXT: %9 = cir.binop(mul, %7, %8) : !cir.double diff --git a/clang/test/CIR/CodeGen/lambda.cpp b/clang/test/CIR/CodeGen/lambda.cpp new file mode 100644 index 000000000000..9eacaa38ef24 --- /dev/null +++ b/clang/test/CIR/CodeGen/lambda.cpp @@ -0,0 +1,136 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-return-stack-address -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void fn() { + auto a = [](){}; + a(); +} + +// CHECK: !ty_anon2E0_ = !cir.struct}> +// CHECK-DAG: module + +// CHECK: cir.func lambda internal private @_ZZ2fnvENK3$_0clEv{{.*}}) extra + +// CHECK: cir.func @_Z2fnv() +// CHECK-NEXT: %0 = cir.alloca !ty_anon2E0_, !cir.ptr, ["a"] +// CHECK: cir.call @_ZZ2fnvENK3$_0clEv + +void l0() { + int i; + auto a = [&](){ i = i + 1; }; + a(); +} + +// CHECK: cir.func 
lambda internal private @_ZZ2l0vENK3$_0clEv({{.*}}) extra + +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %2 = cir.get_member %1[0] {name = "i"} : !cir.ptr -> !cir.ptr> +// CHECK: %3 = cir.load %2 : !cir.ptr>, !cir.ptr +// CHECK: %4 = cir.load %3 : !cir.ptr, !s32i +// CHECK: %5 = cir.const #cir.int<1> : !s32i +// CHECK: %6 = cir.binop(add, %4, %5) nsw : !s32i +// CHECK: %7 = cir.get_member %1[0] {name = "i"} : !cir.ptr -> !cir.ptr> +// CHECK: %8 = cir.load %7 : !cir.ptr>, !cir.ptr +// CHECK: cir.store %6, %8 : !s32i, !cir.ptr + +// CHECK: cir.func @_Z2l0v() + +auto g() { + int i = 12; + return [&] { + i += 100; + return i; + }; +} + +// CHECK: cir.func @_Z1gv() -> !ty_anon2E3_ +// CHECK: %0 = cir.alloca !ty_anon2E3_, !cir.ptr, ["__retval"] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CHECK: %2 = cir.const #cir.int<12> : !s32i +// CHECK: cir.store %2, %1 : !s32i, !cir.ptr +// CHECK: %3 = cir.get_member %0[0] {name = "i"} : !cir.ptr -> !cir.ptr> +// CHECK: cir.store %1, %3 : !cir.ptr, !cir.ptr> +// CHECK: %4 = cir.load %0 : !cir.ptr, !ty_anon2E3_ +// CHECK: cir.return %4 : !ty_anon2E3_ + +auto g2() { + int i = 12; + auto lam = [&] { + i += 100; + return i; + }; + return lam; +} + +// Should be same as above because of NRVO +// CHECK: cir.func @_Z2g2v() -> !ty_anon2E4_ +// CHECK-NEXT: %0 = cir.alloca !ty_anon2E4_, !cir.ptr, ["__retval", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CHECK-NEXT: %2 = cir.const #cir.int<12> : !s32i +// CHECK-NEXT: cir.store %2, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %3 = cir.get_member %0[0] {name = "i"} : !cir.ptr -> !cir.ptr> +// CHECK-NEXT: cir.store %1, %3 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %4 = cir.load %0 : !cir.ptr, !ty_anon2E4_ +// CHECK-NEXT: cir.return %4 : !ty_anon2E4_ + +int f() { + return g2()(); +} + +// CHECK: cir.func @_Z1fv() -> !s32i +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %2 = cir.alloca !ty_anon2E4_, !cir.ptr, ["ref.tmp0"] {alignment = 8 : i64} +// CHECK-NEXT: %3 = cir.call @_Z2g2v() : () -> !ty_anon2E4_ +// CHECK-NEXT: cir.store %3, %2 : !ty_anon2E4_, !cir.ptr +// CHECK-NEXT: %4 = cir.call @_ZZ2g2vENK3$_0clEv(%2) : (!cir.ptr) -> !s32i +// CHECK-NEXT: cir.store %4, %0 : !s32i, !cir.ptr +// CHECK-NEXT: } +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %1 : !s32i +// CHECK-NEXT: } + +int g3() { + auto* fn = +[](int const& i) -> int { return i; }; + auto task = fn(3); + return task; +} + +// lambda operator() +// CHECK: cir.func lambda internal private @_ZZ2g3vENK3$_0clERKi{{.*}}!s32i extra + +// lambda __invoke() +// CHECK: cir.func internal private @_ZZ2g3vEN3$_08__invokeERKi + +// lambda operator int (*)(int const&)() +// CHECK: cir.func internal private @_ZZ2g3vENK3$_0cvPFiRKiEEv + +// CHECK: cir.func @_Z2g3v() -> !s32i +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: %1 = cir.alloca !cir.ptr)>>, !cir.ptr)>>>, ["fn", init] {alignment = 8 : i64} +// CHECK: %2 = cir.alloca !s32i, !cir.ptr, ["task", init] {alignment = 4 : i64} + +// 1. Use `operator int (*)(int const&)()` to retrieve the fnptr to `__invoke()`. 
+// CHECK: %3 = cir.scope { +// CHECK: %7 = cir.alloca !ty_anon2E5_, !cir.ptr, ["ref.tmp0"] {alignment = 1 : i64} +// CHECK: %8 = cir.call @_ZZ2g3vENK3$_0cvPFiRKiEEv(%7) : (!cir.ptr) -> !cir.ptr)>> +// CHECK: %9 = cir.unary(plus, %8) : !cir.ptr)>>, !cir.ptr)>> +// CHECK: cir.yield %9 : !cir.ptr)>> +// CHECK: } + +// 2. Load ptr to `__invoke()`. +// CHECK: cir.store %3, %1 : !cir.ptr)>>, !cir.ptr)>>> +// CHECK: %4 = cir.scope { +// CHECK: %7 = cir.alloca !s32i, !cir.ptr, ["ref.tmp1", init] {alignment = 4 : i64} +// CHECK: %8 = cir.load %1 : !cir.ptr)>>>, !cir.ptr)>> +// CHECK: %9 = cir.const #cir.int<3> : !s32i +// CHECK: cir.store %9, %7 : !s32i, !cir.ptr + +// 3. Call `__invoke()`, which effectively executes `operator()`. +// CHECK: %10 = cir.call %8(%7) : (!cir.ptr)>>, !cir.ptr) -> !s32i +// CHECK: cir.yield %10 : !s32i +// CHECK: } + +// CHECK: } diff --git a/clang/test/CIR/CodeGen/libc.c b/clang/test/CIR/CodeGen/libc.c new file mode 100644 index 000000000000..f6cf6a8e50e6 --- /dev/null +++ b/clang/test/CIR/CodeGen/libc.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Should generate CIR's builtin memcpy op. +void *memcpy(void *, const void *, unsigned long); +void testMemcpy(void *src, const void *dst, unsigned long size) { + memcpy(dst, src, size); + // CHECK: cir.libc.memcpy %{{.+}} bytes from %{{.+}} to %{{.+}} : !u64i, !cir.ptr -> !cir.ptr +} + +double fabs(double); +double testFabs(double x) { + return fabs(x); + // CHECK: cir.fabs %{{.+}} : !cir.double +} + +float fabsf(float); +float testFabsf(float x) { + return fabsf(x); + // CHECK: cir.fabs %{{.+}} : !cir.float +} diff --git a/clang/test/CIR/CodeGen/libcall.cpp b/clang/test/CIR/CodeGen/libcall.cpp new file mode 100644 index 000000000000..17d2e7912833 --- /dev/null +++ b/clang/test/CIR/CodeGen/libcall.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef __builtin_va_list va_list; + +static __inline__ __attribute__((__always_inline__)) __attribute__((__format__(printf, 3, 0))) +int vsnprintf(char* const __attribute__((pass_object_size(1))) dest, int size, const char* format, va_list ap) + __attribute__((overloadable)) { + return __builtin___vsnprintf_chk(dest, size, 0, __builtin_object_size(((dest)), (1)), format, ap); +} + +typedef long unsigned int size_t; + +size_t __strlen_chk(const char* __s, size_t __n) __attribute__((annotate("introduced_in=" "17"))); +size_t strlen(const char* __s) __attribute__((__pure__)); +static __inline__ __attribute__((__always_inline__)) +size_t strlen(const char* const s __attribute__((pass_object_size(0)))) __attribute__((overloadable)) { + size_t bos = __builtin_object_size(((s)), (0)); + + if (bos == ((size_t) -1)) { + return __builtin_strlen(s); + } + + return __strlen_chk(s, bos); +} + +void log(int, const char *, int); + +void consume_message(const char *m) { + log(3, m, strlen(m)); +} + +void t(const char* fmt, ...) 
{ + va_list args; + __builtin_va_start(args, fmt); + const int size = 512; + char message[size]; + vsnprintf(message, size, fmt, args); + consume_message(message); +} + +// CHECK: cir.func @_Z15consume_messagePKc(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["m", init] {alignment = 8 : i64} + +// CHECK: %3 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %4 = cir.objsize(%3 : , max) -> !u64i +// CHECK: %5 = cir.call @_ZL6strlenPKcU17pass_object_size0(%3, %4) : (!cir.ptr, !u64i) -> !u64i + +// CHECK: cir.func private @__vsnprintf_chk +// CHECK: cir.func internal private @_ZL9vsnprintfPcU17pass_object_size1iPKcP13__va_list_tag + +// Implicit size parameter in arg %1 +// +// FIXME: tag the param with an attribute to designate the size information. +// +// CHECK: %1 = cir.alloca !u64i, !cir.ptr, ["", init] {alignment = 8 : i64} + +// CHECK: cir.store %arg1, %1 : !u64i, !cir.ptr + +// CHECK: %10 = cir.load %1 : !cir.ptr, !u64i +// CHECK: %11 = cir.load %3 : !cir.ptr>, !cir.ptr +// CHECK: %12 = cir.load %4 : !cir.ptr>, !cir.ptr +// CHECK: %13 = cir.call @__vsnprintf_chk(%6, %8, %9, %10, %11, %12) diff --git a/clang/test/CIR/CodeGen/linkage.c b/clang/test/CIR/CodeGen/linkage.c new file mode 100644 index 000000000000..1b087f43ca81 --- /dev/null +++ b/clang/test/CIR/CodeGen/linkage.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + + +static int bar(int i) { + return i; +} + +int foo(void) { + return bar(5); +} + +// CIR: cir.func internal private @bar( +// CIR: cir.func @foo( + +// LLVM: define internal i32 @bar( +// LLVM: define dso_local i32 @foo( + +static int var = 0; +// CIR: cir.global "private" internal dsolocal @var = #cir.int<0> : !s32i +int get_var(void) { + return var; +} + +// Should generate available_externally linkage. 
+inline int availableExternallyMethod(void) { return 0; } +void callAvailableExternallyMethod(void) { availableExternallyMethod(); } +// CIR: cir.func available_externally @availableExternallyMethod diff --git a/clang/test/CIR/CodeGen/literals.c b/clang/test/CIR/CodeGen/literals.c new file mode 100644 index 000000000000..b8a33ad11559 --- /dev/null +++ b/clang/test/CIR/CodeGen/literals.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +int literals(void) { + char a = 'a'; // char literals are int in C + // CHECK: %[[RES:[0-9]+]] = cir.const #cir.int<97> : !s32i + // CHECK: %{{[0-9]+}} = cir.cast(integral, %[[RES]] : !s32i), !s8i + + return 0; +} diff --git a/clang/test/CIR/CodeGen/literals.cpp b/clang/test/CIR/CodeGen/literals.cpp new file mode 100644 index 000000000000..87290b888185 --- /dev/null +++ b/clang/test/CIR/CodeGen/literals.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +int literals() { + char a = 'a'; // char literals have char type in C++ + // CHECK: %{{[0-9]+}} = cir.const #cir.int<97> : !s8i + + return 0; +} diff --git a/clang/test/CIR/CodeGen/loop-scope.cpp b/clang/test/CIR/CodeGen/loop-scope.cpp new file mode 100644 index 000000000000..fcc45a892e3d --- /dev/null +++ b/clang/test/CIR/CodeGen/loop-scope.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cpp.cir +// RUN: FileCheck --input-file=%t.cpp.cir %s --check-prefix=CPPSCOPE +// RUN: %clang_cc1 -x c -std=c11 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir +// RUN: FileCheck --input-file=%t.c.cir %s --check-prefix=CSCOPE + +void l0(void) { + for (int i = 0;;) { + int j = 0; + } +} + +// CPPSCOPE: cir.func @_Z2l0v() +// CPPSCOPE-NEXT: cir.scope { +// CPPSCOPE-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CPPSCOPE-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["j", init] {alignment = 4 : i64} +// CPPSCOPE-NEXT: %2 = cir.const #cir.int<0> : !s32i +// CPPSCOPE-NEXT: cir.store %2, %0 : !s32i, !cir.ptr +// CPPSCOPE-NEXT: cir.for : cond { + +// CSCOPE: cir.func @l0() +// CSCOPE-NEXT: cir.scope { +// CSCOPE-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// CSCOPE-NEXT: %1 = cir.const #cir.int<0> : !s32i +// CSCOPE-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CSCOPE-NEXT: cir.for : cond { + +// CSCOPE: } body { +// CSCOPE-NEXT: cir.scope { +// CSCOPE-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["j", init] {alignment = 4 : i64} diff --git a/clang/test/CIR/CodeGen/loop.cpp b/clang/test/CIR/CodeGen/loop.cpp new file mode 100644 index 000000000000..64909759fd25 --- /dev/null +++ b/clang/test/CIR/CodeGen/loop.cpp @@ -0,0 +1,259 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void l0() { + for (;;) { + } +} + +// CHECK: cir.func @_Z2l0v +// CHECK: cir.for : cond { +// CHECK: %[[#TRUE:]] = cir.const #true +// CHECK: cir.condition(%[[#TRUE]]) + +void l1() { + int x = 0; + for (int i = 0; i < 10; i = i + 1) { + x = x + 1; + } +} + +// CHECK: cir.func @_Z2l1v +// CHECK: cir.for : cond { +// CHECK-NEXT: %4 = cir.load %2 : !cir.ptr, !s32i +// CHECK-NEXT: %5 = cir.const #cir.int<10> : !s32i +// CHECK-NEXT: %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool +// CHECK-NEXT: cir.condition(%6) +// CHECK-NEXT: } body { +// CHECK-NEXT: %4 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: %5 = 
cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %6 = cir.binop(add, %4, %5) nsw : !s32i +// CHECK-NEXT: cir.store %6, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: %4 = cir.load %2 : !cir.ptr, !s32i +// CHECK-NEXT: %5 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %6 = cir.binop(add, %4, %5) nsw : !s32i +// CHECK-NEXT: cir.store %6, %2 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } + +void l2(bool cond) { + int i = 0; + while (cond) { + i = i + 1; + } + while (true) { + i = i + 1; + } + while (1) { + i = i + 1; + } +} + +// CHECK: cir.func @_Z2l2b +// CHECK: cir.scope { +// CHECK-NEXT: cir.while { +// CHECK-NEXT: %3 = cir.load %0 : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.condition(%3) +// CHECK-NEXT: } do { +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.while { +// CHECK-NEXT: %[[#TRUE:]] = cir.const #true +// CHECK-NEXT: cir.condition(%[[#TRUE]]) +// CHECK-NEXT: } do { +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.while { +// CHECK-NEXT: %3 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool +// CHECK-NEXT: cir.condition(%4) +// CHECK-NEXT: } do { +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } + +void l3(bool cond) { + int i = 0; + do { + i = i + 1; + } while (cond); + do { + i = i + 1; + } while (true); + do { + i = i + 1; + } while (1); +} + +// CHECK: cir.func @_Z2l3b +// CHECK: cir.scope { +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[#TRUE:]] = cir.load %0 : !cir.ptr, !cir.bool +// CHECK-NEXT: cir.condition(%[[#TRUE]]) +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[#TRUE:]] = cir.const #true +// CHECK-NEXT: cir.condition(%[[#TRUE]]) +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %3 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %5 = cir.binop(add, %3, %4) nsw : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %3 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool +// CHECK-NEXT: cir.condition(%4) +// CHECK-NEXT: } +// CHECK-NEXT: } 
+ +void l4() { + int i = 0, y = 100; + while (true) { + i = i + 1; + if (i < 10) + continue; + y = y - 20; + } +} + +// CHECK: cir.func @_Z2l4v +// CHECK: cir.while { +// CHECK-NEXT: %[[#TRUE:]] = cir.const #true +// CHECK-NEXT: cir.condition(%[[#TRUE]]) +// CHECK-NEXT: } do { +// CHECK-NEXT: %4 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: %5 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %6 = cir.binop(add, %4, %5) nsw : !s32i +// CHECK-NEXT: cir.store %6, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %10 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: %11 = cir.const #cir.int<10> : !s32i +// CHECK-NEXT: %12 = cir.cmp(lt, %10, %11) : !s32i, !cir.bool +// CHECK-NEXT: cir.if %12 { +// CHECK-NEXT: cir.continue +// CHECK-NEXT: } +// CHECK-NEXT: } + +void l5() { + do { + } while (0); +} + +// CHECK: cir.func @_Z2l5v() +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.do { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %0 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: %1 = cir.cast(int_to_bool, %0 : !s32i), !cir.bool +// CHECK-NEXT: cir.condition(%1) +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +void l6() { + while (true) { + return; + } +} + +// CHECK: cir.func @_Z2l6v() +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.while { +// CHECK-NEXT: %[[#TRUE:]] = cir.const #true +// CHECK-NEXT: cir.condition(%[[#TRUE]]) +// CHECK-NEXT: } do { +// CHECK-NEXT: cir.return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +void unreachable_after_break() { + for (;;) { + break; + int x = 1; + } +} + +// CHECK-NEXT: cir.func @_Z23unreachable_after_breakv() +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %1 = cir.const #true +// CHECK-NEXT: cir.condition(%1) +// CHECK-NEXT: } body { +// CHECK-NEXT: cir.break +// CHECK-NEXT: ^bb1: // no predecessors +// CHECK-NEXT: %1 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +void unreachable_after_continue() { + for (;;) { + continue; + int x = 1; + } +} + +// CHECK-NEXT: cir.func @_Z26unreachable_after_continuev() +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.for : cond { +// CHECK-NEXT: %1 = cir.const #true +// CHECK-NEXT: cir.condition(%1) +// CHECK-NEXT: } body { +// CHECK-NEXT: cir.continue +// CHECK-NEXT: ^bb1: // no predecessors +// CHECK-NEXT: %1 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } step { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.return +// CHECK-NEXT: } diff --git a/clang/test/CIR/CodeGen/lvalue-refs.cpp b/clang/test/CIR/CodeGen/lvalue-refs.cpp new file mode 100644 index 000000000000..f1e6dd2fed2a --- /dev/null +++ b/clang/test/CIR/CodeGen/lvalue-refs.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +struct String { + long size; +}; + +void split(String &S) {} + +// CHECK: cir.func @_Z5splitR6String(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["S", init] + +void foo() { + String s; + split(s); +} + +// CHECK: cir.func @_Z3foov() +// 
CHECK: %0 = cir.alloca !ty_String, !cir.ptr, ["s"] +// CHECK: cir.call @_Z5splitR6String(%0) : (!cir.ptr) -> () diff --git a/clang/test/CIR/CodeGen/move.cpp b/clang/test/CIR/CodeGen/move.cpp new file mode 100644 index 000000000000..2d889d134b43 --- /dev/null +++ b/clang/test/CIR/CodeGen/move.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +namespace std { + +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; + +template +typename remove_reference::type &&move(T &&t) noexcept; + +struct string { + string(); +}; + +} // std namespace + +// CHECK: ![[StdString:ty_.*]] = !cir.struct}> + +std::string getstr(); +void emplace(std::string &&s); + +void t() { + emplace(std::move(getstr())); +} + +// FIXME: we should explicitly model std::move here since it will +// be useful at least for the lifetime checker. + +// CHECK: cir.func @_Z1tv() +// CHECK: %[[#Addr:]] = cir.alloca ![[StdString]], {{.*}} ["ref.tmp0"] +// CHECK: %[[#RValStr:]] = cir.call @_Z6getstrv() : () -> ![[StdString]] +// CHECK: cir.store %[[#RValStr]], %[[#Addr]] +// CHECK: cir.call @_Z7emplaceOSt6string(%[[#Addr]]) +// CHECK: cir.return +// CHECK: } diff --git a/clang/test/CIR/CodeGen/multi-vtable.cpp b/clang/test/CIR/CodeGen/multi-vtable.cpp new file mode 100644 index 000000000000..ba988a639792 --- /dev/null +++ b/clang/test/CIR/CodeGen/multi-vtable.cpp @@ -0,0 +1,126 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +class Mother { +public: + virtual void MotherFoo() {} + void simple() { } + virtual void MotherFoo2() {} +}; + +class Father { +public: + virtual void FatherFoo() {} +}; + +class Child : public Mother, public Father { +public: + void MotherFoo() override {} +}; + +int main() { + Mother *b = new Mother(); + b->MotherFoo(); + b->simple(); + Child *c = new Child(); + c->MotherFoo(); + return 0; +} + +// CIR: ![[VTypeInfoA:ty_.*]] = !cir.struct>, !cir.ptr>}> +// CIR: ![[VTypeInfoB:ty_.*]] = !cir.struct>, !cir.ptr>, !cir.int, !cir.int, !cir.ptr>, !cir.int, !cir.ptr>, !cir.int}> +// CIR: ![[VTableTypeMother:ty_.*]] = !cir.struct> x 4>}> +// CIR: ![[VTableTypeFather:ty_.*]] = !cir.struct> x 3>}> +// CIR: ![[VTableTypeChild:ty_.*]] = !cir.struct> x 4>, !cir.array> x 3>}> +// CIR: !ty_Father = !cir.struct ()>>>} #cir.record.decl.ast> +// CIR: !ty_Mother = !cir.struct ()>>>} #cir.record.decl.ast> +// CIR: !ty_Child = !cir.struct ()>>>} #cir.record.decl.ast>, !cir.struct ()>>>} #cir.record.decl.ast>} #cir.record.decl.ast> + +// CIR: cir.func linkonce_odr @_ZN6MotherC2Ev(%arg0: !cir.ptr +// CIR: %{{[0-9]+}} = cir.vtable.address_point(@_ZTV6Mother, vtable_index = 0, address_point_index = 2) : !cir.ptr>> +// CIR: %{{[0-9]+}} = cir.cast(bitcast, %{{[0-9]+}} : !cir.ptr), !cir.ptr>>> +// CIR: cir.store %2, %{{[0-9]+}} : !cir.ptr>>, !cir.ptr>>> +// CIR: cir.return +// CIR: } + +// LLVM-DAG: define linkonce_odr void @_ZN6MotherC2Ev(ptr %0) +// LLVM-DAG: store ptr getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV6Mother, i32 0, i32 0, i32 2), ptr %{{[0-9]+}}, align 8 +// LLVM-DAG: ret void +// 
LLVM-DAG: } + +// CIR: cir.func linkonce_odr @_ZN5ChildC2Ev(%arg0: !cir.ptr +// CIR: %{{[0-9]+}} = cir.vtable.address_point(@_ZTV5Child, vtable_index = 0, address_point_index = 2) : !cir.ptr>> +// CIR: %{{[0-9]+}} = cir.cast(bitcast, %{{[0-9]+}} : !cir.ptr), !cir.ptr>>> +// CIR: cir.store %{{[0-9]+}}, %{{[0-9]+}} : !cir.ptr>>, !cir.ptr>>> +// CIR: %{{[0-9]+}} = cir.vtable.address_point(@_ZTV5Child, vtable_index = 1, address_point_index = 2) : !cir.ptr>> +// CIR: %{{[0-9]+}} = cir.const #cir.int<8> : !s64i +// CIR: %{{[0-9]+}} = cir.ptr_stride(%{{[0-9]+}} : !cir.ptr, %{{[0-9]+}} : !s64i), !cir.ptr +// CIR: %11 = cir.cast(bitcast, %{{[0-9]+}} : !cir.ptr), !cir.ptr>>> +// CIR: cir.store %{{[0-9]+}}, %{{[0-9]+}} : !cir.ptr>>, !cir.ptr>>> +// CIR: cir.return +// CIR: } + +// LLVM-DAG: $_ZTS6Mother = comdat any +// LLVM-DAG: $_ZTS5Child = comdat any +// LLVM-DAG: $_ZTS6Father = comdat any + +// LLVM-DAG: define linkonce_odr void @_ZN5ChildC2Ev(ptr %0) +// LLVM-DAG: store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV5Child, i32 0, i32 0, i32 2), ptr %{{[0-9]+}}, align 8 +// LLVM-DAG: %{{[0-9]+}} = getelementptr %class.Child, ptr %3, i64 8 +// LLVM-DAG: store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV5Child, i32 0, i32 1, i32 2), ptr %{{[0-9]+}}, align 8 +// LLVM-DAG: ret void +// } + +// CIR: cir.func @main() -> !s32i extra(#fn_attr) { + +// CIR: %{{[0-9]+}} = cir.vtable.address_point( %{{[0-9]+}} : !cir.ptr)>>>, vtable_index = 0, address_point_index = 0) : !cir.ptr)>>> + +// CIR: %{{[0-9]+}} = cir.vtable.address_point( %{{[0-9]+}} : !cir.ptr)>>>, vtable_index = 0, address_point_index = 0) : !cir.ptr)>>> + +// CIR: } + +// vtable for Mother +// CIR: cir.global linkonce_odr @_ZTV6Mother = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI6Mother> : !cir.ptr, #cir.global_view<@_ZN6Mother9MotherFooEv> : !cir.ptr, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr]> : !cir.array x 4>}> : ![[VTableTypeMother]] {alignment = 8 : i64} +// LLVM-DAG: @_ZTV6Mother = linkonce_odr global { [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI6Mother, ptr @_ZN6Mother9MotherFooEv, ptr @_ZN6Mother10MotherFoo2Ev] } + +// vtable for __cxxabiv1::__class_type_info +// CIR: cir.global "private" external @_ZTVN10__cxxabiv117__class_type_infoE : !cir.ptr> +// LLVM-DAG: @_ZTVN10__cxxabiv117__class_type_infoE = external global ptr + +// typeinfo name for Mother +// CIR: cir.global linkonce_odr comdat @_ZTS6Mother = #cir.const_array<"6Mother" : !cir.array> : !cir.array {alignment = 1 : i64} +// LLVM-DAG: @_ZTS6Mother = linkonce_odr global [7 x i8] c"6Mother", comdat + +// typeinfo for Mother +// CIR: cir.global constant external @_ZTI6Mother = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS6Mother> : !cir.ptr}> : ![[VTypeInfoA]] {alignment = 8 : i64} +// LLVM-DAG: @_ZTI6Mother = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 2), ptr @_ZTS6Mother } + +// vtable for Father +// CIR: cir.global linkonce_odr @_ZTV6Father = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI6Father> : !cir.ptr, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr]> : !cir.array x 3>}> : ![[VTableTypeFather]] {alignment = 8 : i64} +// LLVM-DAG: @_ZTV6Father = linkonce_odr global { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI6Father, ptr @_ZN6Father9FatherFooEv] } + +// vtable for Child +// CIR: cir.global linkonce_odr @_ZTV5Child = 
#cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI5Child> : !cir.ptr, #cir.global_view<@_ZN5Child9MotherFooEv> : !cir.ptr, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr]> : !cir.array x 4>, #cir.const_array<[#cir.ptr<-8 : i64> : !cir.ptr, #cir.global_view<@_ZTI5Child> : !cir.ptr, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr]> : !cir.array x 3>}> : ![[VTableTypeChild]] {alignment = 8 : i64} +// LLVM-DAG: @_ZTV5Child = linkonce_odr global { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI5Child, ptr @_ZN5Child9MotherFooEv, ptr @_ZN6Mother10MotherFoo2Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr @_ZTI5Child, ptr @_ZN6Father9FatherFooEv] } + +// vtable for __cxxabiv1::__vmi_class_type_info +// CIR: cir.global "private" external @_ZTVN10__cxxabiv121__vmi_class_type_infoE : !cir.ptr> +// LLVM-DAG: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global ptr + +// typeinfo name for Child +// CIR: cir.global linkonce_odr comdat @_ZTS5Child = #cir.const_array<"5Child" : !cir.array> : !cir.array {alignment = 1 : i64} +// LLVM-DAG: @_ZTS5Child = linkonce_odr global [6 x i8] c"5Child", comdat + +// typeinfo name for Father +// CIR: cir.global linkonce_odr comdat @_ZTS6Father = #cir.const_array<"6Father" : !cir.array> : !cir.array {alignment = 1 : i64} +// LLVM-DAG: @_ZTS6Father = linkonce_odr global [7 x i8] c"6Father", comdat + +// typeinfo for Father +// CIR: cir.global constant external @_ZTI6Father = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS6Father> : !cir.ptr}> : !ty_anon_struct {alignment = 8 : i64} +// LLVM-DAG: @_ZTI6Father = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 2), ptr @_ZTS6Father } + +// typeinfo for Child +// CIR: cir.global constant external @_ZTI5Child = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS5Child> : !cir.ptr, #cir.int<0> : !u32i, #cir.int<2> : !u32i, #cir.global_view<@_ZTI6Mother> : !cir.ptr, #cir.int<2> : !s64i, #cir.global_view<@_ZTI6Father> : !cir.ptr, #cir.int<2050> : !s64i}> : ![[VTypeInfoB]] {alignment = 8 : i64} +// LLVM-DAG: @_ZTI5Child = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i32 2), ptr @_ZTS5Child, i32 0, i32 2, ptr @_ZTI6Mother, i64 2, ptr @_ZTI6Father, i64 2050 } diff --git a/clang/test/CIR/CodeGen/new-null.cpp b/clang/test/CIR/CodeGen/new-null.cpp new file mode 100644 index 000000000000..23e43219b451 --- /dev/null +++ b/clang/test/CIR/CodeGen/new-null.cpp @@ -0,0 +1,79 @@ +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-cir -o %t.cir +// RUN: FileCheck --input-file=%t.cir -check-prefix=CIR %s +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-llvm -o %t.ll +// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s + +// TODO: This file is inspired by clang/test/CodeGenCXX/new.cpp, add all tests from it. + +typedef __typeof__(sizeof(0)) size_t; + +// Declare an 'operator new' template to tickle a bug in __builtin_operator_new. +template void *operator new(size_t, int (*)(T)); + +// Ensure that this declaration doesn't cause operator new to lose its +// 'noalias' attribute. +void *operator new[](size_t); + +namespace std { + struct nothrow_t {}; +} +std::nothrow_t nothrow; + +// Declare the reserved placement operators. 
+void *operator new(size_t, void*) throw(); +void operator delete(void*, void*) throw(); +void *operator new[](size_t, void*) throw(); +void operator delete[](void*, void*) throw(); + +// Declare the replaceable global allocation operators. +void *operator new(size_t, const std::nothrow_t &) throw(); +void *operator new[](size_t, const std::nothrow_t &) throw(); +void operator delete(void *, const std::nothrow_t &) throw(); +void operator delete[](void *, const std::nothrow_t &) throw(); + +// Declare some other placemenet operators. +void *operator new(size_t, void*, bool) throw(); +void *operator new[](size_t, void*, bool) throw(); + +namespace test15 { + struct A { A(); ~A(); }; + // CIR-DAG: ![[TEST15A:.*]] = !cir.struct} + + void test0a(void *p) { + new (p) A(); + } + + // CIR-LABEL: cir.func @_ZN6test156test0bEPv( + // CIR-SAME: %[[VAL_0:.*]]: !cir.ptr + // CIR: %[[VAL_1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] {alignment = 8 : i64} + // CIR: cir.store %[[VAL_0]], %[[VAL_1]] : !cir.ptr, !cir.ptr> + // CIR: %[[VAL_2:.*]] = cir.const #cir.int<1> : !u64i + // CIR: %[[VAL_3:.*]] = cir.load %[[VAL_1]] : !cir.ptr>, !cir.ptr + // CIR: %[[VAL_4:.*]] = cir.const #true + // CIR: %[[VAL_5:.*]] = cir.call @_ZnwmPvb(%[[VAL_2]], %[[VAL_3]], %[[VAL_4]]) + // CIR: %[[VAL_6:.*]] = cir.const #cir.ptr : !cir.ptr + // CIR: %[[VAL_7:.*]] = cir.cmp(ne, %[[VAL_5]], %[[VAL_6]]) : !cir.ptr, !cir.bool + // CIR: %[[VAL_8:.*]] = cir.cast(bitcast, %[[VAL_5]] : !cir.ptr), !cir.ptr + // CIR: cir.if %[[VAL_7]] { + // CIR: cir.call @_ZN6test151AC1Ev(%[[VAL_8]]) : (!cir.ptr) -> () + // CIR: } + // CIR: cir.return + // CIR: } + + // LLVM-LABEL: _ZN6test156test0bEPv + // LLVM: %[[VAL_0:.*]] = alloca ptr, i64 1, align 8 + // LLVM: store ptr %[[VAL_1:.*]], ptr %[[VAL_0]], align 8 + // LLVM: %[[VAL_2:.*]] = load ptr, ptr %[[VAL_0]], align 8 + // LLVM: %[[VAL_3:.*]] = call ptr @_ZnwmPvb(i64 1, ptr %[[VAL_2]], i8 1) + // LLVM: %[[VAL_4:.*]] = icmp ne ptr %[[VAL_3]], null + // LLVM: br i1 %[[VAL_4]], label %[[VAL_5:.*]], label %[[VAL_6:.*]], + // LLVM: [[VAL_5]]: ; preds = %[[VAL_7:.*]] + // LLVM: call void @_ZN6test151AC1Ev(ptr %[[VAL_3]]) + // LLVM: br label %[[VAL_6]], + // LLVM: [[VAL_6]]: ; preds = %[[VAL_5]], %[[VAL_7]] + // LLVM: ret void + + void test0b(void *p) { + new (p, true) A(); + } +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp new file mode 100644 index 000000000000..acf7df22d5a9 --- /dev/null +++ b/clang/test/CIR/CodeGen/new.cpp @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +struct S { + S(int, int); +}; + +void m(int a, int b) { + std::shared_ptr l = std::make_shared(a, b); +} + +// CHECK: cir.func linkonce_odr @_ZSt11make_sharedI1SJRiS1_EESt10shared_ptrIT_EDpOT0_( +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["args", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["args", init] {alignment = 8 : i64} +// CHECK: %2 = cir.alloca !ty_std3A3Ashared_ptr3CS3E, !cir.ptr, ["__retval"] {alignment = 1 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg1, %1 : !cir.ptr, !cir.ptr> +// CHECK: cir.scope { +// CHECK: %4 = cir.const #cir.int<1> : !u64i +// CHECK: %5 = cir.call @_Znwm(%4) : (!u64i) -> !cir.ptr +// CHECK: %6 = cir.cast(bitcast, %5 : !cir.ptr), !cir.ptr +// CHECK: %7 = cir.load %0 : !cir.ptr>, 
!cir.ptr +// CHECK: %8 = cir.load %7 : !cir.ptr, !s32i +// CHECK: %9 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK: %10 = cir.load %9 : !cir.ptr, !s32i +// CHECK: cir.call @_ZN1SC1Eii(%6, %8, %10) : (!cir.ptr, !s32i, !s32i) -> () +// CHECK: cir.call @_ZNSt10shared_ptrI1SEC1EPS0_(%2, %6) : (!cir.ptr, !cir.ptr) -> () +// CHECK: } + +class B { +public: + void construct(B* __p) { + ::new ((void*)__p) B; + } +}; + +// CHECK: cir.func linkonce_odr @_ZN1B9constructEPS_(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["__p", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg1, %1 : !cir.ptr, !cir.ptr> +// CHECK: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %3 = cir.const #cir.int<1> : !u64i +// CHECK: %4 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK: %5 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr +// CHECK: %6 = cir.cast(bitcast, %5 : !cir.ptr), !cir.ptr + +// cir.call @B::B()(%new_placament_ptr) +// CHECK: cir.call @_ZN1BC1Ev(%6) : (!cir.ptr) -> () +// CHECK: cir.return +// CHECK: } + +void t() { + B b; + b.construct(&b); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/no-common.c b/clang/test/CIR/CodeGen/no-common.c new file mode 100644 index 000000000000..61ecea191636 --- /dev/null +++ b/clang/test/CIR/CodeGen/no-common.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -emit-cir -o - | FileCheck %s -check-prefix=CHECK-DEFAULT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -fno-common -emit-cir -o - | FileCheck %s -check-prefix=CHECK-DEFAULT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -fcommon -emit-cir -o - | FileCheck %s -check-prefix=CHECK-COMMON + +// CHECK-COMMON: cir.global common @x +// CHECK-DEFAULT: cir.global external @x +int x; + +// CHECK-COMMON: cir.global external @ABC +// CHECK-DEFAULT: cir.global external @ABC +typedef void* (*fn_t)(long a, long b, char *f, int c); +fn_t ABC __attribute__ ((nocommon)); + +// CHECK-COMMON: cir.global common @y +// CHECK-DEFAULT: cir.global common @y +int y __attribute__((common)); diff --git a/clang/test/CIR/CodeGen/no-proto-fun-ptr.c b/clang/test/CIR/CodeGen/no-proto-fun-ptr.c new file mode 100644 index 000000000000..b4d92db11963 --- /dev/null +++ b/clang/test/CIR/CodeGen/no-proto-fun-ptr.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +void empty(); + +void check_noproto_ptr() { + void (*fun)(void) = empty; +} + +// CHECK: cir.func no_proto @check_noproto_ptr() +// CHECK: [[ALLOC:%.*]] = cir.alloca !cir.ptr>, !cir.ptr>>, ["fun", init] {alignment = 8 : i64} +// CHECK: [[GGO:%.*]] = cir.get_global @empty : !cir.ptr> +// CHECK: cir.store [[GGO]], [[ALLOC]] : !cir.ptr>, !cir.ptr>> +// CHECK: cir.return + +void empty(void) {} + +void buz() { + void (*func)(); + (*func)(); +} + +// CHECK: cir.func no_proto @buz() +// CHECK: [[FNPTR_ALLOC:%.*]] = cir.alloca !cir.ptr>, !cir.ptr>>, ["func"] {alignment = 8 : i64} +// CHECK: [[FNPTR:%.*]] = cir.load deref [[FNPTR_ALLOC]] : !cir.ptr>>, !cir.ptr> +// CHECK: [[CAST:%.*]] = cir.cast(bitcast, %1 : !cir.ptr>), !cir.ptr> +// CHECK: cir.call [[CAST]]() : (!cir.ptr>) -> () +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/no-proto-is-void.cpp b/clang/test/CIR/CodeGen/no-proto-is-void.cpp new file mode 100644 index 000000000000..7ab958f8fd00 --- /dev/null +++ 
b/clang/test/CIR/CodeGen/no-proto-is-void.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -std=c2x -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Both CXX and C2X don't support no-prototype functions. They default to void. +int noProto(); +// CHECK: cir.func @{{.*}}noProto{{.*}}() -> !s32i +int test(int x) { + return noProto(); + // CHECK {{.+}} = cir.call @{{.*}}noProto{{.*}}() : () -> !s32i +} +int noProto() { return 0; } diff --git a/clang/test/CIR/CodeGen/no-prototype.c b/clang/test/CIR/CodeGen/no-prototype.c new file mode 100644 index 000000000000..c119304ce54d --- /dev/null +++ b/clang/test/CIR/CodeGen/no-prototype.c @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +//===----------------------------------------------------------------------===// +// DEFINED BEHAVIOUR +//===----------------------------------------------------------------------===// + +// No-proto definition followed by a correct call. +int noProto0(x) int x; { return x; } +// CHECK: cir.func no_proto @noProto0(%arg0: !s32i {{.+}}) -> !s32i +int test0(int x) { + // CHECK: cir.func @test0 + return noProto0(x); // We know the definition. Should be a direct call. + // CHECK: %{{.+}} = cir.call @noProto0(%{{.+}}) +} + +// Declaration without prototype followed by its definition, then a correct call. +// +// Prototyped definition overrides no-proto declaration before any call is made, +// only allowing calls with proper arguments. This is the only case where the +// definition is not marked as no-proto. +int noProto1(); +int noProto1(int x) { return x; } +// CHECK: cir.func @noProto1(%arg0: !s32i {{.+}}) -> !s32i +int test1(int x) { + // CHECK: cir.func @test1 + return noProto1(x); + // CHECK: %{{.+}} = cir.call @noProto1(%{{[0-9]+}}) : (!s32i) -> !s32i +} + +// Declaration without prototype followed by a correct call, then its definition. +// +// Call to no-proto is made before definition, so a variadic call that takes anything +// is created. Later, when the definition is found, no-proto is replaced. +int noProto2(); +int test2(int x) { + return noProto2(x); + // CHECK: [[GGO:%.*]] = cir.get_global @noProto2 : !cir.ptr> + // CHECK: {{.*}} = cir.call [[GGO]](%{{[0-9]+}}) : (!cir.ptr>, !s32i) -> !s32i +} +int noProto2(int x) { return x; } +// CHECK: cir.func no_proto @noProto2(%arg0: !s32i {{.+}}) -> !s32i + +// No-proto declaration without definition (any call here is "correct"). +// +// Call to no-proto is made before definition, so a variadic call that takes anything +// is created. Definition is not in the translation unit, so it is left as is. +int noProto3(); +// cir.func private no_proto @noProto3(...) -> !s32i +int test3(int x) { +// CHECK: cir.func @test3 + return noProto3(x); + // CHECK: [[GGO:%.*]] = cir.get_global @noProto3 : !cir.ptr> + // CHECK: [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr>), !cir.ptr> + // CHECK: {{%.*}} = cir.call [[CAST]](%{{[0-9]+}}) : (!cir.ptr>, !s32i) -> !s32i +} + + +//===----------------------------------------------------------------------===// +// UNDEFINED BEHAVIOUR +// +// No-proto definitions followed by incorrect calls. +//===----------------------------------------------------------------------===// + +// No-proto definition followed by an incorrect call due to extra args. 
+int noProto4() { return 0; } +// cir.func private no_proto @noProto4() -> !s32i +int test4(int x) { + return noProto4(x); // Even if we know the definition, this should compile. + // CHECK: [[GGO:%.*]] = cir.get_global @noProto4 : !cir.ptr> + // CHECK: [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr>), !cir.ptr> + // CHECK: {{%.*}} = cir.call [[CAST]]({{%.*}}) : (!cir.ptr>, !s32i) -> !s32i +} + +// No-proto definition followed by an incorrect call due to lack of args. +int noProto5(); +int test5(int x) { + return noProto5(); + // CHECK: [[GGO:%.*]] = cir.get_global @noProto5 : !cir.ptr> + // CHECK: [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr>), !cir.ptr> + // CHECK: {{%.*}} = cir.call [[CAST]]() : (!cir.ptr>) -> !s32i +} +int noProto5(int x) { return x; } +// CHECK: cir.func no_proto @noProto5(%arg0: !s32i {{.+}}) -> !s32i diff --git a/clang/test/CIR/CodeGen/nrvo.cpp b/clang/test/CIR/CodeGen/nrvo.cpp new file mode 100644 index 000000000000..7006c3734ff3 --- /dev/null +++ b/clang/test/CIR/CodeGen/nrvo.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +std::vector test_nrvo() { + std::vector result; + result.push_back("Words bend our thinking to infinite paths of self-delusion"); + return result; +} + +// CHECK: ![[VEC:.*]] = !cir.struct" {!cir.ptr>>, !cir.ptr>>, !cir.ptr>>}> + +// CHECK: cir.func @_Z9test_nrvov() -> ![[VEC]] +// CHECK: %0 = cir.alloca ![[VEC]], !cir.ptr, ["__retval", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !cir.bool, !cir.ptr, ["nrvo"] {alignment = 1 : i64} +// CHECK: %2 = cir.const #false +// CHECK: cir.store %2, %1 : !cir.bool, !cir.ptr +// CHECK: cir.call @_ZNSt6vectorIPKcEC1Ev(%0) : (!cir.ptr) -> () +// CHECK: cir.scope { +// CHECK: %5 = cir.alloca !cir.ptr, !cir.ptr>, ["ref.tmp0"] {alignment = 8 : i64} +// CHECK: %6 = cir.get_global @".str" : !cir.ptr> +// CHECK: %7 = cir.cast(array_to_ptrdecay, %6 : !cir.ptr>), !cir.ptr +// CHECK: cir.store %7, %5 : !cir.ptr, !cir.ptr> +// CHECK: cir.call @_ZNSt6vectorIPKcE9push_backEOS1_(%0, %5) : (!cir.ptr, !cir.ptr>) -> () +// CHECK: } +// CHECK: %3 = cir.const #true +// CHECK: cir.store %3, %1 : !cir.bool, !cir.ptr +// CHECK: %4 = cir.load %0 : !cir.ptr, ![[VEC]] +// CHECK: cir.return %4 : ![[VEC]] +// CHECK: } diff --git a/clang/test/CIR/CodeGen/null-arithmatic-expression.c b/clang/test/CIR/CodeGen/null-arithmatic-expression.c new file mode 100644 index 000000000000..62cde494fb58 --- /dev/null +++ b/clang/test/CIR/CodeGen/null-arithmatic-expression.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +#define NULL ((void *)0) + +char *foo() { + return (char*)NULL + 1; +} + +// CHECK: cir.func no_proto @foo() +// CHECK: [[CONST_1:%[0-9]+]] = cir.const #cir.int<1> : !s32i +// CHECK: {{.*}} = cir.cast(int_to_ptr, [[CONST_1]] : !s32i) +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/offsetof.c b/clang/test/CIR/CodeGen/offsetof.c new file mode 100644 index 000000000000..5cd0d76ff46c --- /dev/null +++ b/clang/test/CIR/CodeGen/offsetof.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +#include + +typedef struct { + int a; + int b; +} A; + +void foo() { + offsetof(A, a); + offsetof(A, b); +} + +// CHECK: cir.func no_proto @foo() +// CHECK: {{.*}} = cir.const #cir.int<0> : !u64i +// CHECK: {{.*}} = cir.const 
#cir.int<4> : !u64i +// CHECK: cir.return + diff --git a/clang/test/CIR/CodeGen/opaque.c b/clang/test/CIR/CodeGen/opaque.c new file mode 100644 index 000000000000..00c11d7c65d1 --- /dev/null +++ b/clang/test/CIR/CodeGen/opaque.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int foo(int x, short y) { + return x ?: y; +} + +// CHECK: cir.func @foo +// CHECK: %[[Load:.*]] = cir.load +// CHECK: %[[Bool:.*]] = cir.cast(int_to_bool, %[[Load]] : !s32i), !cir.bool loc(#loc8) +// CHECK: = cir.ternary(%[[Bool]], true { +// CHECK: cir.yield %[[Load]] \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/operators.cpp b/clang/test/CIR/CodeGen/operators.cpp new file mode 100644 index 000000000000..1d900188f1ce --- /dev/null +++ b/clang/test/CIR/CodeGen/operators.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +class __attribute__((__visibility__("default"))) exception_ptr +{ + void* __ptr_; +public: + explicit operator bool() const noexcept {return __ptr_ != nullptr;} +}; + +// TODO: for now only check that this doesn't crash, in the future check operator +// bool codegen. + +// CHECK: module \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/optnone.cpp b/clang/test/CIR/CodeGen/optnone.cpp new file mode 100644 index 000000000000..1dbb7892a5ad --- /dev/null +++ b/clang/test/CIR/CodeGen/optnone.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR-O0 +// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM-O0 + +// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t2.cir +// RUN: FileCheck --input-file=%t2.cir %s -check-prefix=CIR-O2 +// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t2.ll +// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=LLVM-O2 + +int s0(int a, int b) { + int x = a + b; + if (x > 0) + x = 0; + else + x = 1; + return x; +} + +// CIR-O0: #fn_attr = #cir, nothrow = #cir.nothrow, optnone = #cir.optnone})> +// CIR-O0: cir.func @_Z2s0ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr) + +// CIR-O2-NOT: #fn_attr ={{.*}} optnone + +// LLVM-O0: define dso_local i32 @_Z2s0ii(i32 %0, i32 %1) #[[#ATTR:]] +// LLVM-O0: attributes #[[#ATTR]] = { noinline nounwind optnone } +// LLVM-O2-NOT: attributes #[[#]] = { noinline nounwind optnone } diff --git a/clang/test/CIR/CodeGen/packed-structs.c b/clang/test/CIR/CodeGen/packed-structs.c new file mode 100644 index 000000000000..ea08be405ca5 --- /dev/null +++ b/clang/test/CIR/CodeGen/packed-structs.c @@ -0,0 +1,131 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +#pragma pack(1) + +typedef struct { + int a0; + char a1; +} A; + +typedef struct { + int b0; + char b1; + A a[6]; +} B; + +typedef struct { + int c0; + char c1; +} __attribute__((aligned(2))) C; + + +// CIR: !ty_A = !cir.struct, !cir.int}> +// CIR: !ty_C = !cir.struct, !cir.int, !cir.int}> +// CIR: !ty_D = !cir.struct, !cir.int, !cir.int} +// CIR: 
!ty_F = !cir.struct, !cir.int} +// CIR: !ty_E = !cir.struct, !cir.int, !cir.int} +// CIR: !ty_G = !cir.struct, !cir.int} +// CIR: !ty_H = !cir.struct, !cir.struct, !cir.int} +// CIR: !ty_B = !cir.struct, !cir.int, !cir.array, !cir.int}> x 6>}> +// CIR: !ty_I = !cir.struct, !cir.struct, !cir.struct, !cir.int} +// CIR: !ty_J = !cir.struct, !cir.int, !cir.int, !cir.int, !cir.struct, !cir.struct, !cir.struct, !cir.int} + +// LLVM: %struct.A = type <{ i32, i8 }> +// LLVM: %struct.B = type <{ i32, i8, [6 x %struct.A] }> +// LLVM: %struct.C = type <{ i32, i8, i8 }> +// LLVM: %struct.E = type <{ %struct.D, i32 }> +// LLVM: %struct.D = type <{ i8, i8, i32 }> +// LLVM: %struct.G = type { %struct.F, i8 } +// LLVM: %struct.F = type <{ i64, i8 }> +// LLVM: %struct.J = type <{ i8, i8, i8, i8, %struct.I, i32 }> +// LLVM: %struct.I = type <{ i8, %struct.H }> +// LLVM: %struct.H = type { i32, %union.anon.{{.*}} } + +// CIR: cir.func {{.*@foo()}} +// CIR: {{.*}} = cir.alloca !ty_A, !cir.ptr, ["a"] {alignment = 1 : i64} +// CIR: {{.*}} = cir.alloca !ty_B, !cir.ptr, ["b"] {alignment = 1 : i64} +// CIR: {{.*}} = cir.alloca !ty_C, !cir.ptr, ["c"] {alignment = 2 : i64} + +// LLVM: {{.*}} = alloca %struct.A, i64 1, align 1 +// LLVM: {{.*}} = alloca %struct.B, i64 1, align 1 +// LLVM: {{.*}} = alloca %struct.C, i64 1, align 2 +void foo() { + A a; + B b; + C c; +} + +#pragma pack(2) + +typedef struct { + char b; + int c; +} D; + +typedef struct { + D e; + int f; +} E; + +// CIR: cir.func {{.*@f1()}} +// CIR: {{.*}} = cir.alloca !ty_E, !cir.ptr, ["a"] {alignment = 2 : i64} + +// LLVM: {{.*}} = alloca %struct.E, i64 1, align 2 +void f1() { + E a = {}; +} + +#pragma pack(1) + +typedef struct { + long b; + char c; +} F; + +typedef struct { + F e; + char f; +} G; + +// CIR: cir.func {{.*@f2()}} +// CIR: {{.*}} = cir.alloca !ty_G, !cir.ptr, ["a"] {alignment = 1 : i64} + +// LLVM: {{.*}} = alloca %struct.G, i64 1, align 1 +void f2() { + G a = {}; +} + +#pragma pack(1) + +typedef struct { + int d0; + union { + char null; + int val; + } value; +} H; + +typedef struct { + char t; + H d; +} I; + +typedef struct { + char a0; + char a1; + char a2; + char a3; + I c; + int a; +} J; + +// CIR: cir.func {{.*@f3()}} +// CIR: {{.*}} = cir.alloca !ty_J, !cir.ptr, ["a"] {alignment = 1 : i64} + +// LLVM: {{.*}} = alloca %struct.J, i64 1, align 1 +void f3() { + J a = {0}; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/paren-list-init.cpp b/clang/test/CIR/CodeGen/paren-list-init.cpp new file mode 100644 index 000000000000..20ddefb32a46 --- /dev/null +++ b/clang/test/CIR/CodeGen/paren-list-init.cpp @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -fexceptions -fcxx-exceptions -triple aarch64-none-linux-android21 -Wno-unused-value -fclangir -emit-cir %s -o %t.eh.cir +// RUN: FileCheck --check-prefix=CIR_EH --input-file=%t.eh.cir %s + +struct Vec { + Vec(); + Vec(Vec&&); + ~Vec(); +}; + +struct S1 { + Vec v; +}; + +// CIR-DAG: ![[VecType:.*]] = !cir.struct}> +// CIR-DAG: ![[S1:.*]] = !cir.struct}>}> + +// CIR_EH-DAG: ![[VecType:.*]] = !cir.struct}> +// CIR_EH-DAG: ![[S1:.*]] = !cir.struct}>}> + +template +void make1() { + Vec v; + S1((Vec&&) v); +// CIR: cir.func linkonce_odr @_Z5make1ILi0EEvv() +// CIR: %[[VEC:.*]] = cir.alloca ![[VecType]], !cir.ptr +// CIR: cir.call @_ZN3VecC1Ev(%[[VEC]]) : (!cir.ptr) +// CIR: cir.scope { +// CIR: 
%[[AGG_TMP:.*]] = cir.alloca ![[S1]], !cir.ptr, ["agg.tmp.ensured"] +// CIR: %[[FIELD:.*]] = cir.get_member %[[AGG_TMP]][0] {name = "v"} : !cir.ptr -> !cir.ptr +// CIR: cir.call @_ZN3VecC1EOS_(%[[FIELD]], %[[VEC]]) : (!cir.ptr, !cir.ptr) -> () +// CIR: cir.call @_ZN2S1D1Ev(%[[AGG_TMP]]) : (!cir.ptr) -> () +// CIR: } +// CIR: cir.call @_ZN3VecD1Ev(%[[VEC]]) : (!cir.ptr) -> () +// CIR: cir.return + +// CIR_EH: cir.func linkonce_odr @_Z5make1ILi0EEvv() +// CIR_EH: %[[VEC:.*]] = cir.alloca ![[VecType]], !cir.ptr, ["v", init] + +// Construct v +// CIR_EH: cir.call @_ZN3VecC1Ev(%[[VEC]]) : (!cir.ptr) -> () +// CIR_EH: cir.scope { +// CIR_EH: %1 = cir.alloca ![[S1]], !cir.ptr, ["agg.tmp.ensured"] +// CIR_EH: %2 = cir.get_member %1[0] {name = "v"} : !cir.ptr -> !cir.ptr +// CIR_EH: cir.try synthetic cleanup { + +// Call v move ctor +// CIR_EH: cir.call exception @_ZN3VecC1EOS_{{.*}} cleanup { + +// Destroy v after v move ctor throws +// CIR_EH: cir.call @_ZN3VecD1Ev(%[[VEC]]) +// CIR_EH: cir.yield +// CIR_EH: } +// CIR_EH: cir.yield +// CIR_EH: } catch [#cir.unwind { +// CIR_EH: cir.resume +// CIR_EH: }] +// CIR_EH: cir.call @_ZN2S1D1Ev(%1) : (!cir.ptr) -> () +// CIR_EH: } + +// Destroy v after successful cir.try +// CIR_EH: cir.call @_ZN3VecD1Ev(%[[VEC]]) : (!cir.ptr) -> () +// CIR_EH: cir.return +} + +void foo() { + make1<0>(); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/pass-object-size.c b/clang/test/CIR/CodeGen/pass-object-size.c new file mode 100644 index 000000000000..67935a6f867c --- /dev/null +++ b/clang/test/CIR/CodeGen/pass-object-size.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +void b(void *__attribute__((pass_object_size(0)))); +void e(void *__attribute__((pass_object_size(2)))); +void c() { + int a; + int d[a]; + b(d); + e(d); +} + +// CIR: cir.func no_proto @c() +// CIR: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr, %{{[0-9]+}} : !u64i, ["vla"] {alignment = 16 : i64} +// CIR: [[TMP1:%.*]] = cir.cast(bitcast, [[TMP0]] : !cir.ptr), !cir.ptr +// CIR-NEXT: [[TMP2:%.*]] = cir.objsize([[TMP1]] : , max) -> !u64i +// CIR-NEXT: cir.call @b([[TMP1]], [[TMP2]]) : (!cir.ptr, !u64i) -> () +// CIR: [[TMP3:%.*]] = cir.cast(bitcast, [[TMP0]] : !cir.ptr), !cir.ptr +// CIR: [[TMP4:%.*]] = cir.objsize([[TMP3]] : , min) -> !u64i +// CIR-NEXT: cir.call @e([[TMP3]], [[TMP4]]) : (!cir.ptr, !u64i) -> () + +// LLVM: define dso_local void @c() +// LLVM: [[TMP0:%.*]] = alloca i32, i64 %{{[0-9]+}}, +// LLVM: [[TMP1:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[TMP0]], i1 false, i1 true, i1 false), +// LLVM-NEXT: call void @b(ptr [[TMP0]], i64 [[TMP1]]) +// LLVM: [[TMP2:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[TMP0]], i1 true, i1 true, i1 false), +// LLVM-NEXT: call void @e(ptr [[TMP0]], i64 [[TMP2]]) diff --git a/clang/test/CIR/CodeGen/pointer-arith-ext.c b/clang/test/CIR/CodeGen/pointer-arith-ext.c new file mode 100644 index 000000000000..558ad823cae4 --- /dev/null +++ b/clang/test/CIR/CodeGen/pointer-arith-ext.c @@ -0,0 +1,124 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-int-conversions -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-int-conversions -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck 
--check-prefix=LLVM --input-file=%t.ll %s + +// GNU extensions +typedef void (*FP)(void); +void *f2(void *a, int b) { return a + b; } +// CIR-LABEL: f2 +// CIR: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CIR: %[[STRIDE:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CIR: cir.ptr_stride(%[[PTR]] : !cir.ptr, %[[STRIDE]] : !s32i) + +// LLVM-LABEL: f2 +// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8 +// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4 +// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64 +// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[STRIDE]] + +// These test the same paths above, just make sure it does not crash. +void *f2_0(void *a, int b) { return &a[b]; } +void *f2_1(void *a, int b) { return (a += b); } +void *f3(int a, void *b) { return a + b; } + +void *f3_1(int a, void *b) { return (a += b); } +// CIR-LABEL: @f3_1 +// CIR: %[[NEW_PTR:.*]] = cir.ptr_stride +// CIR: cir.cast(ptr_to_int, %[[NEW_PTR]] : !cir.ptr), !s32i + +// LLVM-LABEL: @f3_1 +// LLVM: %[[NEW_PTR:.*]] = getelementptr +// LLVM: ptrtoint ptr %[[NEW_PTR]] to i32 + +void *f4(void *a, int b) { return a - b; } +// CIR-LABEL: f4 +// CIR: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CIR: %[[STRIDE:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CIR: %[[SUB:.*]] = cir.unary(minus, %[[STRIDE]]) : !s32i, !s32i +// CIR: cir.ptr_stride(%[[PTR]] : !cir.ptr, %[[SUB]] : !s32i) + +// LLVM-LABEL: f4 +// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8 +// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4 +// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64 +// LLVM: %[[SUB:.*]] = sub i64 0, %[[STRIDE]] +// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[SUB]] + +// Similar to f4, just make sure it does not crash. +void *f4_1(void *a, int b) { return (a -= b); } + +FP f5(FP a, int b) { return a + b; } +// CIR-LABEL: f5 +// CIR: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>>, !cir.ptr> +// CIR: %[[STRIDE:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CIR: cir.ptr_stride(%[[PTR]] : !cir.ptr>, %[[STRIDE]] : !s32i) + +// LLVM-LABEL: f5 +// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8 +// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4 +// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64 +// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[STRIDE]] + +// These test the same paths above, just make sure it does not crash. +FP f5_1(FP a, int b) { return (a += b); } +FP f6(int a, FP b) { return a + b; } +FP f6_1(int a, FP b) { return (a += b); } + +FP f7(FP a, int b) { return a - b; } +// CIR-LABEL: f7 +// CIR: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>>, !cir.ptr> +// CIR: %[[STRIDE:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CIR: %[[SUB:.*]] = cir.unary(minus, %[[STRIDE]]) : !s32i, !s32i +// CIR: cir.ptr_stride(%[[PTR]] : !cir.ptr>, %[[SUB]] : !s32i) + +// LLVM-LABEL: f7 +// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8 +// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4 +// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64 +// LLVM: %[[SUB:.*]] = sub i64 0, %[[STRIDE]] +// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[SUB]] + +// Similar to f7, just make sure it does not crash. 
+FP f7_1(FP a, int b) { return (a -= b); } + +void f8(void *a, int b) { return *(a + b); } +// CIR-LABEL: f8 +// CIR: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CIR: %[[STRIDE:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CIR: cir.ptr_stride(%[[PTR]] : !cir.ptr, %[[STRIDE]] : !s32i) +// CIR: cir.return + +// LLVM-LABEL: f8 +// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8 +// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4 +// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64 +// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[STRIDE]] +// LLVM: ret void + +void f8_1(void *a, int b) { return a[b]; } +// CIR-LABEL: f8_1 +// CIR: %[[PTR:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// CIR: %[[STRIDE:.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CIR: cir.ptr_stride(%[[PTR]] : !cir.ptr, %[[STRIDE]] : !s32i) +// CIR: cir.return + +// LLVM-LABEL: f8_1 +// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8 +// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4 +// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64 +// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[STRIDE]] +// LLVM: ret void + +unsigned char *p(unsigned int x) { + unsigned char *p; + p += 16-x; + return p; +} + +// CIR-LABEL: @p +// CIR: %[[SUB:.*]] = cir.binop(sub +// CIR: cir.ptr_stride({{.*}} : !cir.ptr, %[[SUB]] : !u32i), !cir.ptr + +// LLVM-LABEL: @p +// LLVM: getelementptr i8, ptr {{.*}} diff --git a/clang/test/CIR/CodeGen/pointer-to-data-member.cpp b/clang/test/CIR/CodeGen/pointer-to-data-member.cpp new file mode 100644 index 000000000000..6fa8901333ba --- /dev/null +++ b/clang/test/CIR/CodeGen/pointer-to-data-member.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct Point { + int x; + int y; + int z; +}; +// CHECK-DAG: !ty_Point = !cir.struct, !cir.int, !cir.int} + +struct Incomplete; +// CHECK-DAG: !ty_Incomplete = !cir.struct + +int Point::*pt_member = &Point::x; +// CHECK: cir.global external @pt_member = #cir.data_member<0> : !cir.data_member + +auto test1() -> int Point::* { + return &Point::y; +} +// CHECK: cir.func @_Z5test1v() -> !cir.data_member +// CHECK: %{{.+}} = cir.const #cir.data_member<1> : !cir.data_member +// CHECK: } + +int test2(const Point &pt, int Point::*member) { + return pt.*member; +} +// CHECK: cir.func @_Z5test2RK5PointMS_i +// CHECK: %{{.+}} = cir.get_runtime_member %{{.+}}[%{{.+}} : !cir.data_member] : !cir.ptr -> !cir.ptr +// CHECK: } + +int test3(const Point *pt, int Point::*member) { + return pt->*member; +} +// CHECK: cir.func @_Z5test3PK5PointMS_i +// CHECK: %{{.+}} = cir.get_runtime_member %{{.+}}[%{{.+}} : !cir.data_member] : !cir.ptr -> !cir.ptr +// CHECK: } + +auto test4(int Incomplete::*member) -> int Incomplete::* { + return member; +} +// CHECK: cir.func @_Z5test4M10Incompletei(%arg0: !cir.data_member loc({{.+}})) -> !cir.data_member + +int test5(Incomplete *ic, int Incomplete::*member) { + return ic->*member; +} +// CHECK: cir.func @_Z5test5P10IncompleteMS_i +// CHECK: %{{.+}} = cir.get_runtime_member %{{.+}}[%{{.+}} : !cir.data_member] : !cir.ptr -> !cir.ptr +// CHECK: } + +auto test_null() -> int Point::* { + return nullptr; +} +// CHECK: cir.func @_Z9test_nullv +// CHECK: %{{.+}} = cir.const #cir.data_member : !cir.data_member +// CHECK: } + +auto test_null_incomplete() -> int Incomplete::* { + return nullptr; +} +// CHECK: cir.func @_Z20test_null_incompletev +// CHECK: %{{.+}} = cir.const #cir.data_member : !cir.data_member +// 
CHECK: } diff --git a/clang/test/CIR/CodeGen/pointer-to-member-func.cpp b/clang/test/CIR/CodeGen/pointer-to-member-func.cpp new file mode 100644 index 000000000000..6f8b3363bfa3 --- /dev/null +++ b/clang/test/CIR/CodeGen/pointer-to-member-func.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct Foo { + void m1(int); + virtual void m2(int); + virtual void m3(int); +}; + +auto make_non_virtual() -> void (Foo::*)(int) { + return &Foo::m1; +} + +// CHECK-LABEL: cir.func @_Z16make_non_virtualv() -> !cir.method in !ty_Foo> +// CHECK: %{{.+}} = cir.const #cir.method<@_ZN3Foo2m1Ei> : !cir.method in !ty_Foo> +// CHECK: } + +auto make_virtual() -> void (Foo::*)(int) { + return &Foo::m3; +} + +// CHECK-LABEL: cir.func @_Z12make_virtualv() -> !cir.method in !ty_Foo> +// CHECK: %{{.+}} = cir.const #cir.method : !cir.method in !ty_Foo> +// CHECK: } + +auto make_null() -> void (Foo::*)(int) { + return nullptr; +} + +// CHECK-LABEL: cir.func @_Z9make_nullv() -> !cir.method in !ty_Foo> +// CHECK: %{{.+}} = cir.const #cir.method : !cir.method in !ty_Foo> +// CHECK: } + +void call(Foo *obj, void (Foo::*func)(int), int arg) { + (obj->*func)(arg); +} + +// CHECK-LABEL: cir.func @_Z4callP3FooMS_FviEi +// CHECK: %[[CALLEE:.+]], %[[THIS:.+]] = cir.get_method %{{.+}}, %{{.+}} : (!cir.method in !ty_Foo>, !cir.ptr) -> (!cir.ptr, !s32i)>>, !cir.ptr) +// CHECK-NEXT: %[[#ARG:]] = cir.load %{{.+}} : !cir.ptr, !s32i +// CHECK-NEXT: cir.call %[[CALLEE]](%[[THIS]], %[[#ARG]]) : (!cir.ptr, !s32i)>>, !cir.ptr, !s32i) -> () +// CHECK: } diff --git a/clang/test/CIR/CodeGen/pointer.cpp b/clang/test/CIR/CodeGen/pointer.cpp new file mode 100644 index 000000000000..bdf0e2103192 --- /dev/null +++ b/clang/test/CIR/CodeGen/pointer.cpp @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Global pointer should be zero initialized by default. +int *ptr; +// CHECK: cir.global external @ptr = #cir.ptr : !cir.ptr diff --git a/clang/test/CIR/CodeGen/pointers.cpp b/clang/test/CIR/CodeGen/pointers.cpp new file mode 100644 index 000000000000..dfea22ebadd6 --- /dev/null +++ b/clang/test/CIR/CodeGen/pointers.cpp @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Should generate basic pointer arithmetics. +void foo(int *iptr, char *cptr, unsigned ustride) { + iptr + 2; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#STRIDE]] : !s32i), !cir.ptr + cptr + 3; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#STRIDE]] : !s32i), !cir.ptr + iptr - 2; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i + // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#NEGSTRIDE]] : !s32i), !cir.ptr + cptr - 3; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i + // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#NEGSTRIDE]] : !s32i), !cir.ptr + iptr + ustride; + // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr, !u32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#STRIDE]] : !u32i), !cir.ptr + + // Must convert unsigned stride to a signed one. 
+ iptr - ustride; + // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr, !u32i + // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i + // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#NEGSTRIDE]] : !s32i), !cir.ptr +} + +void testPointerSubscriptAccess(int *ptr) { +// CHECK: testPointerSubscriptAccess + ptr[1]; + // CHECK: %[[#V1:]] = cir.load %{{.+}} : !cir.ptr>, !cir.ptr + // CHECK: %[[#V2:]] = cir.const #cir.int<1> : !s32i + // CHECK: cir.ptr_stride(%[[#V1]] : !cir.ptr, %[[#V2]] : !s32i), !cir.ptr +} + +void testPointerMultiDimSubscriptAccess(int **ptr) { +// CHECK: testPointerMultiDimSubscriptAccess + ptr[1][2]; + // CHECK: %[[#V1:]] = cir.load %{{.+}} : !cir.ptr>>, !cir.ptr> + // CHECK: %[[#V2:]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[#V3:]] = cir.ptr_stride(%[[#V1]] : !cir.ptr>, %[[#V2]] : !s32i), !cir.ptr> + // CHECK: %[[#V4:]] = cir.load %[[#V3]] : !cir.ptr>, !cir.ptr + // CHECK: %[[#V5:]] = cir.const #cir.int<2> : !s32i + // CHECK: cir.ptr_stride(%[[#V4]] : !cir.ptr, %[[#V5]] : !s32i), !cir.ptr +} diff --git a/clang/test/CIR/CodeGen/pred-info-builtins.c b/clang/test/CIR/CodeGen/pred-info-builtins.c new file mode 100644 index 000000000000..263274890e34 --- /dev/null +++ b/clang/test/CIR/CodeGen/pred-info-builtins.c @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR-O0 +// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR-O2 + +extern void __attribute__((noinline)) bar(void); + +void expect(int x) { + if (__builtin_expect(x, 0)) + bar(); +} +// CIR-O0: cir.func @expect +// CIR-O0: cir.if {{%.*}} { +// CIR-O0: cir.call @bar() : () -> () + +// CIR-O2: cir.func @expect +// CIR-O2: [[EXPECT:%.*]] = cir.expect({{.*}}, {{.*}}) : !s64i +// CIR-O2: [[EXPECT_BOOL:%.*]] = cir.cast(int_to_bool, [[EXPECT]] : !s64i), !cir.bool +// CIR-O2: cir.if [[EXPECT_BOOL]] +// CIR-O2: cir.call @bar() : () -> () + +void expect_with_probability(int x) { + if (__builtin_expect_with_probability(x, 1, 0.8)) + bar(); +} +// CIR-O0: cir.func @expect_with_probability +// CIR-O0: cir.if {{%.*}} { +// CIR-O0: cir.call @bar() : () -> () + +// CIR-O2: cir.func @expect_with_probability +// CIR-O2: [[EXPECT:%.*]] = cir.expect({{.*}}, {{.*}}, 8.000000e-01) : !s64i +// CIR-O2: [[EXPECT_BOOL:%.*]] = cir.cast(int_to_bool, [[EXPECT]] : !s64i), !cir.bool +// CIR-O2: cir.if [[EXPECT_BOOL]] +// CIR-O2: cir.call @bar() : () -> () + +void unpredictable(int x) { + if (__builtin_unpredictable(x > 1)) + bar(); +// CIR-O0: cir.func @unpredictable +// CIR-O0: cir.if {{%.*}} { +// CIR-O0: cir.call @bar() : () -> () +} diff --git a/clang/test/CIR/CodeGen/predefined.cpp b/clang/test/CIR/CodeGen/predefined.cpp new file mode 100644 index 000000000000..b5ec86d41aff --- /dev/null +++ b/clang/test/CIR/CodeGen/predefined.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +extern "C" { + void __assert2(const char* __file, int __line, const char* __function, const char* __msg) __attribute__((__noreturn__)); +} + +void m() { + __assert2("yo.cpp", 79, __PRETTY_FUNCTION__, "doom"); +} + +// CHECK: cir.func @_Z1mv() +// CHECK: %0 = cir.get_global @".str" : !cir.ptr> +// CHECK: %1 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr +// CHECK: %2 = cir.const #cir.int<79> : 
!s32i +// CHECK: %3 = cir.get_global @".str1" : !cir.ptr> +// CHECK: %4 = cir.cast(array_to_ptrdecay, %3 : !cir.ptr>), !cir.ptr +// CHECK: %5 = cir.get_global @".str2" : !cir.ptr> +// CHECK: %6 = cir.cast(array_to_ptrdecay, %5 : !cir.ptr>), !cir.ptr +// CHECK: cir.call @__assert2(%1, %2, %4, %6) : (!cir.ptr, !s32i, !cir.ptr, !cir.ptr) -> () +// CHECK: cir.return +// CHECK: } diff --git a/clang/test/CIR/CodeGen/ptrdiff.c b/clang/test/CIR/CodeGen/ptrdiff.c new file mode 100644 index 000000000000..1a937d5f4272 --- /dev/null +++ b/clang/test/CIR/CodeGen/ptrdiff.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +int addrcmp(const void* a, const void* b) { + // CIR-LABEL: addrcmp + // CIR: %[[R:.*]] = cir.ptr_diff + // CIR: cir.cast(integral, %[[R]] : !s64i), !s32 + + // LLVM-LABEL: addrcmp + // LLVM: %[[PTR_A:.*]] = ptrtoint ptr {{.*}} to i64 + // LLVM: %[[PTR_B:.*]] = ptrtoint ptr {{.*}} to i64 + // LLVM: %[[SUB:.*]] = sub i64 %[[PTR_A]], %[[PTR_B]] + // LLVM-NOT: sdiv + // LLVM: trunc i64 %[[SUB]] to i32 + return *(const void**)a - *(const void**)b; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/ptrdiff.cpp b/clang/test/CIR/CodeGen/ptrdiff.cpp new file mode 100644 index 000000000000..e322c9c6388a --- /dev/null +++ b/clang/test/CIR/CodeGen/ptrdiff.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef unsigned long size_type; +size_type size(unsigned long *_start, unsigned long *_finish) { + return static_cast(_finish - _start); +} + +// CHECK: cir.func @_Z4sizePmS_(%arg0: !cir.ptr +// CHECK: %3 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK: %4 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %5 = cir.ptr_diff(%3, %4) : !cir.ptr -> !s64i +// CHECK: %6 = cir.cast(integral, %5 : !s64i), !u64i + +long add(char *a, char *b) { + return a - b + 1; +} + +// CHECK: cir.func @_Z3addPcS_(%arg0: !cir.ptr +// %5 = cir.ptr_diff(%3, %4) : !cir.ptr -> !s64i +// %6 = cir.const #cir.int<1> : !s32i +// %7 = cir.cast(integral, %6 : !s32i), !s64i +// %8 = cir.binop(add, %5, %7) : !s64i + diff --git a/clang/test/CIR/CodeGen/rangefor.cpp b/clang/test/CIR/CodeGen/rangefor.cpp new file mode 100644 index 000000000000..5dc724583506 --- /dev/null +++ b/clang/test/CIR/CodeGen/rangefor.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +typedef enum enumy { + Unknown = 0, + Some = 1000024002, +} enumy; + +typedef struct triple { + enumy type; + void* __attribute__((__may_alias__)) next; + unsigned image; +} triple; + +void init(unsigned numImages) { + std::vector images(numImages); + for (auto& image : images) { + image = {Some}; + } +} + +// CHECK-DAG: !ty_triple = !cir.struct, !cir.ptr, !cir.int}> +// CHECK-DAG: ![[VEC:.*]] = !cir.struct" {!cir.ptr, !cir.ptr, !cir.int}>>, !cir.ptr, !cir.ptr, !cir.int}>>, !cir.ptr, !cir.ptr, !cir.int}>>}> +// CHECK-DAG: ![[VEC_IT:.*]] = !cir.struct" {!cir.ptr, !cir.ptr, !cir.int}> + +// CHECK: cir.func @_Z4initj(%arg0: !u32i +// CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["numImages", init] {alignment = 4 : i64} +// 
CHECK: %1 = cir.alloca ![[VEC]], !cir.ptr, ["images", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !u32i, !cir.ptr +// CHECK: %2 = cir.load %0 : !cir.ptr, !u32i +// CHECK: %3 = cir.cast(integral, %2 : !u32i), !u64i +// CHECK: cir.call @_ZNSt6vectorI6tripleEC1Em(%1, %3) : (!cir.ptr, !u64i) -> () +// CHECK: cir.scope { +// CHECK: %4 = cir.alloca !cir.ptr, !cir.ptr>, ["__range1", init] {alignment = 8 : i64} +// CHECK: %5 = cir.alloca ![[VEC_IT]], !cir.ptr, ["__begin1", init] {alignment = 8 : i64} +// CHECK: %6 = cir.alloca ![[VEC_IT]], !cir.ptr, ["__end1", init] {alignment = 8 : i64} +// CHECK: %7 = cir.alloca !cir.ptr, !cir.ptr>, ["image", init] {alignment = 8 : i64} +// CHECK: cir.store %1, %4 : !cir.ptr, !cir.ptr> +// CHECK: %8 = cir.load %4 : !cir.ptr>, !cir.ptr +// CHECK: %9 = cir.call @_ZNSt6vectorI6tripleE5beginEv(%8) : (!cir.ptr) -> ![[VEC_IT]] +// CHECK: cir.store %9, %5 : ![[VEC_IT]], !cir.ptr +// CHECK: %10 = cir.load %4 : !cir.ptr>, !cir.ptr +// CHECK: %11 = cir.call @_ZNSt6vectorI6tripleE3endEv(%10) : (!cir.ptr) -> ![[VEC_IT]] +// CHECK: cir.store %11, %6 : ![[VEC_IT]], !cir.ptr +// CHECK: cir.for : cond { +// CHECK: %12 = cir.call @_ZNK17__vector_iteratorI6triplePS0_RS0_EneERKS3_(%5, %6) : (!cir.ptr, !cir.ptr) -> !cir.bool +// CHECK: cir.condition(%12) +// CHECK: } body { +// CHECK: %12 = cir.call @_ZNK17__vector_iteratorI6triplePS0_RS0_EdeEv(%5) : (!cir.ptr) -> !cir.ptr +// CHECK: cir.store %12, %7 : !cir.ptr, !cir.ptr> +// CHECK: cir.scope { +// CHECK: %13 = cir.alloca !ty_triple, !cir.ptr, ["ref.tmp0"] {alignment = 8 : i64} +// CHECK: %14 = cir.const #cir.zero : !ty_triple +// CHECK: cir.store %14, %13 : !ty_triple, !cir.ptr +// CHECK: %15 = cir.get_member %13[0] {name = "type"} : !cir.ptr -> !cir.ptr +// CHECK: %16 = cir.const #cir.int<1000024002> : !u32i +// CHECK: cir.store %16, %15 : !u32i, !cir.ptr +// CHECK: %17 = cir.get_member %13[1] {name = "next"} : !cir.ptr -> !cir.ptr> +// CHECK: %18 = cir.get_member %13[2] {name = "image"} : !cir.ptr -> !cir.ptr +// CHECK: %19 = cir.load %7 : !cir.ptr>, !cir.ptr +// CHECK: %20 = cir.call @_ZN6tripleaSEOS_(%19, %13) : (!cir.ptr, !cir.ptr) -> !cir.ptr +// CHECK: } +// CHECK: cir.yield +// CHECK: } step { +// CHECK: %12 = cir.call @_ZN17__vector_iteratorI6triplePS0_RS0_EppEv(%5) : (!cir.ptr) -> !cir.ptr +// CHECK: cir.yield +// CHECK: } +// CHECK: } +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/return.cpp b/clang/test/CIR/CodeGen/return.cpp new file mode 100644 index 000000000000..8391e647d46b --- /dev/null +++ b/clang/test/CIR/CodeGen/return.cpp @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +int &ret0(int &x) { + return x; +} + +// CHECK: cir.func @_Z4ret0Ri +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["x", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: cir.store %2, %1 : !cir.ptr, !cir.ptr> +// CHECK: %3 = cir.load %1 : !cir.ptr>, !cir.ptr +// CHECK: cir.return %3 : !cir.ptr + +int unreachable_after_return() { + return 0; + return 1; +} + +// CHECK: cir.func @_Z24unreachable_after_returnv +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.br ^bb1 +// CHECK-NEXT: ^bb1: // 2 preds: ^bb0, ^bb2 +// 
CHECK-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %2 : !s32i +// CHECK-NEXT: ^bb2: // no predecessors +// CHECK-NEXT: %3 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.store %3, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.br ^bb1 +// CHECK-NEXT: } diff --git a/clang/test/CIR/CodeGen/shift.cpp b/clang/test/CIR/CodeGen/shift.cpp new file mode 100644 index 000000000000..6f6a10d34ab0 --- /dev/null +++ b/clang/test/CIR/CodeGen/shift.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +unsigned long s(int i, unsigned long x) { + return x << i; +} + +// CHECK: cir.shift(left, %3 : !u64i, %4 : !s32i) -> !u64i \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/skip-functions-from-system-headers.cpp b/clang/test/CIR/CodeGen/skip-functions-from-system-headers.cpp new file mode 100644 index 000000000000..f48602b87676 --- /dev/null +++ b/clang/test/CIR/CodeGen/skip-functions-from-system-headers.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fclangir-skip-system-headers -I%S/../Inputs %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "skip-this-header.h" + +void test() { + String s1{}; + String s2{1}; + String s3{"abcdefghijklmnop"}; +} + +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC2Ev +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC2Ei +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC2EPKc +// CHECK-NOT: cir.func linkonce_odr @_ZN6StringC1EPKc + +// CHECK: cir.func @_Z4testv() +// CHECK: cir.call @_ZN6StringC1Ev(%0) : (!cir.ptr) -> () \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/sourcelocation.cpp b/clang/test/CIR/CodeGen/sourcelocation.cpp new file mode 100644 index 000000000000..97ee16aa3cde --- /dev/null +++ b/clang/test/CIR/CodeGen/sourcelocation.cpp @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +int s0(int a, int b) { + int x = a + b; + if (x > 0) + x = 0; + else + x = 1; + return x; +} + +// CIR: #loc3 = loc("{{.*}}sourcelocation.cpp":6:8) +// CIR: #loc4 = loc("{{.*}}sourcelocation.cpp":6:12) +// CIR: #loc5 = loc("{{.*}}sourcelocation.cpp":6:15) +// CIR: #loc6 = loc("{{.*}}sourcelocation.cpp":6:19) +// CIR: #loc21 = loc(fused[#loc3, #loc4]) +// CIR: #loc22 = loc(fused[#loc5, #loc6]) +// CIR: module @"{{.*}}sourcelocation.cpp" attributes {cir.lang = #cir.lang, cir.sob = #cir.signed_overflow_behavior +// CIR: cir.func @_Z2s0ii(%arg0: !s32i loc(fused[#loc3, #loc4]), %arg1: !s32i loc(fused[#loc5, #loc6])) -> !s32i +// CIR: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} loc(#loc21) +// CIR: %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} loc(#loc22) +// CIR: %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} loc(#loc2) +// CIR: %3 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} loc(#loc23) +// CIR: cir.store %arg0, %0 : !s32i, !cir.ptr loc(#loc9) +// CIR: cir.store %arg1, %1 : !s32i, !cir.ptr loc(#loc9) +// CIR: %4 = cir.load %0 : !cir.ptr, !s32i loc(#loc10) +// CIR: %5 = cir.load %1 : !cir.ptr, !s32i loc(#loc8) +// CIR: %6 = cir.binop(add, %4, %5) nsw : !s32i loc(#loc10) +// CIR: cir.store %6, %3 : !s32i, !cir.ptr loc(#loc23) 
+// CIR: cir.scope { +// CIR: %9 = cir.load %3 : !cir.ptr, !s32i loc(#loc13) +// CIR: %10 = cir.const #cir.int<0> : !s32i loc(#loc14) +// CIR: %11 = cir.cmp(gt, %9, %10) : !s32i, !cir.bool loc(#loc25) +// CIR: cir.if %11 { +// CIR: %12 = cir.const #cir.int<0> : !s32i loc(#loc16) +// CIR: cir.store %12, %3 : !s32i, !cir.ptr loc(#loc27) +// CIR: } else { +// CIR: %12 = cir.const #cir.int<1> : !s32i loc(#loc12) +// CIR: cir.store %12, %3 : !s32i, !cir.ptr loc(#loc28) +// CIR: } loc(#loc26) +// CIR: } loc(#loc24) +// CIR: %7 = cir.load %3 : !cir.ptr, !s32i loc(#loc18) +// CIR: cir.store %7, %2 : !s32i, !cir.ptr loc(#loc29) +// CIR: %8 = cir.load %2 : !cir.ptr, !s32i loc(#loc29) +// CIR: cir.return %8 : !s32i loc(#loc29) +// CIR: } loc(#loc20) +// CIR: } loc(#loc) +// CIR: #loc = loc("{{.*}}sourcelocation.cpp":0:0) +// CIR: #loc1 = loc("{{.*}}sourcelocation.cpp":6:1) +// CIR: #loc2 = loc("{{.*}}sourcelocation.cpp":13:1) +// CIR: #loc7 = loc("{{.*}}sourcelocation.cpp":7:3) +// CIR: #loc8 = loc("{{.*}}sourcelocation.cpp":7:15) +// CIR: #loc9 = loc("{{.*}}sourcelocation.cpp":6:22) +// CIR: #loc10 = loc("{{.*}}sourcelocation.cpp":7:11) +// CIR: #loc11 = loc("{{.*}}sourcelocation.cpp":8:3) +// CIR: #loc12 = loc("{{.*}}sourcelocation.cpp":11:9) +// CIR: #loc13 = loc("{{.*}}sourcelocation.cpp":8:7) +// CIR: #loc14 = loc("{{.*}}sourcelocation.cpp":8:11) +// CIR: #loc15 = loc("{{.*}}sourcelocation.cpp":9:5) +// CIR: #loc16 = loc("{{.*}}sourcelocation.cpp":9:9) +// CIR: #loc17 = loc("{{.*}}sourcelocation.cpp":11:5) +// CIR: #loc18 = loc("{{.*}}sourcelocation.cpp":12:10) +// CIR: #loc19 = loc("{{.*}}sourcelocation.cpp":12:3) +// CIR: #loc20 = loc(fused[#loc1, #loc2]) +// CIR: #loc23 = loc(fused[#loc7, #loc8]) +// CIR: #loc24 = loc(fused[#loc11, #loc12]) +// CIR: #loc25 = loc(fused[#loc13, #loc14]) +// CIR: #loc26 = loc(fused[#loc15, #loc16, #loc17, #loc12]) +// CIR: #loc27 = loc(fused[#loc15, #loc16]) +// CIR: #loc28 = loc(fused[#loc17, #loc12]) +// CIR: #loc29 = loc(fused[#loc19, #loc18]) + + +// LLVM: ModuleID = '{{.*}}sourcelocation.cpp' +// LLVM: source_filename = "{{.*}}sourcelocation.cpp" +// LLVM: define dso_local i32 @_Z2s0ii(i32 %0, i32 %1) #[[#]] !dbg ![[#SP:]] +// LLVM: %3 = alloca i32, i64 1, align 4, !dbg ![[#LOC1:]] + + +// LLVM: !llvm.module.flags = !{!0} +// LLVM: !llvm.dbg.cu = !{!1} +// LLVM: !0 = !{i32 2, !"Debug Info Version", i32 3} +// LLVM: !1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "MLIR", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly) +// LLVM: !2 = !DIFile(filename: "sourcelocation.cpp", directory: "{{.*}}CodeGen") +// LLVM: ![[#SP]] = distinct !DISubprogram(name: "_Z2s0ii", linkageName: "_Z2s0ii", scope: !2, file: !2, line: 6, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) +// LLVM: ![[#LOC1]] = !DILocation(line: 6, scope: ![[#SP]]) diff --git a/clang/test/CIR/CodeGen/special-virtual-func.cpp b/clang/test/CIR/CodeGen/special-virtual-func.cpp new file mode 100644 index 000000000000..83e2a27b82f0 --- /dev/null +++ b/clang/test/CIR/CodeGen/special-virtual-func.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Check that pure and deleted virtual functions are correctly emitted in the +// vtable. 
+class A { + A(); + virtual void pure() = 0; + virtual void deleted() = delete; +}; + +A::A() = default; + +// CHECK: @_ZTV1A = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI1A> : !cir.ptr, #cir.global_view<@__cxa_pure_virtual> : !cir.ptr, #cir.global_view<@__cxa_deleted_virtual> : !cir.ptr]> +// CHECK: cir.func private @__cxa_pure_virtual() +// CHECK: cir.func private @__cxa_deleted_virtual() diff --git a/clang/test/CIR/CodeGen/spelling-locations.cpp b/clang/test/CIR/CodeGen/spelling-locations.cpp new file mode 100644 index 000000000000..66c09c88a029 --- /dev/null +++ b/clang/test/CIR/CodeGen/spelling-locations.cpp @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +#define multiline_if_macro(c, t) \ +if (c) { \ + return t; \ +} + +int testMacroLocations(void) { + + // Expanded macros will use the location of the expansion site. + multiline_if_macro(1, 3); + // CHECK: cir.scope { + // CHECK: cir.if %{{.+}} { + // CHECK: cir.return %{{.+}} : !s32i loc(#loc[[#LOC:]]) + // CHECK: } loc(#loc[[#LOC]]) + // CHECK: } loc(#loc[[#LOC]]) + + // Regular if statements should use different locations. + if (1) { + return 3; + } + // CHECK: cir.scope { + // CHECK: cir.if %{{.+}} { + // CHECK: cir.return %{{.+}} : !s32i loc(#loc[[#LOC:]]) + // CHECK-NOT: } loc(#loc[[#LOC]]) + // CHECK-NOT: } loc(#loc[[#LOC]]) + + return 0; +} + +void testIfStmtLocations(int f) { + if (f) + ; + else + ; + + if (f) + ++f; + else + ; + + if (f) + ; + else + --f; + + if (f) + ++f; + else + --f; +} + +// CHECK: cir.if %{{.+}} { +// CHECK: } else { +// CHECK: } loc(#loc[[#LOC1:]]) + +// CHECK: cir.if %{{.+}} { +// CHECK: %{{.+}} = cir.load +// CHECK: %{{.+}} = cir.unary(inc +// CHECK: cir.store +// CHECK: } else { +// CHECK: } loc(#loc[[#LOC2:]]) + +// CHECK: cir.if %{{.+}} { +// CHECK: } else { +// CHECK: %{{.+}} = cir.load +// CHECK: %{{.+}} = cir.unary(dec +// CHECK: cir.store +// CHECK: } loc(#loc[[#LOC3:]]) + +// CHECK: cir.if %{{.+}} { +// CHECK: %{{.+}} = cir.load +// CHECK: %{{.+}} = cir.unary(inc +// CHECK: cir.store +// CHECK: } else { +// CHECK: %{{.+}} = cir.load +// CHECK: %{{.+}} = cir.unary(dec +// CHECK: cir.store +// CHECK: } loc(#loc[[#LOC4:]]) + +// CHECK: #loc[[#LOC12:]] = loc({{.+}}:35:5) +// CHECK: #loc[[#LOC11:]] = loc({{.+}}:33:5) + +// CHECK: #loc[[#LOC23:]] = loc({{.+}}:40:5) +// CHECK: #loc[[#LOC21:]] = loc({{.+}}:38:5) +// CHECK: #loc[[#LOC22:]] = loc({{.+}}:38:7) + +// CHECK: #loc[[#LOC33:]] = loc({{.+}}:45:7) +// CHECK: #loc[[#LOC31:]] = loc({{.+}}:43:5) +// CHECK: #loc[[#LOC32:]] = loc({{.+}}:45:5) + +// CHECK: #loc[[#LOC44:]] = loc({{.+}}:50:7) +// CHECK: #loc[[#LOC41:]] = loc({{.+}}:48:5) +// CHECK: #loc[[#LOC42:]] = loc({{.+}}:48:7) +// CHECK: #loc[[#LOC43:]] = loc({{.+}}:50:5) + +// CHECK: #loc[[#LOC1]] = loc(fused[#loc[[#LOC11]], #loc[[#LOC12]]]) +// CHECK: #loc[[#LOC2]] = loc(fused[#loc[[#LOC21]], #loc[[#LOC22]], #loc[[#LOC23]]]) +// CHECK: #loc[[#LOC3]] = loc(fused[#loc[[#LOC31]], #loc[[#LOC32]], #loc[[#LOC33]]]) +// CHECK: #loc[[#LOC4]] = loc(fused[#loc[[#LOC41]], #loc[[#LOC42]], #loc[[#LOC43]], #loc[[#LOC44]]]) diff --git a/clang/test/CIR/CodeGen/static-vars.c b/clang/test/CIR/CodeGen/static-vars.c new file mode 100644 index 000000000000..140f4e6052f6 --- /dev/null +++ b/clang/test/CIR/CodeGen/static-vars.c @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void 
func1(void) { + // Should lower default-initialized static vars. + static int i; + // CHECK-DAG: cir.global "private" internal dsolocal @func1.i = #cir.int<0> : !s32i + + // Should lower constant-initialized static vars. + static int j = 1; + // CHECK-DAG: cir.global "private" internal dsolocal @func1.j = #cir.int<1> : !s32i + + // Should properly shadow static vars in nested scopes. + { + static int j = 2; + // CHECK-DAG: cir.global "private" internal dsolocal @func1.j.1 = #cir.int<2> : !s32i + } + { + static int j = 3; + // CHECK-DAG: cir.global "private" internal dsolocal @func1.j.2 = #cir.int<3> : !s32i + } + + // Should lower basic arithmetic on static vars. + j++; + // CHECK-DAG: %[[#V2:]] = cir.get_global @func1.j : !cir.ptr + // CHECK-DAG: %[[#V3:]] = cir.load %[[#V2]] : !cir.ptr, !s32i + // CHECK-DAG: %[[#V4:]] = cir.unary(inc, %[[#V3]]) : !s32i, !s32i + // CHECK-DAG: cir.store %[[#V4]], %[[#V2]] : !s32i, !cir.ptr +} + +// Should shadow static vars across different functions. +void func2(void) { + static char i; + // CHECK-DAG: cir.global "private" internal dsolocal @func2.i = #cir.int<0> : !s8i + static float j; + // CHECK-DAG: cir.global "private" internal dsolocal @func2.j = #cir.fp<0.000000e+00> : !cir.float +} + +// Should const-initialize static vars with constant addresses. +void func3(void) { + static int var; + static int *constAddr = &var; + // CHECK-DAG: cir.global "private" internal dsolocal @func3.constAddr = #cir.global_view<@func3.var> : !cir.ptr +} + +// Should match type size in bytes between var and initializer. +void func4(void) { + static char string[] = "Hello"; + // CHECK-DAG: cir.global "private" internal dsolocal @func4.string = #cir.const_array<"Hello\00" : !cir.array> : !cir.array +} diff --git a/clang/test/CIR/CodeGen/static-vars.cpp b/clang/test/CIR/CodeGen/static-vars.cpp new file mode 100644 index 000000000000..c1c65bea0748 --- /dev/null +++ b/clang/test/CIR/CodeGen/static-vars.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void func1(void) { + // Should lower default-initialized static vars. + static int i; + // CHECK-DAG: cir.global "private" internal dsolocal @_ZZ5func1vE1i = #cir.int<0> : !s32i + + // Should lower constant-initialized static vars. + static int j = 1; + // CHECK-DAG: cir.global "private" internal dsolocal @_ZZ5func1vE1j = #cir.int<1> : !s32i + + // Should properly shadow static vars in nested scopes. + { + static int j = 2; + // CHECK-DAG: cir.global "private" internal dsolocal @_ZZ5func1vE1j_0 = #cir.int<2> : !s32i + } + { + static int j = 3; + // CHECK-DAG: cir.global "private" internal dsolocal @_ZZ5func1vE1j_1 = #cir.int<3> : !s32i + } + + // Should lower basic arithmetic on static vars. + j++; + // CHECK-DAG: %[[#V2:]] = cir.get_global @_ZZ5func1vE1j : !cir.ptr + // CHECK-DAG: %[[#V3:]] = cir.load %[[#V2]] : !cir.ptr, !s32i + // CHECK-DAG: %[[#V4:]] = cir.unary(inc, %[[#V3]]) : !s32i, !s32i + // CHECK-DAG: cir.store %[[#V4]], %[[#V2]] : !s32i, !cir.ptr +} + +// Should shadow static vars across different functions.
+void func2(void) { + static char i; + // CHECK-DAG: cir.global "private" internal dsolocal @_ZZ5func2vE1i = #cir.int<0> : !s8i + static float j; + // CHECK-DAG: cir.global "private" internal dsolocal @_ZZ5func2vE1j = #cir.fp<0.000000e+00> : !cir.float +} diff --git a/clang/test/CIR/CodeGen/static.cpp b/clang/test/CIR/CodeGen/static.cpp new file mode 100644 index 000000000000..2ba42118dddb --- /dev/null +++ b/clang/test/CIR/CodeGen/static.cpp @@ -0,0 +1,90 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER +// RUN: cir-opt %t.cir -o - | FileCheck %s -check-prefix=AFTER +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +class Init { + +public: + Init(bool a) ; + ~Init(); +private: + static bool _S_synced_with_stdio; +}; + + +static Init __ioinit(true); +static Init __ioinit2(false); + +// BEFORE: module {{.*}} { +// BEFORE-NEXT: cir.func private @_ZN4InitC1Eb(!cir.ptr, !cir.bool) +// BEFORE-NEXT: cir.func private @_ZN4InitD1Ev(!cir.ptr) +// BEFORE-NEXT: cir.global "private" internal dsolocal @_ZL8__ioinit = ctor : !ty_Init { +// BEFORE-NEXT: %0 = cir.get_global @_ZL8__ioinit : !cir.ptr +// BEFORE-NEXT: %1 = cir.const #true +// BEFORE-NEXT: cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr, !cir.bool) -> () +// BEFORE-NEXT: } dtor { +// BEFORE-NEXT: %0 = cir.get_global @_ZL8__ioinit : !cir.ptr +// BEFORE-NEXT: cir.call @_ZN4InitD1Ev(%0) : (!cir.ptr) -> () +// BEFORE-NEXT: } {alignment = 1 : i64, ast = #cir.var.decl.ast} +// BEFORE: cir.global "private" internal dsolocal @_ZL9__ioinit2 = ctor : !ty_Init { +// BEFORE-NEXT: %0 = cir.get_global @_ZL9__ioinit2 : !cir.ptr +// BEFORE-NEXT: %1 = cir.const #false +// BEFORE-NEXT: cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr, !cir.bool) -> () +// BEFORE-NEXT: } dtor { +// BEFORE-NEXT: %0 = cir.get_global @_ZL9__ioinit2 : !cir.ptr +// BEFORE-NEXT: cir.call @_ZN4InitD1Ev(%0) : (!cir.ptr) -> () +// BEFORE-NEXT: } {alignment = 1 : i64, ast = #cir.var.decl.ast} +// BEFORE-NEXT: } + + +// AFTER: module {{.*}} attributes {{.*}}cir.global_ctors = [#cir.global_ctor<"__cxx_global_var_init", 65536>, #cir.global_ctor<"__cxx_global_var_init.1", 65536>] +// AFTER-NEXT: cir.global "private" external @__dso_handle : i8 +// AFTER-NEXT: cir.func private @__cxa_atexit(!cir.ptr)>>, !cir.ptr, !cir.ptr) +// AFTER-NEXT: cir.func private @_ZN4InitC1Eb(!cir.ptr, !cir.bool) +// AFTER-NEXT: cir.func private @_ZN4InitD1Ev(!cir.ptr) +// AFTER-NEXT: cir.global "private" internal dsolocal @_ZL8__ioinit = #cir.zero : !ty_Init {alignment = 1 : i64, ast = #cir.var.decl.ast} +// AFTER-NEXT: cir.func internal private @__cxx_global_var_init() +// AFTER-NEXT: %0 = cir.get_global @_ZL8__ioinit : !cir.ptr +// AFTER-NEXT: %1 = cir.const #true +// AFTER-NEXT: cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr, !cir.bool) -> () +// AFTER-NEXT: %2 = cir.get_global @_ZL8__ioinit : !cir.ptr +// AFTER-NEXT: %3 = cir.get_global @_ZN4InitD1Ev : !cir.ptr)>> +// AFTER-NEXT: %4 = cir.cast(bitcast, %3 : !cir.ptr)>>), !cir.ptr)>> +// AFTER-NEXT: %5 = cir.cast(bitcast, %2 : !cir.ptr), !cir.ptr +// AFTER-NEXT: %6 = cir.get_global @__dso_handle : !cir.ptr +// AFTER-NEXT: cir.call @__cxa_atexit(%4, %5, %6) : (!cir.ptr)>>, !cir.ptr, !cir.ptr) -> () +// 
AFTER-NEXT: cir.return +// AFTER: cir.global "private" internal dsolocal @_ZL9__ioinit2 = #cir.zero : !ty_Init {alignment = 1 : i64, ast = #cir.var.decl.ast} +// AFTER-NEXT: cir.func internal private @__cxx_global_var_init.1() +// AFTER-NEXT: %0 = cir.get_global @_ZL9__ioinit2 : !cir.ptr +// AFTER-NEXT: %1 = cir.const #false +// AFTER-NEXT: cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr, !cir.bool) -> () +// AFTER-NEXT: %2 = cir.get_global @_ZL9__ioinit2 : !cir.ptr +// AFTER-NEXT: %3 = cir.get_global @_ZN4InitD1Ev : !cir.ptr)>> +// AFTER-NEXT: %4 = cir.cast(bitcast, %3 : !cir.ptr)>>), !cir.ptr)>> +// AFTER-NEXT: %5 = cir.cast(bitcast, %2 : !cir.ptr), !cir.ptr +// AFTER-NEXT: %6 = cir.get_global @__dso_handle : !cir.ptr +// AFTER-NEXT: cir.call @__cxa_atexit(%4, %5, %6) : (!cir.ptr)>>, !cir.ptr, !cir.ptr) -> () +// AFTER-NEXT: cir.return +// AFTER: cir.func private @_GLOBAL__sub_I_static.cpp() +// AFTER-NEXT: cir.call @__cxx_global_var_init() : () -> () +// AFTER-NEXT: cir.call @__cxx_global_var_init.1() : () -> () +// AFTER-NEXT: cir.return + +// LLVM: @__dso_handle = external global i8 +// LLVM: @_ZL8__ioinit = internal global %class.Init zeroinitializer +// LLVM: @_ZL9__ioinit2 = internal global %class.Init zeroinitializer +// LLVM: @llvm.global_ctors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65536, ptr @__cxx_global_var_init, ptr null }, { i32, ptr, ptr } { i32 65536, ptr @__cxx_global_var_init.1, ptr null }] +// LLVM: define internal void @__cxx_global_var_init() +// LLVM-NEXT: call void @_ZN4InitC1Eb(ptr @_ZL8__ioinit, i8 1) +// LLVM-NEXT: call void @__cxa_atexit(ptr @_ZN4InitD1Ev, ptr @_ZL8__ioinit, ptr @__dso_handle) +// LLVM-NEXT: ret void +// LLVM: define internal void @__cxx_global_var_init.1() +// LLVM-NEXT: call void @_ZN4InitC1Eb(ptr @_ZL9__ioinit2, i8 0) +// LLVM-NEXT: call void @__cxa_atexit(ptr @_ZN4InitD1Ev, ptr @_ZL9__ioinit2, ptr @__dso_handle) +// LLVM-NEXT: ret void +// LLVM: define void @_GLOBAL__sub_I_static.cpp() +// LLVM-NEXT: call void @__cxx_global_var_init() +// LLVM-NEXT: call void @__cxx_global_var_init.1() +// LLVM-NEXT: ret void diff --git a/clang/test/CIR/CodeGen/std-array.cpp b/clang/test/CIR/CodeGen/std-array.cpp new file mode 100644 index 000000000000..a360a0a37d44 --- /dev/null +++ b/clang/test/CIR/CodeGen/std-array.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +void t() { + std::array v = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + (void)v.end(); +} + +// CHECK: ![[array:.*]] = !cir.struct" + +// CHECK: {{.*}} = cir.get_member +// CHECK: {{.*}} = cir.cast(array_to_ptrdecay +// CHECK: {{.*}} = cir.const #cir.int<9> : !u32i + +// CHECK: cir.call @_ZNSt5arrayIhLj9EE3endEv \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/std-find.cpp b/clang/test/CIR/CodeGen/std-find.cpp new file mode 100644 index 000000000000..73494ba8b308 --- /dev/null +++ b/clang/test/CIR/CodeGen/std-find.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +// CHECK: ![[array:.*]] = !cir.struct" + +int test_find(unsigned char n = 3) +{ + // CHECK: cir.func @_Z9test_findh(%arg0: !u8i + unsigned num_found = 0; + std::array v = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + // CHECK: %[[array_addr:.*]] = cir.alloca 
![[array]], !cir.ptr, ["v"] + + auto f = std::find(v.begin(), v.end(), n); + // CHECK: {{.*}} cir.call @_ZNSt5arrayIhLj9EE5beginEv(%[[array_addr]]) + // CHECK: {{.*}} cir.call @_ZNSt5arrayIhLj9EE3endEv(%[[array_addr]]) + // CHECK: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_( + + if (f != v.end()) + num_found++; + // CHECK: cir.call @_ZNSt5arrayIhLj9EE3endEv(%[[array_addr]] + // CHECK: %[[neq_cmp:.*]] = cir.cmp + // CHECK: cir.if %[[neq_cmp]] + + return num_found; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/stmt-expr.c b/clang/test/CIR/CodeGen/stmt-expr.c new file mode 100644 index 000000000000..0e3daebb9d78 --- /dev/null +++ b/clang/test/CIR/CodeGen/stmt-expr.c @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Yields void. +void test1() { ({ }); } +// CHECK: @test1 +// CHECK: cir.scope { +// CHECK-NOT: cir.yield +// CHECK: } + +// Yields an out-of-scope scalar. +void test2() { ({int x = 3; x; }); } +// CHECK: @test2 +// CHECK: %[[#RETVAL:]] = cir.alloca !s32i, !cir.ptr +// CHECK: cir.scope { +// CHECK: %[[#VAR:]] = cir.alloca !s32i, !cir.ptr, ["x", init] +// [...] +// CHECK: %[[#TMP:]] = cir.load %[[#VAR]] : !cir.ptr, !s32i +// CHECK: cir.store %[[#TMP]], %[[#RETVAL]] : !s32i, !cir.ptr +// CHECK: } +// CHECK: %{{.+}} = cir.load %[[#RETVAL]] : !cir.ptr, !s32i + +// Yields an aggregate. +struct S { int x; }; +int test3() { return ({ struct S s = {1}; s; }).x; } +// CHECK: @test3 +// CHECK: %[[#RETVAL:]] = cir.alloca !ty_S, !cir.ptr +// CHECK: cir.scope { +// CHECK: %[[#VAR:]] = cir.alloca !ty_S, !cir.ptr +// [...] +// CHECK: cir.copy %[[#VAR]] to %[[#RETVAL]] : !cir.ptr +// CHECK: } +// CHECK: %[[#RETADDR:]] = cir.get_member %1[0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: %{{.+}} = cir.load %[[#RETADDR]] : !cir.ptr, !s32i + +// Expression is wrapped in an expression attribute (just ensure it does not crash). +void test4(int x) { ({[[gsl::suppress("foo")]] x;}); } +// CHECK: @test4 + +// TODO(cir): Missing label support. +// // Expression is wrapped in a label. +// // void test5(int x) { x = ({ label: x; }); } diff --git a/clang/test/CIR/CodeGen/stmt-expr.cpp b/clang/test/CIR/CodeGen/stmt-expr.cpp new file mode 100644 index 000000000000..8432df4e15af --- /dev/null +++ b/clang/test/CIR/CodeGen/stmt-expr.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +class A { +public: + A(): x(0) {} + A(A &a) : x(a.x) {} + // TODO(cir): Ensure dtors are properly called. The dtor below crashes. + // ~A() {} + int x; + void Foo() {} +}; + +void test1() { + ({ + A a; + a; + }).Foo(); +} +// CHECK: @_Z5test1v +// CHECK: cir.scope { +// CHECK: %[[#RETVAL:]] = cir.alloca !ty_A, !cir.ptr +// CHECK: cir.scope { +// CHECK: %[[#VAR:]] = cir.alloca !ty_A, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK: cir.call @_ZN1AC1Ev(%[[#VAR]]) : (!cir.ptr) -> () +// CHECK: cir.call @_ZN1AC1ERS_(%[[#RETVAL]], %[[#VAR]]) : (!cir.ptr, !cir.ptr) -> () +// TODO(cir): the local VAR should be destroyed here. +// CHECK: } +// CHECK: cir.call @_ZN1A3FooEv(%[[#RETVAL]]) : (!cir.ptr) -> () +// TODO(cir): the temporary RETVAL should be destroyed here. 
+// CHECK: } diff --git a/clang/test/CIR/CodeGen/stmtexpr-init.c b/clang/test/CIR/CodeGen/stmtexpr-init.c new file mode 100644 index 000000000000..b57562511fc9 --- /dev/null +++ b/clang/test/CIR/CodeGen/stmtexpr-init.c @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// CIR: ![[annon_struct:.*]] = !cir.struct, !cir.array x 2>}> +// CIR: ![[sized_array:.*]] = !cir.struct, !cir.array x 0>} + +void escape(const void *); + +// CIR-DAG: cir.global "private" internal dsolocal @T1._x = #cir.int<99> : !s8i +// LLVM-DAG: internal global i8 99 + +void T1(void) { + const char *x[1] = {({static char _x = 99; &_x; })}; + escape(x); +} + +struct sized_array { + int count; + int entries[]; +}; + +#define N_ARGS(...) (sizeof((int[]){__VA_ARGS__}) / sizeof(int)) + +#define ARRAY_PTR(...) ({ \ + static const struct sized_array _a = {N_ARGS(__VA_ARGS__), {__VA_ARGS__}}; \ + &_a; \ +}) + +struct outer { + const struct sized_array *a; +}; + +void T2(void) { + // CIR-DAG: cir.global "private" constant internal @T2._a = #cir.const_struct<{#cir.int<2> : !s32i, #cir.const_array<[#cir.int<50> : !s32i, #cir.int<60> : !s32i]> : !cir.array}> + // LLVM-DAG: internal constant { i32, [2 x i32] } { i32 2, [2 x i32] [i32 50, i32 60] } + const struct sized_array *A = ARRAY_PTR(50, 60); + + // CIR-DAG: cir.global "private" constant internal @T2._a.1 = #cir.const_struct<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array}> + // LLVM-DAG: internal constant { i32, [3 x i32] } { i32 3, [3 x i32] [i32 10, i32 20, i32 30] } + struct outer X = {ARRAY_PTR(10, 20, 30)}; + + // CIR-DAG: %[[T2A:.*]] = cir.get_global @T2._a : !cir.ptr + // CIR-DAG: cir.cast(bitcast, %[[T2A]] : !cir.ptr), !cir.ptr + escape(A); + escape(&X); +} diff --git a/clang/test/CIR/CodeGen/store.c b/clang/test/CIR/CodeGen/store.c new file mode 100644 index 000000000000..9a94e6578129 --- /dev/null +++ b/clang/test/CIR/CodeGen/store.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void foo(void) { + int a = 0; + a = 1; +} + +// CHECK: cir.func @foo() +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: %2 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.store %2, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +typedef int (*fn_t)(); +int get42() { return 42; } + +void storeNoArgsFn() { + fn_t f = get42; +} + +// CHECK: cir.func {{.*@storeNoArgsFn}} +// CHECK: %0 = cir.alloca +// CHECK: %1 = cir.get_global @get42 : !cir.ptr> +// CHECK: %2 = cir.cast(bitcast, %1 : !cir.ptr>), !cir.ptr> +// CHECK: cir.store %2, %0 : !cir.ptr>, !cir.ptr>> + diff --git a/clang/test/CIR/CodeGen/string-literals.c b/clang/test/CIR/CodeGen/string-literals.c new file mode 100644 index 000000000000..7be9d6819d3e --- /dev/null +++ b/clang/test/CIR/CodeGen/string-literals.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 
-fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +struct { + char x[10]; + char y[10]; + char z[10]; +} literals = {"1", "", "\00"}; + +// CIR-LABEL: @literals +// CIR: #cir.const_struct<{ +// CIR: #cir.const_array<"1" : !cir.array, trailing_zeros> : !cir.array, +// CIR: #cir.zero : !cir.array, +// CIR: #cir.zero : !cir.array +// CIR: }> + +// LLVM-LABEL: @literals +// LLVM: global %struct.anon.0 { +// LLVM: [10 x i8] c"1\00\00\00\00\00\00\00\00\00", +// LLVM: [10 x i8] zeroinitializer, +// LLVM: [10 x i8] zeroinitializer +// LLVM: } diff --git a/clang/test/CIR/CodeGen/struct-comma.c b/clang/test/CIR/CodeGen/struct-comma.c new file mode 100644 index 000000000000..ffd0544fda43 --- /dev/null +++ b/clang/test/CIR/CodeGen/struct-comma.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct AA {int a, b;} x; +extern int r(void); +void a(struct AA* b) {*b = (r(), x);} + +// CHECK-LABEL: @a +// CHECK: %[[ADDR:.*]] = cir.alloca {{.*}} ["b" +// CHECK: cir.store {{.*}}, %[[ADDR]] +// CHECK: %[[LOAD:.*]] = cir.load deref %[[ADDR]] +// CHECK: cir.call @r +// CHECK: %[[GADDR:.*]] = cir.get_global @x +// CHECK: cir.copy %[[GADDR]] to %[[LOAD]] \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/struct-empty.c b/clang/test/CIR/CodeGen/struct-empty.c new file mode 100644 index 000000000000..b2fe72eb1f6f --- /dev/null +++ b/clang/test/CIR/CodeGen/struct-empty.c @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// CIR: ![[lock:.*]] = !cir.struct +// CIR: ![[fs_struct:.*]] = !cir.struct, !cir.int} + +typedef struct { } rwlock_t; +struct fs_struct { rwlock_t lock; int umask; }; +void __copy_fs_struct(struct fs_struct *fs) { fs->lock = (rwlock_t) { }; } + +// CIR-LABEL: __copy_fs_struct +// CIR: %[[VAL_1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["fs", init] {alignment = 8 : i64} +// CIR: %[[VAL_2:.*]] = cir.alloca ![[lock]], !cir.ptr, [".compoundliteral"] {alignment = 1 : i64} +// CIR: cir.store {{.*}}, %[[VAL_1]] : !cir.ptr, !cir.ptr> +// CIR: %[[VAL_3:.*]] = cir.load %[[VAL_1]] : !cir.ptr>, !cir.ptr +// CIR: %[[VAL_4:.*]] = cir.get_member %[[VAL_3]][0] {name = "lock"} : !cir.ptr -> !cir.ptr +// CIR: cir.copy %[[VAL_2]] to %[[VAL_4]] : !cir.ptr + +// LLVM-LABEL: __copy_fs_struct +// LLVM: %[[VAL_5:.*]] = getelementptr {{.*}}, {{.*}}, i32 0, i32 0 +// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[VAL_5]], ptr {{.*}}, i32 0, i1 false) \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/struct.c b/clang/test/CIR/CodeGen/struct.c new file mode 100644 index 000000000000..3b39dced3574 --- /dev/null +++ b/clang/test/CIR/CodeGen/struct.c @@ -0,0 +1,115 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +struct Bar { + int a; + char b; +} bar; + +struct Foo { + int a; + char b; + struct Bar z; +}; + +// Recursive type +typedef struct Node { + struct Node* next; +} NodeStru; + +void baz(void) { + struct Bar b; + struct Foo f; +} + +// CHECK-DAG: !ty_Node = !cir.struct>} #cir.record.decl.ast> +// CHECK-DAG: !ty_Bar = !cir.struct, !cir.int}> +// CHECK-DAG: !ty_Foo = !cir.struct, !cir.int, !cir.struct, 
!cir.int}>}> +// CHECK-DAG: !ty_SLocal = !cir.struct}> +// CHECK-DAG: !ty_SLocal2E0_ = !cir.struct +// CHECK-DAG: module {{.*}} { + // CHECK: cir.func @baz() +// CHECK-NEXT: %0 = cir.alloca !ty_Bar, !cir.ptr, ["b"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.alloca !ty_Foo, !cir.ptr, ["f"] {alignment = 4 : i64} +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +void shouldConstInitStructs(void) { +// CHECK: cir.func @shouldConstInitStructs + struct Foo f = {1, 2, {3, 4}}; + // CHECK: %[[#V0:]] = cir.alloca !ty_Foo, !cir.ptr, ["f"] {alignment = 4 : i64} + // CHECK: %[[#V1:]] = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s8i, #cir.const_struct<{#cir.int<3> : !s32i, #cir.int<4> : !s8i}> : !ty_Bar}> : !ty_Foo + // CHECK: cir.store %[[#V1]], %[[#V0]] : !ty_Foo, !cir.ptr +} + +// Should zero-initialize uninitialized global structs. +struct S { + int a,b; +} s; +// CHECK-DAG: cir.global external @s = #cir.zero : !ty_S + +// Should initialize basic global structs. +struct S1 { + int a; + float f; + int *p; +} s1 = {1, .1, 0}; +// CHECK-DAG: cir.global external @s1 = #cir.const_struct<{#cir.int<1> : !s32i, #cir.fp<1.000000e-01> : !cir.float, #cir.ptr : !cir.ptr}> : !ty_S1_ + +// Should initialize global nested structs. +struct S2 { + struct S2A { + int a; + } s2a; +} s2 = {{1}}; +// CHECK-DAG: cir.global external @s2 = #cir.const_struct<{#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S2A}> : !ty_S2_ + +// Should initialize global arrays of structs. +struct S3 { + int a; +} s3[3] = {{1}, {2}, {3}}; +// CHECK-DAG: cir.global external @s3 = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S3_, #cir.const_struct<{#cir.int<2> : !s32i}> : !ty_S3_, #cir.const_struct<{#cir.int<3> : !s32i}> : !ty_S3_]> : !cir.array + +void shouldCopyStructAsCallArg(struct S1 s) { +// CHECK-DAG: cir.func @shouldCopyStructAsCallArg + shouldCopyStructAsCallArg(s); + // CHECK-DAG: %[[#LV:]] = cir.load %{{.+}} : !cir.ptr, !ty_S1_ + // CHECK-DAG: cir.call @shouldCopyStructAsCallArg(%[[#LV]]) : (!ty_S1_) -> () +} + +struct Bar shouldGenerateAndAccessStructArrays(void) { + struct Bar s[1] = {{3, 4}}; + return s[0]; +} +// CHECK-DAG: cir.func @shouldGenerateAndAccessStructArrays +// CHECK-DAG: %[[#STRIDE:]] = cir.const #cir.int<0> : !s32i +// CHECK-DAG: %[[#DARR:]] = cir.cast(array_to_ptrdecay, %{{.+}} : !cir.ptr>), !cir.ptr +// CHECK-DAG: %[[#ELT:]] = cir.ptr_stride(%[[#DARR]] : !cir.ptr, %[[#STRIDE]] : !s32i), !cir.ptr +// CHECK-DAG: cir.copy %[[#ELT]] to %{{.+}} : !cir.ptr + +// CHECK-DAG: cir.func @local_decl +// CHECK-DAG: {{%.}} = cir.alloca !ty_Local, !cir.ptr, ["a"] +void local_decl(void) { + struct Local { + int i; + }; + struct Local a; +} + +// CHECK-DAG: cir.func @useRecursiveType +// CHECK-DAG: cir.get_member {{%.}}[0] {name = "next"} : !cir.ptr -> !cir.ptr> +void useRecursiveType(NodeStru* a) { + a->next = 0; +} + +// CHECK-DAG: cir.alloca !ty_SLocal, !cir.ptr, ["loc", init] {alignment = 4 : i64} +// CHECK-DAG: cir.scope { +// CHECK-DAG: cir.alloca !ty_SLocal2E0_, !cir.ptr, ["loc", init] {alignment = 4 : i64} +void local_structs(int a, float b) { + struct SLocal { int x; }; + struct SLocal loc = {a}; + { + struct SLocal { float y; }; + struct SLocal loc = {b}; + } +} diff --git a/clang/test/CIR/CodeGen/struct.cpp b/clang/test/CIR/CodeGen/struct.cpp new file mode 100644 index 000000000000..5b83a573e3b4 --- /dev/null +++ b/clang/test/CIR/CodeGen/struct.cpp @@ -0,0 +1,167 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: 
FileCheck --input-file=%t.cir %s + +struct Bar { + int a; + char b; + void method() {} + void method2(int a) {} + int method3(int a) { return a; } +}; + +struct Foo { + int a; + char b; + Bar z; +}; + +void baz() { + Bar b; + b.method(); + b.method2(4); + int result = b.method3(4); + Foo f; +} + +struct incomplete; +void yoyo(incomplete *i) {} + +// CHECK-DAG: !ty_incomplete = !cir.struct, !cir.int}> + +// CHECK-DAG: !ty_Foo = !cir.struct, !cir.int, !cir.struct, !cir.int}>}> +// CHECK-DAG: !ty_Mandalore = !cir.struct, !cir.ptr, !cir.int} #cir.record.decl.ast> +// CHECK-DAG: !ty_Adv = !cir.struct, !cir.ptr, !cir.int} #cir.record.decl.ast>}> +// CHECK-DAG: !ty_Entry = !cir.struct (!cir.int, !cir.ptr>, !cir.ptr)>>}> + +// CHECK: cir.func linkonce_odr @_ZN3Bar6methodEv(%arg0: !cir.ptr +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// CHECK: cir.func linkonce_odr @_ZN3Bar7method2Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: cir.store %arg1, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// CHECK: cir.func linkonce_odr @_ZN3Bar7method3Ei(%arg0: !cir.ptr {{.*}}, %arg1: !s32i +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK-NEXT: cir.store %arg1, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %3 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %4 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: cir.store %4, %2 : !s32i, !cir.ptr +// CHECK-NEXT: %5 = cir.load %2 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %5 +// CHECK-NEXT: } + +// CHECK: cir.func @_Z3bazv() +// CHECK-NEXT: %0 = cir.alloca !ty_Bar, !cir.ptr, ["b"] {alignment = 4 : i64} +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["result", init] {alignment = 4 : i64} +// CHECK-NEXT: %2 = cir.alloca !ty_Foo, !cir.ptr, ["f"] {alignment = 4 : i64} +// CHECK-NEXT: cir.call @_ZN3Bar6methodEv(%0) : (!cir.ptr) -> () +// CHECK-NEXT: %3 = cir.const #cir.int<4> : !s32i +// CHECK-NEXT: cir.call @_ZN3Bar7method2Ei(%0, %3) : (!cir.ptr, !s32i) -> () +// CHECK-NEXT: %4 = cir.const #cir.int<4> : !s32i +// CHECK-NEXT: %5 = cir.call @_ZN3Bar7method3Ei(%0, %4) : (!cir.ptr, !s32i) -> !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +typedef enum Ways { + ThisIsTheWay = 1000024001, +} Ways; + +typedef struct Mandalore { + Ways w; + const void* n; + int d; +} Mandalore; + +class Adv { + Mandalore x{ThisIsTheWay}; +public: + Adv() {} +}; + +void m() { Adv C; } + +// CHECK: cir.func linkonce_odr @_ZN3AdvC2Ev(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %2 = cir.get_member %1[0] {name = "x"} : !cir.ptr -> !cir.ptr +// CHECK: %3 = cir.get_member %2[0] {name = "w"} : 
!cir.ptr -> !cir.ptr +// CHECK: %4 = cir.const #cir.int<1000024001> : !u32i +// CHECK: cir.store %4, %3 : !u32i, !cir.ptr +// CHECK: %5 = cir.get_member %2[1] {name = "n"} : !cir.ptr -> !cir.ptr> +// CHECK: %6 = cir.const #cir.ptr : !cir.ptr +// CHECK: cir.store %6, %5 : !cir.ptr, !cir.ptr> +// CHECK: %7 = cir.get_member %2[2] {name = "d"} : !cir.ptr -> !cir.ptr +// CHECK: %8 = cir.const #cir.int<0> : !s32i +// CHECK: cir.store %8, %7 : !s32i, !cir.ptr +// CHECK: cir.return +// CHECK: } + +struct A { + int a; +}; + +// Should globally const-initialize struct members. +struct A simpleConstInit = {1}; +// CHECK: cir.global external @simpleConstInit = #cir.const_struct<{#cir.int<1> : !s32i}> : !ty_A + +// Should globally const-initialize arrays with struct members. +struct A arrConstInit[1] = {{1}}; +// CHECK: cir.global external @arrConstInit = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_A]> : !cir.array + +// Should locally copy struct members. +void shouldLocallyCopyStructAssignments(void) { + struct A a = { 3 }; + // CHECK: %[[#SA:]] = cir.alloca !ty_A, !cir.ptr, ["a"] {alignment = 4 : i64} + struct A b = a; + // CHECK: %[[#SB:]] = cir.alloca !ty_A, !cir.ptr, ["b", init] {alignment = 4 : i64} + // cir.copy %[[#SA]] to %[[SB]] : !cir.ptr +} + +A get_default() { return A{2}; } + +struct S { + S(A a = get_default()); +}; + +void h() { S s; } + +// CHECK: cir.func @_Z1hv() +// CHECK: %0 = cir.alloca !ty_S, !cir.ptr, ["s", init] {alignment = 1 : i64} +// CHECK: %1 = cir.alloca !ty_A, !cir.ptr, ["agg.tmp0"] {alignment = 4 : i64} +// CHECK: %2 = cir.call @_Z11get_defaultv() : () -> !ty_A +// CHECK: cir.store %2, %1 : !ty_A, !cir.ptr +// CHECK: %3 = cir.load %1 : !cir.ptr, !ty_A +// CHECK: cir.call @_ZN1SC1E1A(%0, %3) : (!cir.ptr, !ty_A) -> () +// CHECK: cir.return +// CHECK: } + +typedef enum enumy { + A = 1 +} enumy; + +typedef enumy (*fnPtr)(int instance, const char* name, void* function); + +struct Entry { + fnPtr procAddr = nullptr; +}; + +void ppp() { Entry x; } + +// CHECK: cir.func linkonce_odr @_ZN5EntryC2Ev(%arg0: !cir.ptr + +// CHECK: cir.get_member %1[0] {name = "procAddr"} : !cir.ptr -> !cir.ptr, !cir.ptr)>>> diff --git a/clang/test/CIR/CodeGen/structural-binding.cpp b/clang/test/CIR/CodeGen/structural-binding.cpp new file mode 100644 index 000000000000..d70a9509c6f9 --- /dev/null +++ b/clang/test/CIR/CodeGen/structural-binding.cpp @@ -0,0 +1,112 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +struct B { B(); }; + +struct A { + B a; + int b; + char c; +}; + +struct C { + C(int a, int b): a(a), b(b) {} + template + friend const int &get(const C&); + private: + int a; + int b; +}; + +template <> +const int &get<0>(const C& c) { return c.a; } +template <> +const int &get<1>(const C& c) { return c.b; } + +namespace std { + +template +struct tuple_size; + +template <> +struct tuple_size { constexpr inline static unsigned value = 2; }; + +template +struct tuple_element; + +template +struct tuple_element { using type = const int; }; + +} + + +// binding to data members +void f(A &a) { + // CIR: @_Z1fR1A + // LLVM: @_Z1fR1A + + auto &[x, y, z] = a; + (x, y, z); + // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr>, !cir.ptr + // CIR: {{.*}} = cir.get_member %[[a]][0] {name = "a"} : !cir.ptr -> !cir.ptr + // CIR: 
%[[a:.*]] = cir.load %1 : !cir.ptr>, !cir.ptr + // CIR: {{.*}} = cir.get_member %[[a]][2] {name = "b"} : !cir.ptr -> !cir.ptr + // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr>, !cir.ptr + // CIR: {{.*}} = cir.get_member %[[a]][3] {name = "c"} : !cir.ptr -> !cir.ptr + // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 0 + // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 2 + // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 3 + + auto [x2, y2, z2] = a; + (x2, y2, z2); + // CIR: cir.call @_ZN1AC1ERKS_(%2, {{.*}}) : (!cir.ptr, !cir.ptr) -> () + // CIR: {{.*}} = cir.get_member %2[0] {name = "a"} : !cir.ptr -> !cir.ptr + // CIR: {{.*}} = cir.get_member %2[2] {name = "b"} : !cir.ptr -> !cir.ptr + // CIR: {{.*}} = cir.get_member %2[3] {name = "c"} : !cir.ptr -> !cir.ptr + + // for the rest, just expect the codegen doesn't crash + auto &&[x3, y3, z3] = a; + (x3, y3, z3); + + const auto &[x4, y4, z4] = a; + (x4, y4, z4); + + const auto [x5, y5, z5] = a; + (x5, y5, z5); +} + +// binding to a tuple-like type +void g(C &c) { + // CIR: @_Z1gR1C + // LLVM: @_Z1gR1C + + auto [x8, y8] = c; + (x8, y8); + // CIR: cir.call @_ZN1CC1ERKS_(%[[c:.*]], %7) : (!cir.ptr, !cir.ptr) -> () + // CIR: %[[x8:.*]] = cir.call @_Z3getILj0EERKiRK1C(%[[c]]) : (!cir.ptr) -> !cir.ptr + // CIR: cir.store %[[x8]], %[[x8p:.*]] : !cir.ptr, !cir.ptr> + // CIR: %[[x9:.*]] = cir.call @_Z3getILj1EERKiRK1C(%[[c]]) : (!cir.ptr) -> !cir.ptr + // CIR: cir.store %[[x9]], %[[x9p:.*]] : !cir.ptr, !cir.ptr> + // CIR: {{.*}} = cir.load %[[x8p]] : !cir.ptr>, !cir.ptr + // CIR: {{.*}} = cir.load %[[x9p]] : !cir.ptr>, !cir.ptr + // LLVM: call void @_ZN1CC1ERKS_(ptr {{.*}}, ptr {{.*}}) + // LLVM: {{.*}} = call ptr @_Z3getILj0EERKiRK1C(ptr {{.*}}) + // LLVM: {{.*}} = call ptr @_Z3getILj1EERKiRK1C(ptr {{.*}}) + + auto &[x9, y9] = c; + (x9, y9); + // CIR: cir.store %12, %[[cp:.*]] : !cir.ptr, !cir.ptr> + // CIR: %[[c:.*]] = cir.load %[[cp]] : !cir.ptr>, !cir.ptr + // CIR: %[[x8:.*]] = cir.call @_Z3getILj0EERKiRK1C(%[[c]]) : (!cir.ptr) -> !cir.ptr + // CIR: cir.store %[[x8]], %[[x8p:.*]] : !cir.ptr, !cir.ptr> + // CIR: %[[c:.*]] = cir.load %[[cp]] : !cir.ptr>, !cir.ptr + // CIR: %[[x9:.*]] = cir.call @_Z3getILj1EERKiRK1C(%[[c]]) : (!cir.ptr) -> !cir.ptr + // CIR: cir.store %[[x9]], %[[x9p:.*]] : !cir.ptr, !cir.ptr> + // CIR: {{.*}} = cir.load %[[x8p]] : !cir.ptr>, !cir.ptr + // CIR: {{.*}} = cir.load %[[x9p]] : !cir.ptr>, !cir.ptr +} + +// TODO: add test case for binding to an array type +// after ArrayInitLoopExpr is supported diff --git a/clang/test/CIR/CodeGen/switch-gnurange.cpp b/clang/test/CIR/CodeGen/switch-gnurange.cpp new file mode 100644 index 000000000000..f48a32506252 --- /dev/null +++ b/clang/test/CIR/CodeGen/switch-gnurange.cpp @@ -0,0 +1,352 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +enum letter { + A, B, C, D, E, F, G, H, I, J, L +}; + +int sw1(enum letter c) { + switch (c) { + case A ... C: + case D: + case E ... F: + case G ... 
L: + return 1; + default: + return 0; + } +} + +// CIR: cir.func @_Z3sw16letter +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (range, [0, 2] : !s32i) { +// CIR-NEXT: cir.yield +// CIR-NEXT: }, +// CIR-NEXT: case (range, [4, 5] : !s32i) { +// CIR-NEXT: cir.yield +// CIR-NEXT: }, +// CIR-NEXT: case (range, [6, 10] : !s32i) { +// CIR-NEXT: cir.yield +// CIR-NEXT: }, +// CIR-NEXT: case (equal, 3) { +// CIR-NEXT: cir.int<1> +// CIR: cir.return +// CIR-NEXT: }, +// CIR-NEXT: case (default) { +// CIR-NEXT: cir.int<0> +// CIR: cir.return +// CIR-NEXT: } +// CIR-NEXT: ] +// CIR-NEXT: } + +// LLVM: @_Z3sw16letter +// LLVM: switch i32 %[[C:[0-9]+]], label %[[DEFAULT:[0-9]+]] [ +// LLVM-NEXT: i32 3, label %[[CASE_3:[0-9]+]] +// LLVM-NEXT: i32 0, label %[[CASE_0_2:[0-9]+]] +// LLVM-NEXT: i32 1, label %[[CASE_0_2]] +// LLVM-NEXT: i32 2, label %[[CASE_0_2]] +// LLVM-NEXT: i32 4, label %[[CASE_4_5:[0-9]+]] +// LLVM-NEXT: i32 5, label %[[CASE_4_5]] +// LLVM-NEXT: i32 6, label %[[CASE_6_10:[0-9]+]] +// LLVM-NEXT: i32 7, label %[[CASE_6_10]] +// LLVM-NEXT: i32 8, label %[[CASE_6_10]] +// LLVM-NEXT: i32 9, label %[[CASE_6_10]] +// LLVM-NEXT: i32 10, label %[[CASE_6_10]] +// LLVM-NEXT: ] +// LLVM: [[CASE_0_2]]: +// LLVM: br label %[[CASE_4_5]] +// LLVM: [[CASE_4_5]]: +// LLVM: br label %[[CASE_6_10]] +// LLVM: [[CASE_6_10]]: +// LLVM: br label %[[CASE_3]] +// LLVM: [[CASE_3]]: +// LLVM: store i32 1 +// LLVM: ret +// LLVM: [[DEFAULT]]: +// LLVM: store i32 0 +// LLVM: ret + + +int sw2(enum letter c) { + switch (c) { + case A ... C: + case L ... A: + return 1; + default: + return 0; + } +} + +// CIR: cir.func @_Z3sw26letter +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (range, [0, 2] : !s32i) { +// CIR: cir.return +// CIR-NEXT: }, +// CIR-NEXT: case (default) { +// CIR: cir.return +// CIR-NEXT: } +// CIR-NEXT: ] +// CIR-NEXT: } + +// LLVM: @_Z3sw26letter +// LLVM: switch i32 %[[C:[0-9]+]], label %[[DEFAULT:[0-9]+]] [ +// LLVM-NEXT: i32 0, label %[[CASE:[0-9]+]] +// LLVM-NEXT: i32 1, label %[[CASE]] +// LLVM-NEXT: i32 2, label %[[CASE]] +// LLVM-NEXT: ] +// LLVM: [[CASE]]: +// LLVM: store i32 1 +// LLVM: ret +// LLVM: [[DEFAULT]]: +// LLVM: store i32 0 +// LLVM: ret + +void sw3(enum letter c) { + int x = 0; + switch (c) { + case A ... C: + x = 1; + break; + case D ... F: + x = 2; + break; + case G ... I: + x = 3; + break; + case J ... 
L: + x = 4; + break; + } +} + +// CIR: cir.func @_Z3sw36letter +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (range, [0, 2] : !s32i) { +// CIR-NEXT: cir.int<1> +// CIR: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [3, 5] : !s32i) { +// CIR-NEXT: cir.int<2> +// CIR: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [6, 8] : !s32i) { +// CIR-NEXT: cir.int<3> +// CIR: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [9, 10] : !s32i) { +// CIR-NEXT: cir.int<4> +// CIR: cir.break +// CIR-NEXT: } +// CIR-NEXT: ] +// CIR-NEXT: } + +// LLVM: @_Z3sw36letter +// LLVM: switch i32 %[[C:[0-9]+]], label %[[DEFAULT:[0-9]+]] [ +// LLVM-NEXT: i32 0, label %[[CASE_AC:[0-9]+]] +// LLVM-NEXT: i32 1, label %[[CASE_AC]] +// LLVM-NEXT: i32 2, label %[[CASE_AC]] +// LLVM-NEXT: i32 3, label %[[CASE_DF:[0-9]+]] +// LLVM-NEXT: i32 4, label %[[CASE_DF]] +// LLVM-NEXT: i32 5, label %[[CASE_DF]] +// LLVM-NEXT: i32 6, label %[[CASE_GI:[0-9]+]] +// LLVM-NEXT: i32 7, label %[[CASE_GI]] +// LLVM-NEXT: i32 8, label %[[CASE_GI]] +// LLVM-NEXT: i32 9, label %[[CASE_JL:[0-9]+]] +// LLVM-NEXT: i32 10, label %[[CASE_JL]] +// LLVM-NEXT: ] +// LLVM: [[CASE_AC]]: +// LLVM: store i32 1, ptr %[[X:[0-9]+]] +// LLVM: br label %[[EPILOG:[0-9]+]] +// LLVM: [[CASE_DF]]: +// LLVM: store i32 2, ptr %[[X]] +// LLVM: br label %[[EPILOG]] +// LLVM: [[CASE_GI]]: +// LLVM: store i32 3, ptr %[[X]] +// LLVM: br label %[[EPILOG]] +// LLVM: [[CASE_JL]]: +// LLVM: store i32 4, ptr %[[X]] +// LLVM: br label %[[EPILOG]] +// LLVM: [[EPILOG]]: +// LLVM-NEXT: br label %[[EPILOG_END:[0-9]+]] +// LLVM: [[EPILOG_END]]: +// LLVM-NEXT: ret void + +void sw4(int x) { + switch (x) { + case 66 ... 233: + break; + case -50 ... 50: + break; + } +} + +// CIR: cir.func @_Z3sw4i +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (range, [66, 233] : !s32i) { +// CIR-NEXT: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [-50, 50] : !s32i) { +// CIR-NEXT: cir.break +// CIR-NEXT: } +// CIR-NEXT: ] +// CIR-NEXT: } + +// LLVM: @_Z3sw4i +// LLVM: switch i32 %[[X:[0-9]+]], label %[[JUDGE_NEG50_50:[0-9]+]] [ +// LLVM-NEXT: ] +// LLVM: [[CASE_66_233:[0-9]+]]: +// LLVM-NEXT: br label %[[EPILOG:[0-9]+]] +// LLVM: [[CASE_NEG50_50:[0-9]+]]: +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[JUDGE_NEG50_50]]: +// LLVM-NEXT: %[[DIFF:[0-9]+]] = sub i32 %[[X]], -50 +// LLVM-NEXT: %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100 +// LLVM-NEXT: br i1 %[[DIFF_CMP]], label %[[CASE_NEG50_50]], label %[[JUDGE_66_233:[0-9]+]] +// LLVM: [[JUDGE_66_233]]: +// LLVM-NEXT: %[[DIFF:[0-9]+]] = sub i32 %[[X]], 66 +// LLVM-NEXT: %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 167 +// LLVM: br i1 %[[DIFF_CMP]], label %[[CASE_66_233]], label %[[EPILOG]] +// LLVM: [[EPILOG]]: +// LLVM-NEXT: br label %[[EPILOG_END:[0-9]+]] +// LLVM: [[EPILOG_END]]: +// LLVM-NEXT: ret void + +void sw5(int x) { + int y = 0; + switch (x) { + case 100 ... -100: + y = 1; + } +} + +// CIR: cir.func @_Z3sw5i +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (range, [100, -100] : !s32i) { +// CIR-NEXT: cir.int<1> +// CIR: cir.yield +// CIR-NEXT: } +// CIR-NEXT: ] + +// LLVM: @_Z3sw5i +// LLVM: switch i32 %[[X:[0-9]+]], label %[[EPILOG:[0-9]+]] [ +// LLVM-NEXT: ] +// LLVM: [[CASE_100_NEG100:[0-9]+]]: +// LLVM-NEXT: store i32 1, ptr %[[Y:[0-9]+]] +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[EPILOG]]: +// LLVM-NEXT: br label %[[EPILOG_END:[0-9]+]] +// LLVM: [[EPILOG_END]]: +// LLVM-NEXT: ret void + +void sw6(int x) { + int y = 0; + switch (x) { + case -2147483648 ... 
2147483647: + y = 1; + } +} + +// CIR: cir.func @_Z3sw6i +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (range, [-2147483648, 2147483647] : !s32i) { +// CIR-NEXT: cir.int<1> +// CIR: cir.yield +// CIR-NEXT: } +// CIR-NEXT: ] + +// LLVM: @_Z3sw6i +// LLVM: switch i32 %[[X:[0-9]+]], label %[[DEFAULT:[0-9]+]] [ +// LLVM-NEXT: ] +// LLVM: [[CASE_MIN_MAX:[0-9]+]]: +// LLVM-NEXT: store i32 1, ptr %[[Y:[0-9]+]] +// LLVM-NEXT: br label %[[EPILOG:[0-9]+]] +// LLVM: [[DEFAULT]]: +// LLVM-NEXT: %[[DIFF:[0-9]+]] = sub i32 %[[X]], -2147483648 +// LLVM-NEXT: %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], -1 +// LLVM-NEXT: br i1 %[[DIFF_CMP]], label %[[CASE_MIN_MAX]], label %[[EPILOG]] +// LLVM: [[EPILOG]]: +// LLVM-NEXT: br label %[[EPILOG_END:[0-9]+]] +// LLVM: [[EPILOG_END]]: +// LLVM-NEXT: ret void + +void sw7(int x) { + switch(x) { + case 0: + break; + case 100 ... 200: + break; + case 1: + break; + case 300 ... 400: + break; + default: + break; + case 500 ... 600: + break; + } +} + +// CIR: cir.func @_Z3sw7i +// CIR: cir.scope { +// CIR: cir.switch +// CIR-NEXT: case (equal, 0) { +// CIR-NEXT: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [100, 200] : !s32i) { +// CIR-NEXT: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (equal, 1) { +// CIR-NEXT: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [300, 400] : !s32i) { +// CIR-NEXT: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (default) { +// CIR-NEXT: cir.break +// CIR-NEXT: }, +// CIR-NEXT: case (range, [500, 600] : !s32i) { +// CIR-NEXT: cir.break +// CIR-NEXT: } + +// LLVM: @_Z3sw7i +// LLVM: switch i32 %[[X:[0-9]+]], label %[[JUDGE_RANGE_500_600:[0-9]+]] [ +// LLVM-NEXT: i32 0, label %[[CASE_0:[0-9]+]] +// LLVM-NEXT: i32 1, label %[[CASE_1:[0-9]+]] +// LLVM-NEXT: ] +// LLVM: [[CASE_0]]: +// LLVM-NEXT: br label %[[EPILOG:[0-9]+]] +// LLVM: [[CASE_100_200:[0-9]+]]: +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[CASE_1]]: +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[CASE_300_400:[0-9]+]]: +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[JUDGE_RANGE_500_600]]: +// LLVM-NEXT: %[[DIFF:[0-9]+]] = sub i32 %[[X]], 500 +// LLVM-NEXT: %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100 +// LLVM-NEXT: br i1 %[[DIFF_CMP]], label %[[CASE_500_600:[0-9]+]], label %[[JUDGE_RANGE_300_400:[0-9]+]] +// LLVM: [[JUDGE_RANGE_300_400]]: +// LLVM-NEXT: %[[DIFF:[0-9]+]] = sub i32 %[[X]], 300 +// LLVM-NEXT: %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100 +// LLVM-NEXT: br i1 %[[DIFF_CMP]], label %[[CASE_300_400]], label %[[JUDGE_RANGE_100_200:[0-9]+]] +// LLVM: [[JUDGE_RANGE_100_200]]: +// LLVM-NEXT: %[[DIFF:[0-9]+]] = sub i32 %[[X]], 100 +// LLVM-NEXT: %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100 +// LLVM-NEXT: br i1 %[[DIFF_CMP]], label %[[CASE_100_200]], label %[[DEFAULT:[0-9]+]] +// LLVM: [[DEFAULT]]: +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[CASE_500_600]]: +// LLVM-NEXT: br label %[[EPILOG]] +// LLVM: [[EPILOG]]: +// LLVM-NEXT: br label %[[EPILOG_END:[0-9]+]] +// LLVM: [[EPILOG_END]]: +// LLVM-NEXT: ret void + diff --git a/clang/test/CIR/CodeGen/switch-unreachable-after-break.cpp b/clang/test/CIR/CodeGen/switch-unreachable-after-break.cpp new file mode 100644 index 000000000000..762bf98c6adb --- /dev/null +++ b/clang/test/CIR/CodeGen/switch-unreachable-after-break.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// XFAIL: * + +void unreachable_after_break(int a) { + switch(a) { + case 0: + break; + break; + int x = 
1; + } +} + +int unreachable_after_return(int a) { + switch (a) { + case 0: + return 0; + return 1; + int x = 1; + } + return 2; +} diff --git a/clang/test/CIR/CodeGen/switch.cpp b/clang/test/CIR/CodeGen/switch.cpp new file mode 100644 index 000000000000..74b1312fd229 --- /dev/null +++ b/clang/test/CIR/CodeGen/switch.cpp @@ -0,0 +1,349 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void sw1(int a) { + switch (int b = 1; a) { + case 0: + b = b + 1; + break; + case 1: + break; + case 2: { + b = b + 1; + int yolo = 100; + break; + } + } +} +// CHECK: cir.func @_Z3sw1i +// CHECK: cir.switch (%3 : !s32i) [ +// CHECK-NEXT: case (equal, 0) { +// CHECK-NEXT: %4 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %5 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %6 = cir.binop(add, %4, %5) nsw : !s32i +// CHECK-NEXT: cir.store %6, %1 : !s32i, !cir.ptr +// CHECK-NEXT: cir.break +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 1) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 2) { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %4 = cir.alloca !s32i, !cir.ptr, ["yolo", init] +// CHECK-NEXT: %5 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %6 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %7 = cir.binop(add, %5, %6) nsw : !s32i +// CHECK-NEXT: cir.store %7, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %8 = cir.const #cir.int<100> : !s32i +// CHECK-NEXT: cir.store %8, %4 : !s32i, !cir.ptr +// CHECK-NEXT: cir.break +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } + +void sw2(int a) { + switch (int yolo = 2; a) { + case 3: + // "fomo" has the same lifetime as "yolo" + int fomo = 0; + yolo = yolo + fomo; + break; + } +} + +// CHECK: cir.func @_Z3sw2i +// CHECK: cir.scope { +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["yolo", init] +// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr, ["fomo", init] +// CHECK: cir.switch (%4 : !s32i) [ +// CHECK-NEXT: case (equal, 3) { +// CHECK-NEXT: %5 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.store %5, %2 : !s32i, !cir.ptr + +void sw3(int a) { + switch (a) { + default: + break; + } +} + +// CHECK: cir.func @_Z3sw3i +// CHECK: cir.scope { +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.switch (%1 : !s32i) [ +// CHECK-NEXT: case (default) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: } +// CHECK-NEXT: ] + +int sw4(int a) { + switch (a) { + case 42: { + return 3; + } + default: + return 2; + } + return 0; +} + +// CHECK: cir.func @_Z3sw4i +// CHECK: cir.switch (%4 : !s32i) [ +// CHECK-NEXT: case (equal, 42) { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %5 = cir.const #cir.int<3> : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %6 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %6 : !s32i +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: }, +// CHECK-NEXT: case (default) { +// CHECK-NEXT: %5 = cir.const #cir.int<2> : !s32i +// CHECK-NEXT: cir.store %5, %1 : !s32i, !cir.ptr +// CHECK-NEXT: %6 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %6 : !s32i +// CHECK-NEXT: } +// CHECK-NEXT: ] + +void sw5(int a) { + switch (a) { + case 1:; + } +} + +// CHECK: cir.func @_Z3sw5i +// CHECK: cir.switch (%1 : !s32i) [ +// CHECK-NEXT: case (equal, 1) { +// CHECK-NEXT: cir.yield + +void sw6(int a) { + switch (a) { + case 0: + case 1: + case 2: + break; + case 3: + case 4: + case 5: + break; + } +} + +// CHECK: cir.func @_Z3sw6i +// CHECK: cir.switch (%1 : !s32i) [ +// 
CHECK-NEXT: case (anyof, [0, 1, 2] : !s32i) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: }, +// CHECK-NEXT: case (anyof, [3, 4, 5] : !s32i) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: } + +void sw7(int a) { + switch (a) { + case 0: + case 1: + case 2: + int x; + case 3: + case 4: + case 5: + break; + } +} + +// CHECK: cir.func @_Z3sw7i +// CHECK: case (anyof, [0, 1, 2] : !s32i) { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: }, +// CHECK-NEXT: case (anyof, [3, 4, 5] : !s32i) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: } + +void sw8(int a) { + switch (a) + { + case 3: + break; + case 4: + default: + break; + } +} + +//CHECK: cir.func @_Z3sw8i +//CHECK: case (equal, 3) +//CHECK-NEXT: cir.break +//CHECK-NEXT: }, +//CHECK-NEXT: case (equal, 4) { +//CHECK-NEXT: cir.yield +//CHECK-NEXT: } +//CHECK-NEXT: case (default) { +//CHECK-NEXT: cir.break +//CHECK-NEXT: } + +void sw9(int a) { + switch (a) + { + case 3: + break; + default: + case 4: + break; + } +} + +//CHECK: cir.func @_Z3sw9i +//CHECK: case (equal, 3) { +//CHECK-NEXT: cir.break +//CHECK-NEXT: } +//CHECK-NEXT: case (default) { +//CHECK-NEXT: cir.yield +//CHECK-NEXT: } +//CHECK: case (equal, 4) +//CHECK-NEXT: cir.break +//CHECK-NEXT: } + +void sw10(int a) { + switch (a) + { + case 3: + break; + case 4: + default: + case 5: + break; + } +} + +//CHECK: cir.func @_Z4sw10i +//CHECK: case (equal, 3) +//CHECK-NEXT: cir.break +//CHECK-NEXT: }, +//CHECK-NEXT: case (equal, 4) { +//CHECK-NEXT: cir.yield +//CHECK-NEXT: } +//CHECK-NEXT: case (default) { +//CHECK-NEXT: cir.yield +//CHECK-NEXT: } +//CHECK-NEXT: case (equal, 5) { +//CHECK-NEXT: cir.break +//CHECK-NEXT: } + +void sw11(int a) { + switch (a) + { + case 3: + break; + case 4: + case 5: + default: + case 6: + case 7: + break; + } +} + +//CHECK: cir.func @_Z4sw11i +//CHECK: case (equal, 3) +//CHECK-NEXT: cir.break +//CHECK-NEXT: }, +//CHECK-NEXT: case (anyof, [4, 5] : !s32i) { +//CHECK-NEXT: cir.yield +//CHECK-NEXT: } +//CHECK-NEXT: case (default) { +//CHECK-NEXT: cir.yield +//CHECK-NEXT: } +//CHECK-NEXT: case (anyof, [6, 7] : !s32i) { +//CHECK-NEXT: cir.break +//CHECK-NEXT: } + +void sw12(int a) { + switch (a) + { + case 3: + return; + break; + } +} + +// CHECK: cir.func @_Z4sw12i +// CHECK: cir.scope { +// CHECK: cir.switch +// CHECK-NEXT: case (equal, 3) { +// CHECK-NEXT: cir.return +// CHECK-NEXT: ^bb1: // no predecessors +// CHECK-NEXT: cir.break +// CHECK-NEXT: } + +void sw13(int a, int b) { + switch (a) { + case 1: + switch (b) { + case 2: + break; + } + } +} + +// CHECK: cir.func @_Z4sw13ii +// CHECK: cir.scope { +// CHECK: cir.switch +// CHECK-NEXT: case (equal, 1) { +// CHECK-NEXT: cir.scope { +// CHECK: cir.switch +// CHECK-NEXT: case (equal, 2) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK: } +// CHECK: cir.return + +void fallthrough(int x) { + switch (x) { + case 1: + __attribute__((fallthrough)); + case 2: + break; + default: + break; + } +} + +// CHECK: cir.func @_Z11fallthroughi +// CHECK: cir.scope { +// CHECK: cir.switch (%1 : !s32i) [ +// CHECK-NEXT: case (equal, 1) { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 2) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: }, +// CHECK-NEXT: case (default) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } + +int unreachable_after_break_1(int a) { + switch (a) { + case(42): + break; + goto exit; + default: + return 0; + }; + +exit: + return -1; + +} +// CHECK: cir.func 
@_Z25unreachable_after_break_1i +// CHECK: case (equal, 42) { +// CHECK: cir.break +// CHECK: ^bb1: // no predecessors +// CHECK: cir.goto "exit" +// CHECK: } diff --git a/clang/test/CIR/CodeGen/temporaries.cpp b/clang/test/CIR/CodeGen/temporaries.cpp new file mode 100644 index 000000000000..1dafb75e8a70 --- /dev/null +++ b/clang/test/CIR/CodeGen/temporaries.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: cir-translate %t.cir -cir-to-llvmir -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +struct E { + ~E(); + E operator!(); +}; + +void f() { + !E(); +} + +// CHECK: cir.func private @_ZN1EC1Ev(!cir.ptr) extra(#fn_attr) +// CHECK-NEXT: cir.func private @_ZN1EntEv(!cir.ptr) -> !ty_E +// CHECK-NEXT: cir.func private @_ZN1ED1Ev(!cir.ptr) extra(#fn_attr) +// CHECK-NEXT: cir.func @_Z1fv() extra(#fn_attr1) { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[ONE:[0-9]+]] = cir.alloca !ty_E, !cir.ptr, ["agg.tmp.ensured"] {alignment = 1 : i64} +// CHECK-NEXT: %[[TWO:[0-9]+]] = cir.alloca !ty_E, !cir.ptr, ["ref.tmp0"] {alignment = 1 : i64} +// CHECK-NEXT: cir.call @_ZN1EC1Ev(%1) : (!cir.ptr) -> () extra(#fn_attr) +// CHECK-NEXT: %[[THREE:[0-9]+]] = cir.call @_ZN1EntEv(%[[TWO]]) : (!cir.ptr) -> !ty_E +// CHECK-NEXT: cir.store %[[THREE]], %[[ONE]] : !ty_E, !cir.ptr +// CHECK-NEXT: cir.call @_ZN1ED1Ev(%[[ONE]]) : (!cir.ptr) -> () extra(#fn_attr) +// CHECK-NEXT: cir.call @_ZN1ED1Ev(%[[TWO]]) : (!cir.ptr) -> () extra(#fn_attr) +// CHECK-NEXT: } +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +const unsigned int n = 1234; +const int &r = (const int&)n; + +// CHECK: cir.global "private" constant internal @_ZGR1r_ = #cir.int<1234> : !s32i +// CHECK-NEXT: cir.global external @r = #cir.global_view<@_ZGR1r_> : !cir.ptr {alignment = 8 : i64} + +// LLVM: @_ZGR1r_ = internal constant i32 1234, align 4 +// LLVM-NEXT: @r = global ptr @_ZGR1r_, align 8 + diff --git a/clang/test/CIR/CodeGen/temporary-materialization.cpp b/clang/test/CIR/CodeGen/temporary-materialization.cpp new file mode 100644 index 000000000000..3b063db09dc3 --- /dev/null +++ b/clang/test/CIR/CodeGen/temporary-materialization.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int make_int(); + +int test() { + const int &x = make_int(); + return x; +} + +// CHECK: cir.func @_Z4testv() +// CHECK-NEXT: %{{.+}} = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %[[#TEMP_SLOT:]] = cir.alloca !s32i, !cir.ptr, ["ref.tmp0", init] {alignment = 4 : i64} +// CHECK-NEXT: %[[#x:]] = cir.alloca !cir.ptr, !cir.ptr>, ["x", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[#TEMP_VALUE:]] = cir.call @_Z8make_intv() : () -> !s32i +// CHECK-NEXT: cir.store %[[#TEMP_VALUE]], %[[#TEMP_SLOT]] : !s32i, !cir.ptr +// CHECK-NEXT: } +// CHECK-NEXT: cir.store %[[#TEMP_SLOT]], %[[#x]] : !cir.ptr, !cir.ptr> +// CHECK: } + +int test_scoped() { + int x = make_int(); + { + const int &y = make_int(); + x = y; + } + return x; +} + +// CHECK: cir.func @_Z11test_scopedv() +// CHECK-NEXT: %{{.+}} = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK-NEXT: %{{.+}} = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: cir.scope { +// CHECK-NEXT: %[[#TEMP_SLOT:]] = cir.alloca !s32i, !cir.ptr, ["ref.tmp0", init] {alignment = 4 : i64} +// CHECK-NEXT: 
%[[#y:]] = cir.alloca !cir.ptr, !cir.ptr>, ["y", init] {alignment = 8 : i64} +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %[[#TEMP_VALUE:]] = cir.call @_Z8make_intv() : () -> !s32i +// CHECK-NEXT: cir.store %[[#TEMP_VALUE]], %[[#TEMP_SLOT]] : !s32i, !cir.ptr +// CHECK-NEXT: } +// CHECK-NEXT: cir.store %[[#TEMP_SLOT]], %[[#y]] : !cir.ptr, !cir.ptr> +// CHECK: } +// CHECK: } diff --git a/clang/test/CIR/CodeGen/ternary.cpp b/clang/test/CIR/CodeGen/ternary.cpp new file mode 100644 index 000000000000..7238459bd520 --- /dev/null +++ b/clang/test/CIR/CodeGen/ternary.cpp @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int x(int y) { + return y > 0 ? 3 : 5; +} + +// CHECK: cir.func @_Z1xi +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} +// CHECK: %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: cir.store %arg0, %0 : !s32i, !cir.ptr +// CHECK: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK: %3 = cir.const #cir.int<0> : !s32i +// CHECK: %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool +// CHECK: %5 = cir.const #cir.int<3> : !s32i +// CHECK: %6 = cir.const #cir.int<5> : !s32i +// CHECK: %7 = cir.select if %4 then %5 else %6 : (!cir.bool, !s32i, !s32i) -> !s32i +// CHECK: cir.store %7, %1 : !s32i, !cir.ptr +// CHECK: %8 = cir.load %1 : !cir.ptr, !s32i +// CHECK: cir.return %8 : !s32i +// CHECK: } + +typedef enum { + API_A, + API_EnumSize = 0x7fffffff +} APIType; + +void oba(const char *); + +void m(APIType api) { + ((api == API_A) ? (static_cast(0)) : oba("yo.cpp")); +} + +// CHECK: cir.func @_Z1m7APIType +// CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["api", init] {alignment = 4 : i64} +// CHECK: cir.store %arg0, %0 : !u32i, !cir.ptr +// CHECK: %1 = cir.load %0 : !cir.ptr, !u32i +// CHECK: %2 = cir.cast(integral, %1 : !u32i), !s32i +// CHECK: %3 = cir.const #cir.int<0> : !u32i +// CHECK: %4 = cir.cast(integral, %3 : !u32i), !s32i +// CHECK: %5 = cir.cmp(eq, %2, %4) : !s32i, !cir.bool +// CHECK: cir.ternary(%5, true { +// CHECK: %6 = cir.const #cir.int<0> : !s32i +// CHECK: cir.yield +// CHECK: }, false { +// CHECK: %6 = cir.get_global @".str" : !cir.ptr> +// CHECK: %7 = cir.cast(array_to_ptrdecay, %6 : !cir.ptr>), !cir.ptr +// CHECK: cir.call @_Z3obaPKc(%7) : (!cir.ptr) -> () +// CHECK: cir.yield +// CHECK: }) : (!cir.bool) -> () +// CHECK: cir.return +// CHECK: } + +int foo(int a, int b) { + if (a < b ? 
0 : a) + return -1; + return 0; +} + +// CHECK: cir.func @_Z3fooii +// CHECK: [[A0:%.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: [[B0:%.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: [[CMP:%.*]] = cir.cmp(lt, [[A0]], [[B0]]) : !s32i, !cir.bool +// CHECK: [[RES:%.*]] = cir.ternary([[CMP]], true { +// CHECK: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i +// CHECK: cir.yield [[ZERO]] : !s32i +// CHECK: }, false { +// CHECK: [[A1:%.*]] = cir.load {{.*}} : !cir.ptr, !s32i +// CHECK: cir.yield [[A1]] : !s32i +// CHECK: }) : (!cir.bool) -> !s32i +// CHECK: [[RES_CAST:%.*]] = cir.cast(int_to_bool, [[RES]] : !s32i), !cir.bool +// CHECK: cir.if [[RES_CAST]] diff --git a/clang/test/CIR/CodeGen/three-way-comparison.cpp b/clang/test/CIR/CodeGen/three-way-comparison.cpp new file mode 100644 index 000000000000..3a6f738c8fcc --- /dev/null +++ b/clang/test/CIR/CodeGen/three-way-comparison.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -DNON_CANONICAL_CMP_RESULTS -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=NONCANONICAL-BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -DNON_CANONICAL_CMP_RESULTS -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=NONCANONICAL-AFTER + +#include "Inputs/std-compare.h" + +// BEFORE: #cmp3way_info_partial_ltn1eq0gt1unn127_ = #cir.cmp3way_info +// BEFORE: #cmp3way_info_strong_ltn1eq0gt1_ = #cir.cmp3way_info +// BEFORE: !ty_std3A3A__13A3Apartial_ordering = !cir.struct} +// BEFORE: !ty_std3A3A__13A3Astrong_ordering = !cir.struct} + +auto three_way_strong(int x, int y) { + return x <=> y; +} + +// BEFORE: cir.func @_Z16three_way_strongii +// BEFORE: %{{.+}} = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_ltn1eq0gt1_) : !s8i +// BEFORE: } + +// AFTER: cir.func @_Z16three_way_strongii +// AFTER: %{{.+}} = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_ltn1eq0gt1_) : !s8i +// AFTER: } + +// NONCANONICAL-BEFORE: #cmp3way_info_strong_lt1eq2gt3_ = #cir.cmp3way_info +// NONCANONICAL-BEFORE: cir.func @_Z16three_way_strongii +// NONCANONICAL-BEFORE: %{{.+}} = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_lt1eq2gt3_) : !s8i +// NONCANONICAL-BEFORE: } + +// NONCANONICAL-AFTER: #cmp3way_info_strong_ltn1eq0gt1_ = #cir.cmp3way_info +// NONCANONICAL-AFTER: cir.func @_Z16three_way_strongii +// NONCANONICAL-AFTER: %[[#CMP3WAY_RESULT:]] = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_ltn1eq0gt1_) : !s8i +// NONCANONICAL-AFTER-NEXT: %[[#NEGONE:]] = cir.const #cir.int<-1> : !s8i +// NONCANONICAL-AFTER-NEXT: %[[#ONE:]] = cir.const #cir.int<1> : !s8i +// NONCANONICAL-AFTER-NEXT: %[[#CMP_TO_NEGONE:]] = cir.cmp(eq, %[[#CMP3WAY_RESULT]], %[[#NEGONE]]) : !s8i, !cir.bool +// NONCANONICAL-AFTER-NEXT: %[[#A:]] = cir.select if %[[#CMP_TO_NEGONE]] then %[[#ONE]] else %[[#CMP3WAY_RESULT]] : (!cir.bool, !s8i, !s8i) -> !s8i +// NONCANONICAL-AFTER-NEXT: %[[#ZERO:]] = cir.const #cir.int<0> : !s8i +// NONCANONICAL-AFTER-NEXT: %[[#TWO:]] = cir.const #cir.int<2> : !s8i +// NONCANONICAL-AFTER-NEXT: 
%[[#CMP_TO_ZERO:]] = cir.cmp(eq, %[[#A]], %[[#ZERO]]) : !s8i, !cir.bool +// NONCANONICAL-AFTER-NEXT: %[[#B:]] = cir.select if %[[#CMP_TO_ZERO]] then %[[#TWO]] else %[[#A]] : (!cir.bool, !s8i, !s8i) -> !s8i +// NONCANONICAL-AFTER-NEXT: %[[#ONE2:]] = cir.const #cir.int<1> : !s8i +// NONCANONICAL-AFTER-NEXT: %[[#THREE:]] = cir.const #cir.int<3> : !s8i +// NONCANONICAL-AFTER-NEXT: %[[#CMP_TO_ONE:]] = cir.cmp(eq, %[[#B]], %[[#ONE2]]) : !s8i, !cir.bool +// NONCANONICAL-AFTER-NEXT: %{{.+}} = cir.select if %[[#CMP_TO_ONE]] then %[[#THREE]] else %[[#B]] : (!cir.bool, !s8i, !s8i) -> !s8i +// NONCANONICAL-AFTER: } + +auto three_way_weak(float x, float y) { + return x <=> y; +} + +// BEFORE: cir.func @_Z14three_way_weakff +// BEFORE: %{{.+}} = cir.cmp3way(%{{.+}} : !cir.float, %{{.+}}, #cmp3way_info_partial_ltn1eq0gt1unn127_) : !s8i +// BEFORE: } + +// AFTER: cir.func @_Z14three_way_weakff +// AFTER: %[[#LHS:]] = cir.load %0 : !cir.ptr, !cir.float +// AFTER-NEXT: %[[#RHS:]] = cir.load %1 : !cir.ptr, !cir.float +// AFTER-NEXT: %[[#LT:]] = cir.const #cir.int<-1> : !s8i +// AFTER-NEXT: %[[#EQ:]] = cir.const #cir.int<0> : !s8i +// AFTER-NEXT: %[[#GT:]] = cir.const #cir.int<1> : !s8i +// AFTER-NEXT: %[[#UNORDERED:]] = cir.const #cir.int<-127> : !s8i +// AFTER-NEXT: %[[#CMP_LT:]] = cir.cmp(lt, %[[#LHS]], %[[#RHS]]) : !cir.float, !cir.bool +// AFTER-NEXT: %[[#CMP_EQ:]] = cir.cmp(eq, %[[#LHS]], %[[#RHS]]) : !cir.float, !cir.bool +// AFTER-NEXT: %[[#CMP_GT:]] = cir.cmp(gt, %[[#LHS]], %[[#RHS]]) : !cir.float, !cir.bool +// AFTER-NEXT: %[[#CMP_EQ_RES:]] = cir.select if %[[#CMP_EQ]] then %[[#EQ]] else %[[#UNORDERED]] : (!cir.bool, !s8i, !s8i) -> !s8i +// AFTER-NEXT: %[[#CMP_GT_RES:]] = cir.select if %[[#CMP_GT]] then %[[#GT]] else %[[#CMP_EQ_RES]] : (!cir.bool, !s8i, !s8i) -> !s8i +// AFTER-NEXT: %{{.+}} = cir.select if %[[#CMP_LT]] then %[[#LT]] else %[[#CMP_GT_RES]] : (!cir.bool, !s8i, !s8i) -> !s8i +// AFTER: } diff --git a/clang/test/CIR/CodeGen/throw.cpp b/clang/test/CIR/CodeGen/throw.cpp new file mode 100644 index 000000000000..c2395c3725c3 --- /dev/null +++ b/clang/test/CIR/CodeGen/throw.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s + +double d(int a, int b) { + if (b == 0) + throw "Division by zero condition!"; + return (a/b); +} + +// CIR: cir.if +// CIR-NEXT: %[[ADDR:.*]] = cir.alloc.exception 8 +// CIR-NEXT: %[[STR:.*]] = cir.get_global @".str" : !cir.ptr> +// CIR-NEXT: %[[STR_ADD:.*]] = cir.cast(array_to_ptrdecay, %[[STR]] : !cir.ptr>), !cir.ptr +// CIR-NEXT: cir.store %[[STR_ADD]], %[[ADDR]] : !cir.ptr, !cir.ptr> +// CIR-NEXT: cir.throw %[[ADDR]] : !cir.ptr>, @_ZTIPKc +// CIR-NEXT: cir.unreachable +// CIR-NEXT: } + +// LLVM: %[[ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 8) +// LLVM: store ptr @.str, ptr %[[ADDR]], align 8 +// LLVM: call void @__cxa_throw(ptr %[[ADDR]], ptr @_ZTIPKc, ptr null) +// LLVM: unreachable \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/tls.c b/clang/test/CIR/CodeGen/tls.c new file mode 100644 index 000000000000..499afad56ee2 --- /dev/null +++ b/clang/test/CIR/CodeGen/tls.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s 
-o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +extern __thread int b; +int c(void) { return *&b; } +// CIR: cir.global "private" external tls_dyn @b : !s32i +// CIR: cir.func @c() -> !s32i +// CIR: %[[TLS_ADDR:.*]] = cir.get_global thread_local @b : !cir.ptr + +__thread int a; +// CIR: cir.global external tls_dyn @a = #cir.int<0> : !s32i + +// LLVM: @b = external thread_local global i32 +// LLVM: @a = thread_local global i32 0 + +// LLVM-LABEL: @c +// LLVM: = call ptr @llvm.threadlocal.address.p0(ptr @b) \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/trap.cpp b/clang/test/CIR/CodeGen/trap.cpp new file mode 100644 index 000000000000..2d1089421876 --- /dev/null +++ b/clang/test/CIR/CodeGen/trap.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void foo(); + +void basic() { + foo(); + __builtin_trap(); +} + +// CHECK: cir.func @_Z5basicv() +// CHECK-NEXT: cir.call @_Z3foov() : () -> () +// CHECK-NEXT: cir.trap +// CHECK-NEXT: } + +void code_after_unreachable() { + foo(); + __builtin_trap(); + foo(); +} + +// CHECK: cir.func @_Z22code_after_unreachablev() +// CHECK-NEXT: cir.call @_Z3foov() : () -> () +// CHECK-NEXT: cir.trap +// CHECK-NEXT: ^bb1: +// CHECK-NEXT: cir.call @_Z3foov() : () -> () +// CHECK-NEXT: cir.return +// CHECK-NEXT: } diff --git a/clang/test/CIR/CodeGen/try-catch-dtors.cpp b/clang/test/CIR/CodeGen/try-catch-dtors.cpp new file mode 100644 index 000000000000..3f5e2b9d46d2 --- /dev/null +++ b/clang/test/CIR/CodeGen/try-catch-dtors.cpp @@ -0,0 +1,312 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir-flat %s -o %t.flat.cir +// RUN: FileCheck --input-file=%t.flat.cir --check-prefix=CIR_FLAT %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +struct Vec { + Vec(); + Vec(Vec&&); + ~Vec(); +}; + +void yo() { + int r = 1; + try { + Vec v; + } catch (...) 
{ + r++; + } +} + +// CIR-DAG: ![[VecTy:.*]] = !cir.struct}> +// CIR-DAG: ![[S1:.*]] = !cir.struct}>}> + +// CIR_FLAT-DAG: ![[VecTy:.*]] = !cir.struct}> +// CIR_FLAT-DAG: ![[S1:.*]] = !cir.struct}>}> + +// CIR: cir.scope { +// CIR: %[[VADDR:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v", init] +// CIR: cir.try { +// CIR: cir.call exception @_ZN3VecC1Ev(%[[VADDR]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[VADDR]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } catch [type #cir.all { +// CIR: cir.catch_param -> !cir.ptr +// CIR: }] +// CIR: } +// CIR: cir.return + +// LLVM-LABEL: @_Z2yov() + +// LLVM: 2: +// LLVM: %[[Vec:.*]] = alloca %struct.Vec +// LLVM: br label %[[INVOKE_BB:.*]], + +// LLVM: [[INVOKE_BB]]: +// LLVM: invoke void @_ZN3VecC1Ev(ptr %[[Vec]]) +// LLVM: to label %[[DTOR_BB:.*]] unwind label %[[LPAD_BB:.*]], + +// LLVM: [[DTOR_BB]]: +// LLVM: call void @_ZN3VecD1Ev(ptr %[[Vec]]) +// LLVM: br label %15 + +// LLVM: [[LPAD_BB]]: +// LLVM: landingpad { ptr, i32 } +// LLVM: catch ptr null +// LLVM: br label %[[CATCH_BB:.*]], + +// LLVM: [[CATCH_BB]]: +// LLVM: call ptr @__cxa_begin_catch +// LLVM: call void @__cxa_end_catch() +// LLVM: br label %[[RET_BB:.*]], + +// LLVM: [[RET_BB]]: +// LLVM: ret void + +struct S1 { + Vec v; +}; + +void yo2() { + int r = 1; + try { + Vec v; + S1((Vec&&) v); + } catch (...) { + r++; + } +} +// CIR-LABEL: @_Z3yo2v +// CIR: cir.scope { +// CIR: cir.alloca ![[VecTy]] +// CIR: cir.try { +// CIR: cir.call exception @_ZN3VecC1Ev +// CIR: cir.scope { +// CIR: cir.alloca ![[S1:.*]], !cir.ptr, ["agg.tmp.ensured"] +// CIR: cir.call exception @_ZN3VecC1EOS_{{.*}} cleanup { +// CIR: cir.call @_ZN3VecD1Ev +// CIR: cir.yield +// CIR: cir.call @_ZN2S1D2Ev +// CIR: } +// CIR: cir.call @_ZN3VecD1Ev +// CIR: cir.yield +// CIR: } catch [type #cir.all { +// CIR: cir.catch_param -> !cir.ptr +// CIR: cir.yield +// CIR: }] +// CIR: } +// CIR: cir.return +// CIR: } + +// CIR_FLAT-LABEL: @_Z3yo2v +// CIR_FLAT: cir.try_call @_ZN3VecC1Ev(%2) ^[[NEXT_CALL_PREP:.*]], ^[[PAD_NODTOR:.*]] : (!cir.ptr) -> () +// CIR_FLAT: ^[[NEXT_CALL_PREP]]: +// CIR_FLAT: cir.br ^[[NEXT_CALL:.*]] loc +// CIR_FLAT: ^[[NEXT_CALL]]: +// CIR_FLAT: cir.try_call @_ZN3VecC1EOS_({{.*}}) ^[[CONT0:.*]], ^[[PAD_DTOR:.*]] : +// CIR_FLAT: ^[[CONT0]]: +// CIR_FLAT: cir.call @_ZN2S1D2Ev +// CIR_FLAT: cir.br ^[[CONT1:.*]] loc +// CIR_FLAT: ^[[CONT1]]: +// CIR_FLAT: cir.call @_ZN3VecD1Ev +// CIR_FLAT: cir.br ^[[AFTER_TRY:.*]] loc +// CIR_FLAT: ^[[PAD_NODTOR]]: +// CIR_FLAT: %exception_ptr, %type_id = cir.eh.inflight_exception +// CIR_FLAT: cir.br ^[[CATCH_BEGIN:.*]](%exception_ptr : !cir.ptr) +// CIR_FLAT: ^[[PAD_DTOR]]: +// CIR_FLAT: %exception_ptr_0, %type_id_1 = cir.eh.inflight_exception +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%2) : (!cir.ptr) -> () +// CIR_FLAT: cir.br ^[[CATCH_BEGIN]](%exception_ptr_0 : !cir.ptr) +// CIR_FLAT: ^[[CATCH_BEGIN]]( +// CIR_FLAT: cir.catch_param begin +// CIR_FLAT: cir.br ^[[AFTER_TRY]] +// CIR_FLAT: ^[[AFTER_TRY]]: +// CIR_FLAT: cir.return +// CIR_FLAT: } + +void yo3(bool x) { + int r = 1; + try { + Vec v1, v2, v3, v4; + } catch (...) 
{ + r++; + } +} + +// CIR-LABEL: @_Z3yo3b +// CIR: cir.scope { +// CIR: %[[V1:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v1" +// CIR: %[[V2:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v2" +// CIR: %[[V3:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v3" +// CIR: %[[V4:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v4" +// CIR: cir.try { +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V2]]) : (!cir.ptr) -> () cleanup { +// CIR: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V3]]) : (!cir.ptr) -> () cleanup { +// CIR: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V4]]) : (!cir.ptr) -> () cleanup { +// CIR: cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } +// CIR: cir.call @_ZN3VecD1Ev(%[[V4]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } catch [type #cir.all { +// CIR: }] +// CIR: } +// CIR: cir.return + +// CIR_FLAT-LABEL: @_Z3yo3b +// CIR_FLAT: ^bb1: +// CIR_FLAT: %[[V1:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v1" +// CIR_FLAT: %[[V2:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v2" +// CIR_FLAT: %[[V3:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v3" +// CIR_FLAT: %[[V4:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v4" +// CIR_FLAT: cir.br ^[[CALL0:.*]] loc +// CIR_FLAT: ^[[CALL0]]: +// CIR_FLAT: cir.try_call @_ZN3VecC1Ev(%[[V1]]) ^[[CALL1:.*]], ^[[CLEANUP_V1:.*]] : (!cir.ptr) -> () +// CIR_FLAT: ^[[CALL1]]: +// CIR_FLAT: cir.try_call @_ZN3VecC1Ev(%[[V2]]) ^[[CALL2:.*]], ^[[CLEANUP_V2:.*]] : (!cir.ptr) -> () +// CIR_FLAT: ^[[CALL2]]: +// CIR_FLAT: cir.try_call @_ZN3VecC1Ev(%[[V3]]) ^[[CALL3:.*]], ^[[CLEANUP_V3:.*]] : (!cir.ptr) -> () +// CIR_FLAT: ^[[CALL3]]: +// CIR_FLAT: cir.try_call @_ZN3VecC1Ev(%[[V4]]) ^[[NOTROW_CLEANUP:.*]], ^[[CLEANUP_V4:.*]] : (!cir.ptr) -> () +// CIR_FLAT: ^[[NOTROW_CLEANUP]]: +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V4]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.br ^[[AFTER_TRY:.*]] loc +// CIR_FLAT: ^[[CLEANUP_V1]]: +// CIR_FLAT: %exception_ptr, %type_id = cir.eh.inflight_exception +// CIR_FLAT: cir.br ^[[CATCH_BEGIN:.*]](%exception_ptr : !cir.ptr) +// CIR_FLAT: ^[[CLEANUP_V2]]: +// CIR_FLAT: %exception_ptr_0, %type_id_1 = cir.eh.inflight_exception +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.br ^[[CATCH_BEGIN]](%exception_ptr_0 : !cir.ptr) +// CIR_FLAT: ^[[CLEANUP_V3]]: +// CIR_FLAT: %exception_ptr_2, %type_id_3 = cir.eh.inflight_exception +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.br ^[[CATCH_BEGIN]](%exception_ptr_2 : !cir.ptr) +// CIR_FLAT: ^[[CLEANUP_V4]]: +// CIR_FLAT: %exception_ptr_4, %type_id_5 = cir.eh.inflight_exception +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// 
CIR_FLAT: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR_FLAT: cir.br ^[[CATCH_BEGIN]](%exception_ptr_4 : !cir.ptr) +// CIR_FLAT: ^[[CATCH_BEGIN]]({{.*}} +// CIR_FLAT: cir.catch_param begin +// CIR_FLAT: cir.br ^[[AFTER_TRY]] +// CIR_FLAT: ^[[AFTER_TRY]]: +// CIR_FLAT: cir.return + +// LLVM-LABEL: @_Z3yo3b +// LLVM: %[[V1:.*]] = alloca %struct.Vec +// LLVM: %[[V2:.*]] = alloca %struct.Vec +// LLVM: %[[V3:.*]] = alloca %struct.Vec +// LLVM: %[[V4:.*]] = alloca %struct.Vec +// LLVM: br label %[[CALL0:.*]], +// LLVM: [[CALL0]]: +// LLVM: invoke void @_ZN3VecC1Ev(ptr %[[V1]]) +// LLVM: to label %[[CALL1:.*]] unwind label %[[LPAD0:.*]], +// LLVM: [[CALL1]]: +// LLVM: invoke void @_ZN3VecC1Ev(ptr %[[V2]]) +// LLVM: to label %[[CALL2:.*]] unwind label %[[LPAD1:.*]], +// LLVM: [[CALL2]]: +// LLVM: invoke void @_ZN3VecC1Ev(ptr %[[V3]]) +// LLVM: to label %[[CALL3:.*]] unwind label %[[LPAD2:.*]], +// LLVM: [[CALL3]]: +// LLVM: invoke void @_ZN3VecC1Ev(ptr %[[V4]]) +// LLVM: to label %[[REGULAR_CLEANUP:.*]] unwind label %[[LPAD3:.*]], +// LLVM: [[REGULAR_CLEANUP]]: +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V4]]), +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V3]]), +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V2]]), +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V1]]), +// LLVM: br label %[[RET:.*]], +// LLVM: [[LPAD0]]: +// LLVM: landingpad { ptr, i32 } +// LLVM: catch ptr null, +// LLVM: br label %[[CATCH:.*]], +// LLVM: [[LPAD1]]: +// LLVM: landingpad { ptr, i32 } +// LLVM: catch ptr null, +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V1]]), +// LLVM: br label %[[CATCH]], +// LLVM: [[LPAD2]]: +// LLVM: landingpad { ptr, i32 } +// LLVM: catch ptr null, +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V2]]), +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V1]]), +// LLVM: br label %[[CATCH]], +// LLVM: [[LPAD3]]: +// LLVM: landingpad { ptr, i32 } +// LLVM: catch ptr null, +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V3]]), +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V2]]), +// LLVM: call void @_ZN3VecD1Ev(ptr %[[V1]]), +// LLVM: br label %[[CATCH]], +// LLVM: [[CATCH]]: +// LLVM: call ptr @__cxa_begin_catch +// LLVM: br label %[[RET]], +// LLVM: [[RET]]: +// LLVM: ret void + +void yo2(bool x) { + int r = 1; + try { + Vec v1, v2; + try { + Vec v3, v4; + } catch (...) { + r++; + } + } catch (...) 
{ + r++; + } +} + +// CIR: cir.scope { +// CIR: %[[V1:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v1" +// CIR: %[[V2:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v2" +// CIR: cir.try { +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V2]]) : (!cir.ptr) -> () cleanup { +// CIR: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } +// CIR: cir.scope { +// CIR: %[[V3:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v3" +// CIR: %[[V4:.*]] = cir.alloca ![[VecTy]], !cir.ptr, ["v4" +// CIR: cir.try { +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR: cir.call exception @_ZN3VecC1Ev(%[[V4]]) : (!cir.ptr) -> () cleanup { +// CIR: cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } +// CIR: cir.call @_ZN3VecD1Ev(%[[V4]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } catch [type #cir.all { +// CIR: cir.catch_param -> !cir.ptr +// CIR: }] +// CIR: } +// CIR: cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr) -> () +// CIR: cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr) -> () +// CIR: cir.yield +// CIR: } catch [type #cir.all { +// CIR: cir.catch_param -> !cir.ptr +// CIR: }] diff --git a/clang/test/CIR/CodeGen/try-catch.cpp b/clang/test/CIR/CodeGen/try-catch.cpp new file mode 100644 index 000000000000..8945bc33b739 --- /dev/null +++ b/clang/test/CIR/CodeGen/try-catch.cpp @@ -0,0 +1,107 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +double division(int a, int b); + +// CHECK: cir.func @_Z2tcv() +unsigned long long tc() { + int x = 50, y = 3; + unsigned long long z; + + try { + // CHECK: cir.scope { + // CHECK: %[[local_a:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] + // CHECK: %[[msg:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["msg"] + // CHECK: %[[idx:.*]] = cir.alloca !s32i, !cir.ptr, ["idx"] + + // CHECK: cir.try { + int a = 4; + z = division(x, y); + // CHECK: %[[div_res:.*]] = cir.call exception @_Z8divisionii({{.*}}) : (!s32i, !s32i) -> !cir.double + a++; + + } catch (int idx) { + // CHECK: } catch [type #cir.global_view<@_ZTIi> : !cir.ptr { + // CHECK: %[[catch_idx_addr:.*]] = cir.catch_param -> !cir.ptr + // CHECK: %[[idx_load:.*]] = cir.load %[[catch_idx_addr]] : !cir.ptr, !s32i + // CHECK: cir.store %[[idx_load]], %[[idx]] : !s32i, !cir.ptr + z = 98; + idx++; + } catch (const char* msg) { + // CHECK: }, type #cir.global_view<@_ZTIPKc> : !cir.ptr { + // CHECK: %[[msg_addr:.*]] = cir.catch_param -> !cir.ptr + // CHECK: cir.store %[[msg_addr]], %[[msg]] : !cir.ptr, !cir.ptr> + z = 99; + (void)msg[0]; + } // CHECK: }, #cir.unwind { + // CHECK: cir.resume + // CHECK-NEXT: } + + return z; +} + +// CHECK: cir.func @_Z3tc2v +unsigned long long tc2() { + int x = 50, y = 3; + unsigned long long z; + + try { + int a = 4; + z = division(x, y); + a++; + } catch (int idx) { + z = 98; + idx++; + } catch (const char* msg) { + z = 99; + (void)msg[0]; + } catch (...) { + // CHECK: }, type #cir.all { + // CHECK: cir.catch_param + // CHECK: cir.const #cir.int<100> : !s32i + z = 100; + } + + return z; +} + +// CHECK: cir.func @_Z3tc3v +unsigned long long tc3() { + int x = 50, y = 3; + unsigned long long z; + + try { + z = division(x, y); + } catch (...) 
{ + // CHECK: } catch [type #cir.all { + // CHECK: cir.catch_param + // CHECK: cir.const #cir.int<100> : !s32i + z = 100; + } + + return z; +} + +// CIR: cir.func @_Z3tc4v() +unsigned long long tc4() { + int x = 50, y = 3; + unsigned long long z; + + // CIR-NOT: cir.try + try { + int a = 4; + a++; + + // CIR: cir.scope { + // CIR: cir.alloca !s32i, !cir.ptr, ["a", init] + // CIR-NOT: cir.alloca !cir.ptr + // CIR: cir.const #cir.int<4> : !s32i + // CIR: cir.unary(inc, + // CIR: cir.store %11, %8 : !s32i, !cir.ptr + } catch (int idx) { + z = 98; + idx++; + } + + return z; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/typedef.c b/clang/test/CIR/CodeGen/typedef.c new file mode 100644 index 000000000000..2f34ff824e1c --- /dev/null +++ b/clang/test/CIR/CodeGen/typedef.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +void local_typedef() { + typedef struct {int a;} Struct; + Struct s; +} + +//CHECK: cir.func no_proto @local_typedef() +//CHECK: {{.*}} = cir.alloca !ty_Struct, !cir.ptr, ["s"] {alignment = 4 : i64} +//CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/types-nullptr.cpp b/clang/test/CIR/CodeGen/types-nullptr.cpp new file mode 100644 index 000000000000..00250f438940 --- /dev/null +++ b/clang/test/CIR/CodeGen/types-nullptr.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef decltype(nullptr) nullptr_t; +void f() { nullptr_t t = nullptr; } + +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.const #cir.ptr : !cir.ptr +// CHECK: cir.store %1, %0 : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGen/types.c b/clang/test/CIR/CodeGen/types.c new file mode 100644 index 000000000000..18db058b67e5 --- /dev/null +++ b/clang/test/CIR/CodeGen/types.c @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cpp.cir +// RUN: FileCheck --input-file=%t.cpp.cir --check-prefix=CHECK-CPP %s + +int t0(int i) { return i; } +unsigned int t1(unsigned int i) { return i; } + +char t2(char i) { return i; } +unsigned char t3(unsigned char i) { return i; } + +short t4(short i) { return i; } +unsigned short t5(unsigned short i) { return i; } + +float t6(float i) { return i; } +double t7(double i) { return i; } +long double t10(long double i) { return i; } + +void t8(void) {} + +#ifdef __cplusplus +bool t9(bool b) { return b; } +#endif + +// CHECK: cir.func @t0(%arg0: !s32i loc({{.*}})) -> !s32i +// CHECK: cir.func @t1(%arg0: !u32i loc({{.*}})) -> !u32i +// CHECK: cir.func @t2(%arg0: !s8i loc({{.*}})) -> !s8i +// CHECK: cir.func @t3(%arg0: !u8i loc({{.*}})) -> !u8i +// CHECK: cir.func @t4(%arg0: !s16i loc({{.*}})) -> !s16i +// CHECK: cir.func @t5(%arg0: !u16i loc({{.*}})) -> !u16i +// CHECK: cir.func @t6(%arg0: !cir.float loc({{.*}})) -> !cir.float +// CHECK: cir.func @t7(%arg0: !cir.double loc({{.*}})) -> !cir.double +// CHECK: cir.func @t10(%arg0: !cir.long_double loc({{.*}})) -> !cir.long_double +// CHECK: cir.func @t8() + +// CHECK-CPP: cir.func @_Z2t0i(%arg0: !s32i loc({{.*}})) -> !s32i +// CHECK-CPP: cir.func @_Z2t1j(%arg0: !u32i loc({{.*}})) -> !u32i +// CHECK-CPP: cir.func @_Z2t2c(%arg0: !s8i loc({{.*}})) -> !s8i +// CHECK-CPP: cir.func @_Z2t3h(%arg0: !u8i loc({{.*}})) -> !u8i +// CHECK-CPP: cir.func 
@_Z2t4s(%arg0: !s16i loc({{.*}})) -> !s16i +// CHECK-CPP: cir.func @_Z2t5t(%arg0: !u16i loc({{.*}})) -> !u16i +// CHECK-CPP: cir.func @_Z2t6f(%arg0: !cir.float loc({{.*}})) -> !cir.float +// CHECK-CPP: cir.func @_Z2t7d(%arg0: !cir.double loc({{.*}})) -> !cir.double +// CHECK-CPP: cir.func @{{.+}}t10{{.+}}(%arg0: !cir.long_double loc({{.*}})) -> !cir.long_double +// CHECK-CPP: cir.func @_Z2t8v() +// CHECK-CPP: cir.func @_Z2t9b(%arg0: !cir.bool loc({{.*}})) -> !cir.bool diff --git a/clang/test/CIR/CodeGen/unary-deref.cpp b/clang/test/CIR/CodeGen/unary-deref.cpp new file mode 100644 index 000000000000..b5ceb4cceb7f --- /dev/null +++ b/clang/test/CIR/CodeGen/unary-deref.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +struct MyIntPointer { + int *ptr = nullptr; + int read() const { return *ptr; } +}; + +void foo() { + MyIntPointer p; + (void)p.read(); +} + +// CHECK: cir.func linkonce_odr @_ZNK12MyIntPointer4readEv +// CHECK: %2 = cir.load %0 +// CHECK: %3 = cir.get_member %2[0] {name = "ptr"} +// CHECK: %4 = cir.load deref %3 : !cir.ptr> +// CHECK: %5 = cir.load %4 \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/unary.c b/clang/test/CIR/CodeGen/unary.c new file mode 100644 index 000000000000..63f355c09f45 --- /dev/null +++ b/clang/test/CIR/CodeGen/unary.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int valueNegationInt(int i) { +// CHECK: cir.func @valueNegationInt( + return !i; + // CHECK: %[[#INT:]] = cir.load %{{[0-9]+}} : !cir.ptr, !s32i + // CHECK: %[[#INT_TO_BOOL:]] = cir.cast(int_to_bool, %[[#INT]] : !s32i), !cir.bool + // CHECK: = cir.unary(not, %[[#INT_TO_BOOL]]) : !cir.bool, !cir.bool +} + +short valueNegationShort(short s) { +// CHECK: cir.func @valueNegationShort( + return !s; + // CHECK: %[[#SHORT:]] = cir.load %{{[0-9]+}} : !cir.ptr, !s16i + // CHECK: %[[#SHORT_TO_BOOL:]] = cir.cast(int_to_bool, %[[#SHORT]] : !s16i), !cir.bool + // CHECK: = cir.unary(not, %[[#SHORT_TO_BOOL]]) : !cir.bool, !cir.bool +} + +long valueNegationLong(long l) { +// CHECK: cir.func @valueNegationLong( + return !l; + // CHECK: %[[#LONG:]] = cir.load %{{[0-9]+}} : !cir.ptr, !s64i + // CHECK: %[[#LONG_TO_BOOL:]] = cir.cast(int_to_bool, %[[#LONG]] : !s64i), !cir.bool + // CHECK: = cir.unary(not, %[[#LONG_TO_BOOL]]) : !cir.bool, !cir.bool +} + +float valueNegationFloat(float f) { +// CHECK: cir.func @valueNegationFloat( + return !f; + // CHECK: %[[#FLOAT:]] = cir.load %{{[0-9]+}} : !cir.ptr, !cir.float + // CHECK: %[[#FLOAT_TO_BOOL:]] = cir.cast(float_to_bool, %[[#FLOAT]] : !cir.float), !cir.bool + // CHECK: %[[#FLOAT_NOT:]] = cir.unary(not, %[[#FLOAT_TO_BOOL]]) : !cir.bool, !cir.bool + // CHECK: = cir.cast(bool_to_int, %[[#FLOAT_NOT]] : !cir.bool), !s32i +} + +double valueNegationDouble(double d) { +// CHECK: cir.func @valueNegationDouble( + return !d; + // CHECK: %[[#DOUBLE:]] = cir.load %{{[0-9]+}} : !cir.ptr, !cir.double + // CHECK: %[[#DOUBLE_TO_BOOL:]] = cir.cast(float_to_bool, %[[#DOUBLE]] : !cir.double), !cir.bool + // CHECK: %[[#DOUBLE_NOT:]] = cir.unary(not, %[[#DOUBLE_TO_BOOL]]) : !cir.bool, !cir.bool + // CHECK: = cir.cast(bool_to_int, %[[#DOUBLE_NOT]] : !cir.bool), !s32i +} diff --git a/clang/test/CIR/CodeGen/unary.cpp b/clang/test/CIR/CodeGen/unary.cpp new file mode 100644 index 000000000000..986e9b2dcedc --- /dev/null +++ b/clang/test/CIR/CodeGen/unary.cpp @@ -0,0 +1,232 @@ +// 
RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +unsigned up0() { + unsigned a = 1; + return +a; +} + +// CHECK: cir.func @_Z3up0v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#OUTPUT:]] = cir.unary(plus, %[[#INPUT]]) +// CHECK: cir.store %[[#OUTPUT]], %[[#RET]] + +unsigned um0() { + unsigned a = 1; + return -a; +} + +// CHECK: cir.func @_Z3um0v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#OUTPUT:]] = cir.unary(minus, %[[#INPUT]]) +// CHECK: cir.store %[[#OUTPUT]], %[[#RET]] + +unsigned un0() { + unsigned a = 1; + return ~a; // a ^ -1 , not +} + +// CHECK: cir.func @_Z3un0v() -> !u32i +// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr, ["a", init] +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#OUTPUT:]] = cir.unary(not, %[[#INPUT]]) +// CHECK: cir.store %[[#OUTPUT]], %[[#RET]] + +int inc0() { + int a = 1; + ++a; + return a; +} + +// CHECK: cir.func @_Z4inc0v() -> !s32i +// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store %[[#ATMP]], %[[#A]] : !s32i +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#INCREMENTED:]] = cir.unary(inc, %[[#INPUT]]) +// CHECK: cir.store %[[#INCREMENTED]], %[[#A]] +// CHECK: %[[#A_TO_OUTPUT:]] = cir.load %[[#A]] +// CHECK: cir.store %[[#A_TO_OUTPUT]], %[[#RET]] +// CHECK: %[[#OUTPUT:]] = cir.load %[[#RET]] +// CHECK: cir.return %[[#OUTPUT]] : !s32i + +int dec0() { + int a = 1; + --a; + return a; +} + +// CHECK: cir.func @_Z4dec0v() -> !s32i +// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store %[[#ATMP]], %[[#A]] : !s32i +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#INCREMENTED:]] = cir.unary(dec, %[[#INPUT]]) +// CHECK: cir.store %[[#INCREMENTED]], %[[#A]] +// CHECK: %[[#A_TO_OUTPUT:]] = cir.load %[[#A]] +// CHECK: cir.store %[[#A_TO_OUTPUT]], %[[#RET]] +// CHECK: %[[#OUTPUT:]] = cir.load %[[#RET]] +// CHECK: cir.return %[[#OUTPUT]] : !s32i + + +int inc1() { + int a = 1; + a++; + return a; +} + +// CHECK: cir.func @_Z4inc1v() -> !s32i +// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store %[[#ATMP]], %[[#A]] : !s32i +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#INCREMENTED:]] = cir.unary(inc, %[[#INPUT]]) +// CHECK: cir.store %[[#INCREMENTED]], %[[#A]] +// CHECK: %[[#A_TO_OUTPUT:]] = cir.load %[[#A]] +// CHECK: cir.store %[[#A_TO_OUTPUT]], %[[#RET]] +// CHECK: %[[#OUTPUT:]] = cir.load %[[#RET]] +// CHECK: cir.return %[[#OUTPUT]] : !s32i + +int dec1() { + int a = 1; + a--; + return a; +} + +// CHECK: cir.func @_Z4dec1v() -> !s32i +// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store 
%[[#ATMP]], %[[#A]] : !s32i +// CHECK: %[[#INPUT:]] = cir.load %[[#A]] +// CHECK: %[[#INCREMENTED:]] = cir.unary(dec, %[[#INPUT]]) +// CHECK: cir.store %[[#INCREMENTED]], %[[#A]] +// CHECK: %[[#A_TO_OUTPUT:]] = cir.load %[[#A]] +// CHECK: cir.store %[[#A_TO_OUTPUT]], %[[#RET]] +// CHECK: %[[#OUTPUT:]] = cir.load %[[#RET]] +// CHECK: cir.return %[[#OUTPUT]] : !s32i + +// Ensure the increment is performed after the assignment to b. +int inc2() { + int a = 1; + int b = a++; + return b; +} + +// CHECK: cir.func @_Z4inc2v() -> !s32i +// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] +// CHECK: %[[#B:]] = cir.alloca !s32i, !cir.ptr, ["b", init] +// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i +// CHECK: cir.store %[[#ATMP]], %[[#A]] : !s32i +// CHECK: %[[#ATOB:]] = cir.load %[[#A]] +// CHECK: %[[#INCREMENTED:]] = cir.unary(inc, %[[#ATOB]]) +// CHECK: cir.store %[[#INCREMENTED]], %[[#A]] +// CHECK: cir.store %[[#ATOB]], %[[#B]] +// CHECK: %[[#B_TO_OUTPUT:]] = cir.load %[[#B]] +// CHECK: cir.store %[[#B_TO_OUTPUT]], %[[#RET]] +// CHECK: %[[#OUTPUT:]] = cir.load %[[#RET]] +// CHECK: cir.return %[[#OUTPUT]] : !s32i + +int *inc_p(int *i) { + --i; + ++i; + return i; +} + +// CHECK: cir.func @_Z5inc_pPi(%arg0: !cir.ptr + +// CHECK: %[[#i_addr:]] = cir.alloca !cir.ptr, !cir.ptr>, ["i", init] {alignment = 8 : i64} +// CHECK: %[[#i_dec:]] = cir.load %[[#i_addr]] : !cir.ptr>, !cir.ptr +// CHECK: %[[#dec_const:]] = cir.const #cir.int<-1> : !s32i +// CHECK: = cir.ptr_stride(%[[#i_dec]] : !cir.ptr, %[[#dec_const]] : !s32i), !cir.ptr + +// CHECK: %[[#i_inc:]] = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %[[#inc_const:]] = cir.const #cir.int<1> : !s32i +// CHECK: = cir.ptr_stride(%[[#i_inc]] : !cir.ptr, %[[#inc_const]] : !s32i), !cir.ptr + +void floats(float f) { +// CHECK: cir.func @{{.+}}floats{{.+}} + f = +f; // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.float, !cir.float + f = -f; // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.float, !cir.float + ++f; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.float, !cir.float + --f; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.float, !cir.float + f++; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.float, !cir.float + f--; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.float, !cir.float + + f = !f; + // CHECK: %[[#F_BOOL:]] = cir.cast(float_to_bool, %{{[0-9]+}} : !cir.float), !cir.bool + // CHECK: = cir.unary(not, %[[#F_BOOL]]) : !cir.bool, !cir.bool +} + +void doubles(double d) { +// CHECK: cir.func @{{.+}}doubles{{.+}} + d = +d; // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.double, !cir.double + d = -d; // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.double, !cir.double + ++d; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.double, !cir.double + --d; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.double, !cir.double + d++; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.double, !cir.double + d--; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.double, !cir.double + + d = !d; + // CHECK: %[[#D_BOOL:]] = cir.cast(float_to_bool, %{{[0-9]+}} : !cir.double), !cir.bool + // CHECK: = cir.unary(not, %[[#D_BOOL]]) : !cir.bool, !cir.bool +} + +void pointers(int *p) { +// CHECK: cir.func @{{[^ ]+}}pointers + // CHECK: %[[#P:]] = cir.alloca !cir.ptr, !cir.ptr> + + p = +p; + // CHECK: cir.unary(plus, %{{.+}}) : !cir.ptr, !cir.ptr + + ++p; + // CHECK: %[[#INC:]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[#RES:]] = cir.ptr_stride(%{{.+}} : 
!cir.ptr, %[[#INC]] : !s32i), !cir.ptr + // CHECK: cir.store %[[#RES]], %[[#P]] : !cir.ptr, !cir.ptr> + --p; + // CHECK: %[[#DEC:]] = cir.const #cir.int<-1> : !s32i + // CHECK: %[[#RES:]] = cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#DEC]] : !s32i), !cir.ptr + // CHECK: cir.store %[[#RES]], %[[#P]] : !cir.ptr, !cir.ptr> + p++; + // CHECK: %[[#INC:]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[#RES:]] = cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#INC]] : !s32i), !cir.ptr + // CHECK: cir.store %[[#RES]], %[[#P]] : !cir.ptr, !cir.ptr> + p--; + // CHECK: %[[#DEC:]] = cir.const #cir.int<-1> : !s32i + // CHECK: %[[#RES:]] = cir.ptr_stride(%{{.+}} : !cir.ptr, %[[#DEC]] : !s32i), !cir.ptr + // CHECK: cir.store %[[#RES]], %[[#P]] : !cir.ptr, !cir.ptr> + + bool p1 = !p; + // %[[BOOLPTR:]] = cir.cast(ptr_to_bool, %15 : !cir.ptr), !cir.bool + // cir.unary(not, %[[BOOLPTR]]) : !cir.bool, !cir.bool +} + +void chars(char c) { +// CHECK: cir.func @{{.+}}chars{{.+}} + + int c1 = +c; + // CHECK: %[[#PROMO:]] = cir.cast(integral, %{{.+}} : !s8i), !s32i + // CHECK: cir.unary(plus, %[[#PROMO]]) : !s32i, !s32i + int c2 = -c; + // CHECK: %[[#PROMO:]] = cir.cast(integral, %{{.+}} : !s8i), !s32i + // CHECK: cir.unary(minus, %[[#PROMO]]) : !s32i, !s32i + + // Chars can go through some integer promotion codegen paths even when not promoted. + ++c; // CHECK: cir.unary(inc, %10) : !s8i, !s8i + --c; // CHECK: cir.unary(dec, %12) : !s8i, !s8i + c++; // CHECK: cir.unary(inc, %14) : !s8i, !s8i + c--; // CHECK: cir.unary(dec, %16) : !s8i, !s8i + + bool c3 = !c; + // CHECK: %[[#C_BOOL:]] = cir.cast(int_to_bool, %{{[0-9]+}} : !s8i), !cir.bool + // CHECK: cir.unary(not, %[[#C_BOOL]]) : !cir.bool, !cir.bool +} diff --git a/clang/test/CIR/CodeGen/union-init.c b/clang/test/CIR/CodeGen/union-init.c new file mode 100644 index 000000000000..e9fa2ea1deb6 --- /dev/null +++ b/clang/test/CIR/CodeGen/union-init.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s + +typedef union { + int value; + struct { + int x : 16; + int y : 16; + }; +} A; + +void foo(int x) { + A a = {.x = x}; +} + +// CHECK-DAG: ![[anon0:.*]] = !cir.struct}> +// CHECK-DAG: ![[anon:.*]] = !cir.struct}> +// CHECK-DAG: #[[bfi_x:.*]] = #cir.bitfield_info +// CHECK-DAG: #[[bfi_y:.*]] = #cir.bitfield_info +// CHECK-DAG: ![[anon1:.*]] = !cir.struct, !cir.array x 4>} + +// CHECK-LABEL: cir.func @foo( +// CHECK: %[[VAL_1:.*]] = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: %[[VAL_2:.*]] = cir.alloca !ty_A, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK: cir.store {{.*}}, %[[VAL_1]] : !s32i, !cir.ptr +// CHECK: %[[VAL_3:.*]] = cir.get_member %[[VAL_2]][1] {name = ""} : !cir.ptr -> !cir.ptr +// CHECK: %[[VAL_4:.*]] = cir.cast(bitcast, %[[VAL_3]] : !cir.ptr), !cir.ptr +// CHECK: %[[VAL_5:.*]] = cir.load %[[VAL_1]] : !cir.ptr, !s32i +// CHECK: %[[VAL_6:.*]] = cir.set_bitfield(#[[bfi_x]], %[[VAL_4]] : !cir.ptr, %[[VAL_5]] : !s32i) -> !s32i +// CHECK: %[[VAL_7:.*]] = cir.cast(bitcast, %[[VAL_3]] : !cir.ptr), !cir.ptr +// CHECK: %[[VAL_8:.*]] = cir.const #cir.int<0> : !s32i +// CHECK: %[[VAL_9:.*]] = cir.set_bitfield(#[[bfi_y]], %[[VAL_7]] : !cir.ptr, %[[VAL_8]] : !s32i) -> !s32i +// CHECK: cir.return + +union { int i; float f; } u = { }; +// CHECK: cir.global external @u = #cir.zero : ![[anon]] + +unsigned is_little(void) { + const union { + unsigned int u; + unsigned char c[4]; + } one = {1}; + return one.c[0]; +} + +// CHECK: cir.func @is_little +// CHECK: %[[VAL_1:.*]] = cir.get_global 
@is_little.one : !cir.ptr +// CHECK: %[[VAL_2:.*]] = cir.cast(bitcast, %[[VAL_1]] : !cir.ptr), !cir.ptr +// CHECK: %[[VAL_3:.*]] = cir.get_member %[[VAL_2]][1] {name = "c"} : !cir.ptr -> !cir.ptr> + +typedef union { + int x; +} U; + +// CHECK: %[[VAL_0:.*]] = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// CHECK: %[[VAL_1:.*]] = cir.alloca !ty_U, !cir.ptr, ["u", init] {alignment = 4 : i64} +// CHECK: cir.store %arg0, %[[VAL_0]] : !s32i, !cir.ptr +// CHECK: %[[VAL_2:.*]] = cir.load %[[VAL_0]] : !cir.ptr, !s32i +// CHECK: %[[VAL_3:.*]] = cir.cast(bitcast, %[[VAL_1]] : !cir.ptr), !cir.ptr +// CHECK: cir.store %[[VAL_2]], %[[VAL_3]] : !s32i, !cir.ptr + +void union_cast(int x) { + U u = (U) x; +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/union.cpp b/clang/test/CIR/CodeGen/union.cpp new file mode 100644 index 000000000000..90b27a6bd9c7 --- /dev/null +++ b/clang/test/CIR/CodeGen/union.cpp @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +typedef struct { int x; } yolo; +typedef union { yolo y; struct { int lifecnt; }; } yolm; +typedef union { yolo y; struct { int *lifecnt; int genpad; }; } yolm2; +typedef union { yolo y; struct { bool life; int genpad; }; } yolm3; + +// CHECK-DAG: !ty_U23A3ADummy = !cir.struct, !cir.float} #cir.record.decl.ast> +// CHECK-DAG: !ty_anon2E0_ = !cir.struct} #cir.record.decl.ast> +// CHECK-DAG: !ty_anon2E2_ = !cir.struct} #cir.record.decl.ast> +// CHECK-DAG: !ty_yolo = !cir.struct} #cir.record.decl.ast> +// CHECK-DAG: !ty_anon2E1_ = !cir.struct>, !cir.int} #cir.record.decl.ast> + +// CHECK-DAG: !ty_yolm = !cir.struct} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>}> +// CHECK-DAG: !ty_yolm3_ = !cir.struct} #cir.record.decl.ast>, !cir.struct} #cir.record.decl.ast>}> +// CHECK-DAG: !ty_yolm2_ = !cir.struct} #cir.record.decl.ast>, !cir.struct>, !cir.int} #cir.record.decl.ast>}> + +// Should generate a union type with all members preserved. +union U { + bool b; + short s; + int i; + float f; + double d; +}; +// CHECK-DAG: !ty_U = !cir.struct, !cir.int, !cir.float, !cir.double}> + +// Should generate unions with complex members. +union U2 { + bool b; + struct Dummy { + short s; + float f; + } s; +} u2; +// CHECK-DAG: !cir.struct, !cir.float} #cir.record.decl.ast>} #cir.record.decl.ast> + +// Should generate unions without padding. 
+union U3 { + short b; + U u; +} u3; +// CHECK-DAG: !ty_U3_ = !cir.struct, !cir.struct, !cir.int, !cir.float, !cir.double}>} #cir.record.decl.ast> + +void m() { + yolm q; + yolm2 q2; + yolm3 q3; +} + +// CHECK: cir.func @_Z1mv() +// CHECK: cir.alloca !ty_yolm, !cir.ptr, ["q"] {alignment = 4 : i64} +// CHECK: cir.alloca !ty_yolm2_, !cir.ptr, ["q2"] {alignment = 8 : i64} +// CHECK: cir.alloca !ty_yolm3_, !cir.ptr, ["q3"] {alignment = 4 : i64} + +void shouldGenerateUnionAccess(union U u) { + u.b = true; + // CHECK: %[[#BASE:]] = cir.get_member %0[0] {name = "b"} : !cir.ptr -> !cir.ptr + // CHECK: cir.store %{{.+}}, %[[#BASE]] : !cir.bool, !cir.ptr + u.b; + // CHECK: cir.get_member %0[0] {name = "b"} : !cir.ptr -> !cir.ptr + u.i = 1; + // CHECK: %[[#BASE:]] = cir.get_member %0[2] {name = "i"} : !cir.ptr -> !cir.ptr + // CHECK: cir.store %{{.+}}, %[[#BASE]] : !s32i, !cir.ptr + u.i; + // CHECK: %[[#BASE:]] = cir.get_member %0[2] {name = "i"} : !cir.ptr -> !cir.ptr + u.f = 0.1F; + // CHECK: %[[#BASE:]] = cir.get_member %0[3] {name = "f"} : !cir.ptr -> !cir.ptr + // CHECK: cir.store %{{.+}}, %[[#BASE]] : !cir.float, !cir.ptr + u.f; + // CHECK: %[[#BASE:]] = cir.get_member %0[3] {name = "f"} : !cir.ptr -> !cir.ptr + u.d = 0.1; + // CHECK: %[[#BASE:]] = cir.get_member %0[4] {name = "d"} : !cir.ptr -> !cir.ptr + // CHECK: cir.store %{{.+}}, %[[#BASE]] : !cir.double, !cir.ptr + u.d; + // CHECK: %[[#BASE:]] = cir.get_member %0[4] {name = "d"} : !cir.ptr -> !cir.ptr +} + +typedef union { + short a; + int b; +} A; + +void noCrushOnDifferentSizes() { + A a = {0}; + // CHECK: %[[#TMP0:]] = cir.alloca !ty_A, !cir.ptr, ["a"] {alignment = 4 : i64} + // CHECK: %[[#TMP1:]] = cir.cast(bitcast, %[[#TMP0]] : !cir.ptr), !cir.ptr + // CHECK: %[[#TMP2:]] = cir.const #cir.zero : !ty_anon_struct + // CHECK: cir.store %[[#TMP2]], %[[#TMP1]] : !ty_anon_struct, !cir.ptr +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/unreachable.cpp b/clang/test/CIR/CodeGen/unreachable.cpp new file mode 100644 index 000000000000..c617fe8c6212 --- /dev/null +++ b/clang/test/CIR/CodeGen/unreachable.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +void foo(); + +void basic() { + foo(); + __builtin_unreachable(); +} + +// CHECK: cir.func @_Z5basicv() +// CHECK-NEXT: cir.call @_Z3foov() : () -> () +// CHECK-NEXT: cir.unreachable +// CHECK-NEXT: } + +void code_after_unreachable() { + foo(); + __builtin_unreachable(); + foo(); +} + +// CHECK: cir.func @_Z22code_after_unreachablev() +// CHECK: cir.call @_Z3foov() : () -> () +// CHECK: cir.unreachable +// CHECK: ^{{.+}}: +// CHECK: cir.call @_Z3foov() : () -> () +// CHECK: cir.return +// CHECK: } diff --git a/clang/test/CIR/CodeGen/var-arg-float.c b/clang/test/CIR/CodeGen/var-arg-float.c new file mode 100644 index 000000000000..e1e2d413a528 --- /dev/null +++ b/clang/test/CIR/CodeGen/var-arg-float.c @@ -0,0 +1,117 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +#include + +double f1(int n, ...) 
{ + va_list valist; + va_start(valist, n); + double res = va_arg(valist, double); + va_end(valist); + return res; +} + +// BEFORE: !ty___va_list = !cir.struct, !cir.ptr, !cir.ptr, !cir.int, !cir.int} +// BEFORE: cir.func @f1(%arg0: !s32i, ...) -> !cir.double +// BEFORE: [[RETP:%.*]] = cir.alloca !cir.double, !cir.ptr, ["__retval"] +// BEFORE: [[RESP:%.*]] = cir.alloca !cir.double, !cir.ptr, ["res", init] +// BEFORE: cir.va.start [[VARLIST:%.*]] : !cir.ptr +// BEFORE: [[TMP0:%.*]] = cir.va.arg [[VARLIST]] : (!cir.ptr) -> !cir.double +// BEFORE: cir.store [[TMP0]], [[RESP]] : !cir.double, !cir.ptr +// BEFORE: cir.va.end [[VARLIST]] : !cir.ptr +// BEFORE: [[RES:%.*]] = cir.load [[RESP]] : !cir.ptr, !cir.double +// BEFORE: cir.store [[RES]], [[RETP]] : !cir.double, !cir.ptr +// BEFORE: [[RETV:%.*]] = cir.load [[RETP]] : !cir.ptr, !cir.double +// BEFORE: cir.return [[RETV]] : !cir.double + +// beginning block cir code +// AFTER: !ty___va_list = !cir.struct, !cir.ptr, !cir.ptr, !cir.int, !cir.int} +// AFTER: cir.func @f1(%arg0: !s32i, ...) -> !cir.double +// AFTER: [[RETP:%.*]] = cir.alloca !cir.double, !cir.ptr, ["__retval"] +// AFTER: [[RESP:%.*]] = cir.alloca !cir.double, !cir.ptr, ["res", init] +// AFTER: cir.va.start [[VARLIST:%.*]] : !cir.ptr +// AFTER: [[VR_OFFS_P:%.*]] = cir.get_member [[VARLIST]][4] {name = "vr_offs"} : !cir.ptr -> !cir.ptr +// AFTER: [[VR_OFFS:%.*]] = cir.load [[VR_OFFS_P]] : !cir.ptr, !s32i +// AFTER: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i +// AFTER: [[CMP0:%.*]] = cir.cmp(ge, [[VR_OFFS]], [[ZERO]]) : !s32i, !cir.bool +// AFTER-NEXT: cir.brcond [[CMP0]] [[BB_ON_STACK:\^bb.*]], [[BB_MAY_REG:\^bb.*]] + +// AFTER: [[BB_MAY_REG]]: +// AFTER-NEXT: [[SIXTEEN:%.*]] = cir.const #cir.int<16> : !s32i +// AFTER-NEXT: [[NEW_REG_OFFS:%.*]] = cir.binop(add, [[VR_OFFS]], [[SIXTEEN]]) : !s32i +// AFTER-NEXT: cir.store [[NEW_REG_OFFS]], [[VR_OFFS_P]] : !s32i, !cir.ptr +// AFTER-NEXT: [[CMP1:%.*]] = cir.cmp(le, [[NEW_REG_OFFS]], [[ZERO]]) : !s32i, !cir.bool +// AFTER-NEXT: cir.brcond [[CMP1]] [[BB_IN_REG:\^bb.*]], [[BB_ON_STACK]] + + +// AFTER: [[BB_IN_REG]]: +// AFTER-NEXT: [[VR_TOP_P:%.*]] = cir.get_member [[VARLIST]][2] {name = "vr_top"} : !cir.ptr -> !cir.ptr> +// AFTER-NEXT: [[VR_TOP:%.*]] = cir.load [[VR_TOP_P]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: [[TMP2:%.*]] = cir.cast(bitcast, [[VR_TOP]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP3:%.*]] = cir.ptr_stride([[TMP2]] : !cir.ptr, [[VR_OFFS]] : !s32i), !cir.ptr +// AFTER-NEXT: [[IN_REG_OUTPUT:%.*]] = cir.cast(bitcast, [[TMP3]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.br [[BB_END:\^bb.*]]([[IN_REG_OUTPUT]] : !cir.ptr) + + +// AFTER: [[BB_ON_STACK]]: +// AFTER-NEXT: [[STACK_P:%.*]] = cir.get_member [[VARLIST]][0] {name = "stack"} : !cir.ptr -> !cir.ptr> +// AFTER-NEXT: [[STACK_V:%.*]] = cir.load [[STACK_P]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: [[EIGHT_IN_PTR_ARITH:%.*]] = cir.const #cir.int<8> : !u64i +// AFTER-NEXT: [[TMP4:%.*]] = cir.cast(bitcast, [[STACK_V]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP5:%.*]] = cir.ptr_stride([[TMP4]] : !cir.ptr, [[EIGHT_IN_PTR_ARITH]] : !u64i), !cir.ptr +// AFTER-NEXT: [[NEW_STACK_V:%.*]] = cir.cast(bitcast, [[TMP5]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.store [[NEW_STACK_V]], [[STACK_P]] : !cir.ptr, !cir.ptr> +// AFTER-NEXT: cir.br [[BB_END]]([[STACK_V]] : !cir.ptr) + +// AFTER-NEXT: [[BB_END]]([[BLK_ARG:%.*]]: !cir.ptr): // 2 preds: [[BB_IN_REG]], [[BB_ON_STACK]] +// AFTER-NEXT: [[TMP0:%.*]] = cir.cast(bitcast, [[BLK_ARG]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP1:%.*]] = 
cir.load [[TMP0]] : !cir.ptr, !cir.double +// AFTER: cir.store [[TMP1]], [[RESP]] : !cir.double, !cir.ptr +// AFTER: cir.va.end [[VARLIST]] : !cir.ptr +// AFTER: [[RES:%.*]] = cir.load [[RESP]] : !cir.ptr, !cir.double +// AFTER: cir.store [[RES]], [[RETP]] : !cir.double, !cir.ptr +// AFTER: [[RETV:%.*]] = cir.load [[RETP]] : !cir.ptr, !cir.double +// AFTER: cir.return [[RETV]] : !cir.double + +// beginning block llvm code +// LLVM: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 } +// LLVM: define dso_local double @f1(i32 %0, ...) +// LLVM: [[ARGN:%.*]] = alloca i32, i64 1, align 4, +// LLVM: [[RETP:%.*]] = alloca double, i64 1, align 8, +// LLVM: [[RESP:%.*]] = alloca double, i64 1, align 8, +// LLVM: call void @llvm.va_start.p0(ptr [[VARLIST:%.*]]), +// LLVM: [[VR_OFFS_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 4 +// LLVM: [[VR_OFFS:%.*]] = load i32, ptr [[VR_OFFS_P]], align 4, +// LLVM-NEXT: [[CMP0:%.*]] = icmp sge i32 [[VR_OFFS]], 0, +// LLVM-NEXT: br i1 [[CMP0]], label %[[BB_ON_STACK:.*]], label %[[BB_MAY_REG:.*]], + +// LLVM: [[BB_MAY_REG]]: ; +// LLVM-NEXT: [[NEW_REG_OFFS:%.*]] = add i32 [[VR_OFFS]], 16, +// LLVM-NEXT: store i32 [[NEW_REG_OFFS]], ptr [[VR_OFFS_P]], align 4, +// LLVM-NEXT: [[CMP1:%.*]] = icmp sle i32 [[NEW_REG_OFFS]], 0, +// LLVM-NEXT: br i1 [[CMP1]], label %[[BB_IN_REG:.*]], label %[[BB_ON_STACK]], + +// LLVM: [[BB_IN_REG]]: ; +// LLVM-NEXT: [[VR_TOP_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 2, +// LLVM-NEXT: [[VR_TOP:%.*]] = load ptr, ptr [[VR_TOP_P]], align 8, +// LLVM-NEXT: [[EXT64_VR_OFFS:%.*]] = sext i32 [[VR_OFFS]] to i64, +// LLVM-NEXT: [[IN_REG_OUTPUT:%.*]] = getelementptr i8, ptr [[VR_TOP]], i64 [[EXT64_VR_OFFS]], +// LLVM-NEXT: br label %[[BB_END:.*]], + +// LLVM: [[BB_ON_STACK]]: ; +// LLVM-NEXT: [[STACK_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 0, +// LLVM-NEXT: [[STACK_V:%.*]] = load ptr, ptr [[STACK_P]], align 8, +// LLVM-NEXT: [[NEW_STACK_V:%.*]] = getelementptr i8, ptr [[STACK_V]], i64 8, +// LLVM-NEXT: store ptr [[NEW_STACK_V]], ptr [[STACK_P]], align 8, +// LLVM-NEXT: br label %[[BB_END]], + +// LLVM: [[BB_END]]: ; preds = %[[BB_ON_STACK]], %[[BB_IN_REG]] +// LLVM-NEXT: [[PHIP:%.*]] = phi ptr [ [[IN_REG_OUTPUT]], %[[BB_IN_REG]] ], [ [[STACK_V]], %[[BB_ON_STACK]] ] +// LLVM-NEXT: [[PHIV:%.*]] = load double, ptr [[PHIP]], align 8, +// LLVM-NEXT: store double [[PHIV]], ptr [[RESP]], align 8, +// LLVM: call void @llvm.va_end.p0(ptr [[VARLIST]]), +// LLVM: [[RES:%.*]] = load double, ptr [[RESP]], align 8, +// LLVM: store double [[RES]], ptr [[RETP]], align 8, +// LLVM: [[RETV:%.*]] = load double, ptr [[RETP]], align 8, +// LLVM-NEXT: ret double [[RETV]], diff --git a/clang/test/CIR/CodeGen/var-arg-scope.c b/clang/test/CIR/CodeGen/var-arg-scope.c new file mode 100644 index 000000000000..f5c3c65cd467 --- /dev/null +++ b/clang/test/CIR/CodeGen/var-arg-scope.c @@ -0,0 +1,105 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +void f1(__builtin_va_list c) { + { __builtin_va_arg(c, void *); } +} + +// BEFORE: 
cir.func @f1(%arg0: !ty___va_list) attributes +// BEFORE: [[VAR_LIST:%.*]] = cir.alloca !ty___va_list, !cir.ptr, ["c", init] {alignment = 8 : i64} +// BEFORE: cir.store %arg0, [[VAR_LIST]] : !ty___va_list, !cir.ptr +// BEFORE: cir.scope { +// BEFORE-NEXT: [[TMP:%.*]] = cir.va.arg [[VAR_LIST]] : (!cir.ptr) -> !cir.ptr +// BEFORE-NEXT: } +// BEFORE-NEXT: cir.return + +// AFTER: cir.func @f1(%arg0: !ty___va_list) attributes +// AFTER: [[VARLIST:%.*]] = cir.alloca !ty___va_list, !cir.ptr, ["c", init] {alignment = 8 : i64} +// AFTER: cir.store %arg0, [[VARLIST]] : !ty___va_list, !cir.ptr +// AFTER: cir.scope { +// +// AFTER-NEXT: [[GR_OFFS_P:%.*]] = cir.get_member [[VARLIST]][3] {name = "gr_offs"} : !cir.ptr -> !cir.ptr +// AFTER-NEXT: [[GR_OFFS:%.*]] = cir.load [[GR_OFFS_P]] : !cir.ptr +// AFTER: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i +// AFTER: [[CMP0:%.*]] = cir.cmp(ge, [[GR_OFFS]], [[ZERO]]) : !s32i, !cir.bool +// AFTER-NEXT: cir.brcond [[CMP0]] [[BB_ON_STACK:\^bb.*]], [[BB_MAY_REG:\^bb.*]] + +// This BB calculates to see if it is possible to pass arg in register. +// AFTER: [[BB_MAY_REG]]: +// AFTER-NEXT: [[EIGHT:%.*]] = cir.const #cir.int<8> : !s32i +// AFTER-NEXT: [[NEW_REG_OFFS:%.*]] = cir.binop(add, [[GR_OFFS]], [[EIGHT]]) : !s32i +// AFTER-NEXT: cir.store [[NEW_REG_OFFS]], [[GR_OFFS_P]] : !s32i, !cir.ptr +// AFTER-NEXT: [[CMP1:%.*]] = cir.cmp(le, [[NEW_REG_OFFS]], [[ZERO]]) : !s32i, !cir.bool +// AFTER-NEXT: cir.brcond [[CMP1]] [[BB_IN_REG:\^bb.*]], [[BB_ON_STACK]] + +// arg is passed in register. +// AFTER: [[BB_IN_REG]]: +// AFTER-NEXT: [[GR_TOP_P:%.*]] = cir.get_member [[VARLIST]][1] {name = "gr_top"} : !cir.ptr -> !cir.ptr> +// AFTER-NEXT: [[GR_TOP:%.*]] = cir.load [[GR_TOP_P]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: [[TMP2:%.*]] = cir.cast(bitcast, [[GR_TOP]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP3:%.*]] = cir.ptr_stride([[TMP2]] : !cir.ptr, [[GR_OFFS]] : !s32i), !cir.ptr +// AFTER-NEXT: [[IN_REG_OUTPUT:%.*]] = cir.cast(bitcast, [[TMP3]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.br [[BB_END:\^bb.*]]([[IN_REG_OUTPUT]] : !cir.ptr) + +// arg is passed in stack. +// AFTER: [[BB_ON_STACK]]: +// AFTER-NEXT: [[STACK_P:%.*]] = cir.get_member [[VARLIST]][0] {name = "stack"} : !cir.ptr -> !cir.ptr> +// AFTER-NEXT: [[STACK_V:%.*]] = cir.load [[STACK_P]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: [[EIGHT_IN_PTR_ARITH:%.*]] = cir.const #cir.int<8> : !u64i +// AFTER-NEXT: [[TMP4:%.*]] = cir.cast(bitcast, [[STACK_V]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP5:%.*]] = cir.ptr_stride([[TMP4]] : !cir.ptr, [[EIGHT_IN_PTR_ARITH]] : !u64i), !cir.ptr +// AFTER-NEXT: [[NEW_STACK_V:%.*]] = cir.cast(bitcast, [[TMP5]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.store [[NEW_STACK_V]], [[STACK_P]] : !cir.ptr, !cir.ptr> +// AFTER-NEXT: cir.br [[BB_END]]([[STACK_V]] : !cir.ptr) + +// This BB is where different path converges. BLK_ARG is the arg addr which +// could come from IN_REG block where arg is passed in register, and saved in callee +// stack's argument saving area. +// Or from ON_STACK block which means arg is passed in from caller's stack area. 
+// AFTER-NEXT: [[BB_END]]([[BLK_ARG:%.*]]: !cir.ptr): // 2 preds: [[BB_IN_REG]], [[BB_ON_STACK]] +// AFTER-NEXT: [[TMP0:%.*]] = cir.cast(bitcast, [[BLK_ARG]] : !cir.ptr), !cir.ptr> +// AFTER-NEXT: [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: cir.yield +// AFTER-NEXT: } +// AFTER-NEXT: cir.return + +// LLVM: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 } +// LLVM: define dso_local void @f1(%struct.__va_list %0) +// LLVM: [[VARLIST:%.*]] = alloca %struct.__va_list, i64 1, align 8, +// LLVM: br label %[[SCOPE_FRONT:.*]], + +// LLVM: [[SCOPE_FRONT]]: ; preds = %1 +// LLVM: [[GR_OFFS_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 3 +// LLVM: [[GR_OFFS:%.*]] = load i32, ptr [[GR_OFFS_P]], align 4, +// LLVM-NEXT: [[CMP0:%.*]] = icmp sge i32 [[GR_OFFS]], 0, +// LLVM-NEXT: br i1 [[CMP0]], label %[[BB_ON_STACK:.*]], label %[[BB_MAY_REG:.*]], + +// LLVM: [[BB_MAY_REG]]: ; +// LLVM: [[NEW_REG_OFFS:%.*]] = add i32 [[GR_OFFS]], 8, +// LLVM: store i32 [[NEW_REG_OFFS]], ptr [[GR_OFFS_P]], align 4, +// LLVM-NEXT: [[CMP1:%.*]] = icmp sle i32 [[NEW_REG_OFFS]], 0, +// LLVM-NEXT: br i1 [[CMP1]], label %[[BB_IN_REG:.*]], label %[[BB_ON_STACK]], + +// LLVM: [[BB_IN_REG]]: ; +// LLVM-NEXT: [[GR_TOP_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 1, +// LLVM-NEXT: [[GR_TOP:%.*]] = load ptr, ptr [[GR_TOP_P]], align 8, +// LLVM-NEXT: [[EXT64_GR_OFFS:%.*]] = sext i32 [[GR_OFFS]] to i64, +// LLVM-NEXT: [[IN_REG_OUTPUT:%.*]] = getelementptr i8, ptr [[GR_TOP]], i64 [[EXT64_GR_OFFS]], +// LLVM-NEXT: br label %[[BB_END:.*]], + +// LLVM: [[BB_ON_STACK]]: ; +// LLVM-NEXT: [[STACK_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 0, +// LLVM-NEXT: [[STACK_V:%.*]] = load ptr, ptr [[STACK_P]], align 8, +// LLVM-NEXT: [[NEW_STACK_V:%.*]] = getelementptr i8, ptr [[STACK_V]], i64 8, +// LLVM-NEXT: store ptr [[NEW_STACK_V]], ptr [[STACK_P]], align 8, +// LLVM-NEXT: br label %[[BB_END]], + +// LLVM: [[BB_END]]: ; preds = %[[BB_ON_STACK]], %[[BB_IN_REG]] +// LLVM-NEXT: [[PHIP:%.*]] = phi ptr [ [[IN_REG_OUTPUT]], %[[BB_IN_REG]] ], [ [[STACK_V]], %[[BB_ON_STACK]] ] +// LLVM-NEXT: [[PHIV:%.*]] = load ptr, ptr [[PHIP]], align 8, +// LLVM-NEXT: br label %[[OUT_SCOPE:.*]], + +// LLVM: [[OUT_SCOPE]]: ; preds = %[[BB_END]] +// LLVM-NEXT: ret void, diff --git a/clang/test/CIR/CodeGen/var-arg.c b/clang/test/CIR/CodeGen/var-arg.c new file mode 100644 index 000000000000..e689adefebe5 --- /dev/null +++ b/clang/test/CIR/CodeGen/var-arg.c @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +#include + +int f1(int n, ...) { + va_list valist; + va_start(valist, n); + int res = va_arg(valist, int); + va_end(valist); + return res; +} + +// BEFORE: !ty___va_list = !cir.struct, !cir.ptr, !cir.ptr, !cir.int, !cir.int} +// BEFORE: cir.func @f1(%arg0: !s32i, ...) 
-> !s32i +// BEFORE: [[RETP:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// BEFORE: [[RESP:%.*]] = cir.alloca !s32i, !cir.ptr, ["res", init] +// BEFORE: cir.va.start [[VARLIST:%.*]] : !cir.ptr +// BEFORE: [[TMP0:%.*]] = cir.va.arg [[VARLIST]] : (!cir.ptr) -> !s32i +// BEFORE: cir.store [[TMP0]], [[RESP]] : !s32i, !cir.ptr +// BEFORE: cir.va.end [[VARLIST]] : !cir.ptr +// BEFORE: [[RES:%.*]] = cir.load [[RESP]] : !cir.ptr, !s32i +// BEFORE: cir.store [[RES]], [[RETP]] : !s32i, !cir.ptr +// BEFORE: [[RETV:%.*]] = cir.load [[RETP]] : !cir.ptr, !s32i +// BEFORE: cir.return [[RETV]] : !s32i + +// AFTER: !ty___va_list = !cir.struct, !cir.ptr, !cir.ptr, !cir.int, !cir.int} +// AFTER: cir.func @f1(%arg0: !s32i, ...) -> !s32i +// AFTER: [[RETP:%.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// AFTER: [[RESP:%.*]] = cir.alloca !s32i, !cir.ptr, ["res", init] +// AFTER: cir.va.start [[VARLIST:%.*]] : !cir.ptr +// AFTER: [[GR_OFFS_P:%.*]] = cir.get_member [[VARLIST]][3] {name = "gr_offs"} : !cir.ptr -> !cir.ptr +// AFTER: [[GR_OFFS:%.*]] = cir.load [[GR_OFFS_P]] : !cir.ptr, !s32i +// AFTER: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i +// AFTER: [[CMP0:%.*]] = cir.cmp(ge, [[GR_OFFS]], [[ZERO]]) : !s32i, !cir.bool +// AFTER-NEXT: cir.brcond [[CMP0]] [[BB_ON_STACK:\^bb.*]], [[BB_MAY_REG:\^bb.*]] + +// This BB calculates to see if it is possible to pass arg in register. +// AFTER: [[BB_MAY_REG]]: +// AFTER-NEXT: [[EIGHT:%.*]] = cir.const #cir.int<8> : !s32i +// AFTER-NEXT: [[NEW_REG_OFFS:%.*]] = cir.binop(add, [[GR_OFFS]], [[EIGHT]]) : !s32i +// AFTER-NEXT: cir.store [[NEW_REG_OFFS]], [[GR_OFFS_P]] : !s32i, !cir.ptr +// AFTER-NEXT: [[CMP1:%.*]] = cir.cmp(le, [[NEW_REG_OFFS]], [[ZERO]]) : !s32i, !cir.bool +// AFTER-NEXT: cir.brcond [[CMP1]] [[BB_IN_REG:\^bb.*]], [[BB_ON_STACK]] + +// arg is passed in register. +// AFTER: [[BB_IN_REG]]: +// AFTER-NEXT: [[GR_TOP_P:%.*]] = cir.get_member [[VARLIST]][1] {name = "gr_top"} : !cir.ptr -> !cir.ptr> +// AFTER-NEXT: [[GR_TOP:%.*]] = cir.load [[GR_TOP_P]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: [[TMP2:%.*]] = cir.cast(bitcast, [[GR_TOP]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP3:%.*]] = cir.ptr_stride([[TMP2]] : !cir.ptr, [[GR_OFFS]] : !s32i), !cir.ptr +// AFTER-NEXT: [[IN_REG_OUTPUT:%.*]] = cir.cast(bitcast, [[TMP3]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.br [[BB_END:\^bb.*]]([[IN_REG_OUTPUT]] : !cir.ptr) + +// arg is passed in stack. +// AFTER: [[BB_ON_STACK]]: +// AFTER-NEXT: [[STACK_P:%.*]] = cir.get_member [[VARLIST]][0] {name = "stack"} : !cir.ptr -> !cir.ptr> +// AFTER-NEXT: [[STACK_V:%.*]] = cir.load [[STACK_P]] : !cir.ptr>, !cir.ptr +// AFTER-NEXT: [[EIGHT_IN_PTR_ARITH:%.*]] = cir.const #cir.int<8> : !u64i +// AFTER-NEXT: [[TMP4:%.*]] = cir.cast(bitcast, [[STACK_V]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP5:%.*]] = cir.ptr_stride([[TMP4]] : !cir.ptr, [[EIGHT_IN_PTR_ARITH]] : !u64i), !cir.ptr +// AFTER-NEXT: [[NEW_STACK_V:%.*]] = cir.cast(bitcast, [[TMP5]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: cir.store [[NEW_STACK_V]], [[STACK_P]] : !cir.ptr, !cir.ptr> +// AFTER-NEXT: cir.br [[BB_END]]([[STACK_V]] : !cir.ptr) + +// This BB is where different path converges. BLK_ARG is the arg addr which +// could come from IN_REG block where arg is passed in register, and saved in callee +// stack's argument saving area. +// Or from ON_STACK block which means arg is passed in from caller's stack area. 
+// AFTER-NEXT: [[BB_END]]([[BLK_ARG:%.*]]: !cir.ptr): // 2 preds: [[BB_IN_REG]], [[BB_ON_STACK]] +// AFTER-NEXT: [[TMP0:%.*]] = cir.cast(bitcast, [[BLK_ARG]] : !cir.ptr), !cir.ptr +// AFTER-NEXT: [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr, !s32i +// AFTER: cir.store [[TMP1]], [[RESP]] : !s32i, !cir.ptr +// AFTER: cir.va.end [[VARLIST]] : !cir.ptr +// AFTER: [[RES:%.*]] = cir.load [[RESP]] : !cir.ptr, !s32i +// AFTER: cir.store [[RES]], [[RETP]] : !s32i, !cir.ptr +// AFTER: [[RETV:%.*]] = cir.load [[RETP]] : !cir.ptr, !s32i +// AFTER: cir.return [[RETV]] : !s32i + +// LLVM: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 } +// LLVM: define dso_local i32 @f1(i32 %0, ...) +// LLVM: [[ARGN:%.*]] = alloca i32, i64 1, align 4, +// LLVM: [[RETP:%.*]] = alloca i32, i64 1, align 4, +// LLVM: [[RESP:%.*]] = alloca i32, i64 1, align 4, +// LLVM: call void @llvm.va_start.p0(ptr [[VARLIST:%.*]]), +// LLVM: [[GR_OFFS_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 3 +// LLVM: [[GR_OFFS:%.*]] = load i32, ptr [[GR_OFFS_P]], align 4, +// LLVM-NEXT: [[CMP0:%.*]] = icmp sge i32 [[GR_OFFS]], 0, +// LLVM-NEXT: br i1 [[CMP0]], label %[[BB_ON_STACK:.*]], label %[[BB_MAY_REG:.*]], + +// LLVM: [[BB_MAY_REG]]: ; +// LLVM: [[NEW_REG_OFFS:%.*]] = add i32 [[GR_OFFS]], 8, +// LLVM: store i32 [[NEW_REG_OFFS]], ptr [[GR_OFFS_P]], align 4, +// LLVM-NEXT: [[CMP1:%.*]] = icmp sle i32 [[NEW_REG_OFFS]], 0, +// LLVM-NEXT: br i1 [[CMP1]], label %[[BB_IN_REG:.*]], label %[[BB_ON_STACK]], + +// LLVM: [[BB_IN_REG]]: ; +// LLVM-NEXT: [[GR_TOP_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 1, +// LLVM-NEXT: [[GR_TOP:%.*]] = load ptr, ptr [[GR_TOP_P]], align 8, +// LLVM-NEXT: [[EXT64_GR_OFFS:%.*]] = sext i32 [[GR_OFFS]] to i64, +// LLVM-NEXT: [[IN_REG_OUTPUT:%.*]] = getelementptr i8, ptr [[GR_TOP]], i64 [[EXT64_GR_OFFS]], +// LLVM-NEXT: br label %[[BB_END:.*]], + +// LLVM: [[BB_ON_STACK]]: ; +// LLVM-NEXT: [[STACK_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 0, +// LLVM-NEXT: [[STACK_V:%.*]] = load ptr, ptr [[STACK_P]], align 8, +// LLVM-NEXT: [[NEW_STACK_V:%.*]] = getelementptr i8, ptr [[STACK_V]], i64 8, +// LLVM-NEXT: store ptr [[NEW_STACK_V]], ptr [[STACK_P]], align 8, +// LLVM-NEXT: br label %[[BB_END]], + +// LLVM: [[BB_END]]: ; preds = %[[BB_ON_STACK]], %[[BB_IN_REG]] +// LLVM-NEXT: [[PHIP:%.*]] = phi ptr [ [[IN_REG_OUTPUT]], %[[BB_IN_REG]] ], [ [[STACK_V]], %[[BB_ON_STACK]] ] +// LLVM-NEXT: [[PHIV:%.*]] = load i32, ptr [[PHIP]], align 4, +// LLVM-NEXT: store i32 [[PHIV]], ptr [[RESP]], align 4, +// LLVM: call void @llvm.va_end.p0(ptr [[VARLIST]]), +// LLVM: [[RES:%.*]] = load i32, ptr [[RESP]], align 4, +// LLVM: store i32 [[RES]], ptr [[RETP]], align 4, +// LLVM: [[RETV:%.*]] = load i32, ptr [[RETP]], align 4, +// LLVM-NEXT: ret i32 [[RETV]], diff --git a/clang/test/CIR/CodeGen/variadics.c b/clang/test/CIR/CodeGen/variadics.c new file mode 100644 index 000000000000..dd79ceedd93a --- /dev/null +++ b/clang/test/CIR/CodeGen/variadics.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s +// RUN: %clang_cc1 -x c++ -std=c++20 -triple aarch64-none-linux-android24 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s + +typedef __builtin_va_list va_list; + +#define va_start(ap, param) __builtin_va_start(ap, param) +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) 
__builtin_va_arg(ap, type) +#define va_copy(dst, src) __builtin_va_copy(dst, src) + +// CHECK: [[VALISTTYPE:!.+va_list.*]] = !cir.struct !s32i +// AMR64_CHECK: cir.func @{{.*}}average{{.*}}(%arg0: !s32i loc({{.+}}), ...) -> !s32i + va_list args, args_copy; + va_start(args, count); + // CHECK: cir.va.start %{{[0-9]+}} : !cir.ptr<[[VALISTTYPE]]> + + va_copy(args_copy, args); + // CHECK: cir.va.copy %{{[0-9]+}} to %{{[0-9]+}} : !cir.ptr<[[VALISTTYPE]]>, !cir.ptr<[[VALISTTYPE]]> + + int sum = 0; + for(int i = 0; i < count; i++) { + sum += va_arg(args, int); + // CHECK: %{{[0-9]+}} = cir.va.arg %{{[0-9]+}} : (!cir.ptr<[[VALISTTYPE]]>) -> !s32i + } + + va_end(args); + // CHECK: cir.va.end %{{[0-9]+}} : !cir.ptr<[[VALISTTYPE]]> + + return count > 0 ? sum / count : 0; +} + +int test(void) { + return average(5, 1, 2, 3, 4, 5); + // CHECK: cir.call @{{.*}}average{{.*}}(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}) : (!s32i, !s32i, !s32i, !s32i, !s32i, !s32i) -> !s32i +} diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp new file mode 100644 index 000000000000..1ba565b7cb79 --- /dev/null +++ b/clang/test/CIR/CodeGen/vbase.cpp @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +struct A { + int a; +}; + +struct B: virtual A { + int b; +}; + +void ppp() { B b; } + + +// Vtable definition for B +// CIR: cir.global linkonce_odr @_ZTV1B = #cir.vtable<{#cir.const_array<[#cir.ptr<12 : i64> : !cir.ptr, #cir.ptr : !cir.ptr, #cir.global_view<@_ZTI1B> : !cir.ptr]> : !cir.array x 3>}> + +// VTT for B. +// CIR: cir.global linkonce_odr @_ZTT1B = #cir.const_array<[#cir.global_view<@_ZTV1B, [0 : i32, 0 : i32, 3 : i32]> : !cir.ptr]> : !cir.array x 1> + +// CIR: cir.global "private" external @_ZTVN10__cxxabiv121__vmi_class_type_infoE + +// Type info name for B +// CIR: cir.global linkonce_odr comdat @_ZTS1B = #cir.const_array<"1B" : !cir.array> : !cir.array + +// CIR: cir.global "private" external @_ZTVN10__cxxabiv117__class_type_infoE : !cir.ptr> + +// Type info name for A +// CIR: cir.global linkonce_odr comdat @_ZTS1A = #cir.const_array<"1A" : !cir.array> : !cir.array + +// Type info A. +// CIR: cir.global constant external @_ZTI1A = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS1A> : !cir.ptr}> + +// Type info B. 
+// CIR: cir.global constant external @_ZTI1B = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS1B> : !cir.ptr, #cir.int<0> : !u32i, #cir.int<1> : !u32i, #cir.global_view<@_ZTI1A> : !cir.ptr, #cir.int<-6141> : !s64i}> + +// LLVM: $_ZTS1B = comdat any +// LLVM: $_ZTS1A = comdat any + +// LLVM: @_ZTV1B = linkonce_odr global { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] } +// LLVM: @_ZTT1B = linkonce_odr global [1 x ptr] [ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV1B, i32 0, i32 0, i32 3)] +// LLVM: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global ptr +// LLVM: @_ZTS1B = linkonce_odr global [2 x i8] c"1B", comdat +// LLVM: @_ZTVN10__cxxabiv117__class_type_infoE = external global ptr +// LLVM: @_ZTS1A = linkonce_odr global [2 x i8] c"1A", comdat +// LLVM: @_ZTI1A = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 2), ptr @_ZTS1A } +// LLVM: @_ZTI1B = constant { ptr, ptr, i32, i32, ptr, i64 } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i32 2), ptr @_ZTS1B, i32 0, i32 1, ptr @_ZTI1A, i64 -6141 } diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp new file mode 100644 index 000000000000..ad99c6e4fe6a --- /dev/null +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +namespace std { + template + void vector::resize(size_type __sz) { + size_type __cs = size(); + if (__cs) {} + } +} // namespace std + +// CHECK: cir.func linkonce_odr @_ZNSt6vectorIyE6resizeEm( +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: %1 = cir.alloca !u64i, !cir.ptr, ["__sz", init] {alignment = 8 : i64} +// CHECK: %2 = cir.alloca !u64i, !cir.ptr, ["__cs", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: cir.store %arg1, %1 : !u64i, !cir.ptr +// CHECK: %3 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %4 = cir.call @_ZNKSt6vectorIyE4sizeEv(%3) : (!cir.ptr) -> !u64i +// CHECK: cir.store %4, %2 : !u64i, !cir.ptr +// CHECK: cir.scope { +// CHECK: %5 = cir.load %2 : !cir.ptr, !u64i +// CHECK: %6 = cir.cast(int_to_bool, %5 : !u64i), !cir.bool +// CHECK: cir.if %6 { +// CHECK: } +// CHECK: } +// CHECK: cir.return + +void m() { + std::vector a; + int i = 43; + a.resize(i); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/vectype-ext.cpp b/clang/test/CIR/CodeGen/vectype-ext.cpp new file mode 100644 index 000000000000..04d97c406a94 --- /dev/null +++ b/clang/test/CIR/CodeGen/vectype-ext.cpp @@ -0,0 +1,509 @@ +// RUN: %clang_cc1 -std=c++17 -fclangir -emit-cir -triple x86_64-unknown-linux-gnu %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++17 -fclangir -emit-llvm -triple x86_64-unknown-linux-gnu %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +typedef int vi4 __attribute__((ext_vector_type(4))); +typedef int vi3 __attribute__((ext_vector_type(3))); +typedef int vi2 __attribute__((ext_vector_type(2))); +typedef double vd2 __attribute__((ext_vector_type(2))); +typedef long vl2 __attribute__((ext_vector_type(2))); +typedef unsigned short vus2 __attribute__((ext_vector_type(2))); + +// CIR: 
cir.func {{@.*vector_int_test.*}} +// LLVM: define dso_local void {{@.*vector_int_test.*}} +void vector_int_test(int x) { + + // Vector constant. + vi4 a = { 1, 2, 3, 4 }; + // CIR: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector + // LLVM: store <4 x i32> , ptr %{{[0-9]+}}, align 16 + + // Non-const vector initialization. + vi4 b = { x, 5, 6, x + 1 }; + // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}} : !s32i, !s32i, !s32i, !s32i) : !cir.vector + // LLVM: %[[#X1:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#X2:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#SUM:]] = add nsw i32 %[[#X2]], 1 + // LLVM-NEXT: %[[#VEC1:]] = insertelement <4 x i32> undef, i32 %[[#X1]], i64 0 + // LLVM-NEXT: %[[#VEC2:]] = insertelement <4 x i32> %[[#VEC1]], i32 5, i64 1 + // LLVM-NEXT: %[[#VEC3:]] = insertelement <4 x i32> %[[#VEC2]], i32 6, i64 2 + // LLVM-NEXT: %[[#VEC4:]] = insertelement <4 x i32> %[[#VEC3]], i32 %[[#SUM]], i64 3 + // LLVM-NEXT: store <4 x i32> %[[#VEC4]], ptr %{{[0-9]+}}, align 16 + + // Incomplete vector initialization. + vi4 bb = { x, x + 1 }; + // CIR: %[[#zero:]] = cir.const #cir.int<0> : !s32i + // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %[[#zero]], %[[#zero]] : !s32i, !s32i, !s32i, !s32i) : !cir.vector + // LLVM: %[[#X1:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#X2:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#SUM:]] = add nsw i32 %[[#X2]], 1 + // LLVM-NEXT: %[[#VEC1:]] = insertelement <4 x i32> undef, i32 %[[#X1]], i64 0 + // LLVM-NEXT: %[[#VEC2:]] = insertelement <4 x i32> %[[#VEC1]], i32 %[[#SUM]], i64 1 + // LLVM-NEXT: %[[#VEC3:]] = insertelement <4 x i32> %[[#VEC2]], i32 0, i64 2 + // LLVM-NEXT: %[[#VEC4:]] = insertelement <4 x i32> %[[#VEC3]], i32 0, i64 3 + // LLVM-NEXT: store <4 x i32> %[[#VEC4]], ptr %{{[0-9]+}}, align 16 + + + // Scalar to vector conversion, a.k.a. vector splat. Only valid as an + // operand of a binary operator, not as a regular conversion. 
+ bb = a + 7; + // CIR: %[[#seven:]] = cir.const #cir.int<7> : !s32i + // CIR: %{{[0-9]+}} = cir.vec.splat %[[#seven]] : !s32i, !cir.vector + // LLVM: %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#BB:]] = add <4 x i32> %[[#A]], + // LLVM-NEXT: store <4 x i32> %[[#BB]], ptr %{{[0-9]+}}, align 16 + + // Vector to vector conversion + vd2 bbb = { }; + bb = (vi4)bbb; + // CIR: %{{[0-9]+}} = cir.cast(bitcast, %{{[0-9]+}} : !cir.vector), !cir.vector + // LLVM: %{{[0-9]+}} = bitcast <2 x double> %{{[0-9]+}} to <4 x i32> + + // Extract element + int c = a[x]; + // CIR: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + // LLVM: %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#X:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#EXT:]] = extractelement <4 x i32> %[[#A]], i32 %[[#X]] + // LLVM-NEXT: store i32 %[[#EXT]], ptr %{{[0-9]+}}, align 4 + + // Insert element + a[x] = x; + // CIR: %[[#LOADEDVI:]] = cir.load %[[#STORAGEVI:]] : !cir.ptr>, !cir.vector + // CIR: %[[#UPDATEDVI:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVI]][%{{[0-9]+}} : !s32i] : !cir.vector + // CIR: cir.store %[[#UPDATEDVI]], %[[#STORAGEVI]] : !cir.vector, !cir.ptr> + // LLVM: %[[#X1:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#X2:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#INS:]] = insertelement <4 x i32> %[[#A]], i32 %[[#X1]], i32 %[[#X2]] + // LLVM-NEXT: store <4 x i32> %[[#INS]], ptr %{{[0-9]+}}, align 16 + + // Compound assignment + a[x] += a[0]; + // CIR: %[[#RHSCA:]] = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + // CIR: %[[#LHSCA:]] = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + // CIR: %[[#SUMCA:]] = cir.binop(add, %[[#LHSCA]], %[[#RHSCA]]) nsw : !s32i + // CIR: cir.vec.insert %[[#SUMCA]], %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + // LLVM: %[[#A1:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#RHSCA:]] = extractelement <4 x i32> %[[#A1]], i32 0 + // LLVM-NEXT: %[[#X:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#A2:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#LHSCA:]] = extractelement <4 x i32> %[[#A2]], i32 %[[#X]] + // LLVM-NEXT: %[[#SUMCA:]] = add nsw i32 %[[#LHSCA]], %[[#RHSCA]] + // LLVM-NEXT: %[[#A3:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#RES:]] = insertelement <4 x i32> %[[#A3]], i32 %[[#SUMCA]], i32 %[[#X]] + // LLVM-NEXT: store <4 x i32> %[[#RES]], ptr %{{[0-9]+}}, align 16 + + // Binary arithmetic operations + vi4 d = a + b; + // CIR: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = add <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 e = a - b; + // CIR: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = sub <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 f = a * b; + // CIR: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = mul <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 g = a / b; + // CIR: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = sdiv <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 h = a % b; + // CIR: %{{[0-9]+}} = cir.binop(rem, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = srem <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 i = a & b; + // CIR: %{{[0-9]+}} = cir.binop(and, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: 
%{{[0-9]+}} = and <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 j = a | b; + // CIR: %{{[0-9]+}} = cir.binop(or, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = or <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + vi4 k = a ^ b; + // CIR: %{{[0-9]+}} = cir.binop(xor, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = xor <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + + // Unary arithmetic operations + vi4 l = +a; + // CIR: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#VAL:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: store <4 x i32> %[[#VAL]], ptr %{{[0-9]+}}, align 16 + vi4 m = -a; + // CIR: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#VAL:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#RES:]] = sub <4 x i32> zeroinitializer, %[[#VAL]] + // LLVM-NEXT: store <4 x i32> %[[#RES]], ptr %{{[0-9]+}}, align 16 + vi4 n = ~a; + // CIR: %{{[0-9]+}} = cir.unary(not, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#VAL:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#RES:]] = xor <4 x i32> , %[[#VAL]] + // LLVM-NEXT: store <4 x i32> %[[#RES]], ptr %{{[0-9]+}}, align 16 + + // TODO: Ternary conditional operator + + // Comparisons + vi4 o = a == b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = icmp eq <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32> + vi4 p = a != b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = icmp ne <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32> + vi4 q = a < b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = icmp slt <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32> + vi4 r = a > b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = icmp sgt <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32> + vi4 s = a <= b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = icmp sle <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32> + vi4 t = a >= b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = icmp sge <4 x i32> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32> + + // __builtin_shufflevector + vi4 u = __builtin_shufflevector(a, b, 7, 5, 3, 1); + // CIR: %{{[0-9]+}} = cir.vec.shuffle(%{{[0-9]+}}, %{{[0-9]+}} : !cir.vector) [#cir.int<7> : !s64i, #cir.int<5> : !s64i, #cir.int<3> : !s64i, #cir.int<1> : !s64i] : !cir.vector + + // LLVM: %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#B:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#SHFL:]] = shufflevector <4 x i32> %[[#A]], <4 x i32> %[[#B]], <4 x i32> + // LLVM-NEXT: store <4 x i32> %[[#SHFL]], ptr %{{[0-9]+}}, align 16 + + vi4 v = __builtin_shufflevector(a, b); + // CIR: %{{[0-9]+}} = cir.vec.shuffle.dynamic %{{[0-9]+}} : !cir.vector, %{{[0-9]+}} : !cir.vector + + // LLVM: %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#B:]] = load <4 x 
i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#IDXMOD:]] = and <4 x i32> %[[#B]], + // LLVM-NEXT: %[[#IDX0:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 0 + // LLVM-NEXT: %[[#EXT1:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX0]] + // LLVM-NEXT: %[[#INS1:]] = insertelement <4 x i32> undef, i32 %[[#EXT1]], i64 0 + // LLVM-NEXT: %[[#IDX1:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 1 + // LLVM-NEXT: %[[#EXT2:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX1]] + // LLVM-NEXT: %[[#INS2:]] = insertelement <4 x i32> %[[#INS1]], i32 %[[#EXT2]], i64 1 + // LLVM-NEXT: %[[#IDX2:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 2 + // LLVM-NEXT: %[[#EXT3:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX2]] + // LLVM-NEXT: %[[#INS3:]] = insertelement <4 x i32> %[[#INS2]], i32 %[[#EXT3]], i64 2 + // LLVM-NEXT: %[[#IDX3:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 3 + // LLVM-NEXT: %[[#EXT4:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX3]] + // LLVM-NEXT: %[[#INS4:]] = insertelement <4 x i32> %[[#INS3]], i32 %[[#EXT4]], i64 3 + // LLVM-NEXT: store <4 x i32> %[[#INS4]], ptr %{{[0-9]+}}, align 16 +} + +// CIR: cir.func {{@.*vector_double_test.*}} +// LLVM: define dso_local void {{@.*vector_double_test.*}} +void vector_double_test(int x, double y) { + // Vector constant. + vd2 a = { 1.5, 2.5 }; + // CIR: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.fp<1.500000e+00> : !cir.double, #cir.fp<2.500000e+00> : !cir.double]> : !cir.vector + + // LLVM: store <2 x double> , ptr %{{[0-9]+}}, align 16 + + // Non-const vector initialization. + vd2 b = { y, y + 1.0 }; + // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}} : !cir.double, !cir.double) : !cir.vector + + // LLVM: %[[#Y1:]] = load double, ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: %[[#Y2:]] = load double, ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: %[[#SUM:]] = fadd double %[[#Y2]], 1.000000e+00 + // LLVM-NEXT: %[[#VEC1:]] = insertelement <2 x double> undef, double %[[#Y1]], i64 0 + // LLVM-NEXT: %[[#VEC2:]] = insertelement <2 x double> %[[#VEC1]], double %[[#SUM]], i64 1 + // LLVM-NEXT: store <2 x double> %[[#VEC2]], ptr %{{[0-9]+}}, align 16 + + // Incomplete vector initialization + vd2 bb = { y }; + // CIR: [[#dzero:]] = cir.const #cir.fp<0.000000e+00> : !cir.double + // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %[[#dzero]] : !cir.double, !cir.double) : !cir.vector + + // LLVM: %[[#Y1:]] = load double, ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: %[[#VEC1:]] = insertelement <2 x double> undef, double %[[#Y1]], i64 0 + // LLVM-NEXT: %[[#VEC2:]] = insertelement <2 x double> %[[#VEC1]], double 0.000000e+00, i64 1 + // LLVM-NEXT: store <2 x double> %[[#VEC2]], ptr %{{[0-9]+}}, align 16 + + // Scalar to vector conversion, a.k.a. vector splat. Only valid as an + // operand of a binary operator, not as a regular conversion. 
+ bb = a + 2.5; + // CIR: %[[#twohalf:]] = cir.const #cir.fp<2.500000e+00> : !cir.double + // CIR: %{{[0-9]+}} = cir.vec.splat %[[#twohalf]] : !cir.double, !cir.vector + + // LLVM: %[[#A:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#BB:]] = fadd <2 x double> %[[#A]], + // LLVM-NEXT: store <2 x double> %[[#BB]], ptr %{{[0-9]+}}, align 16 + + // Extract element + double c = a[x]; + // CIR: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + // LLVM: %{{[0-9]+}} = extractelement <2 x double> %{{[0-9]+}}, i32 %{{[0-9]+}} + + // Insert element + a[x] = y; + // CIR: %[[#LOADEDVF:]] = cir.load %[[#STORAGEVF:]] : !cir.ptr>, !cir.vector + // CIR: %[[#UPDATEDVF:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVF]][%{{[0-9]+}} : !s32i] : !cir.vector + // CIR: cir.store %[[#UPDATEDVF]], %[[#STORAGEVF]] : !cir.vector, !cir.ptr> + + // LLVM: %[[#Y:]] = load double, ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: %[[#X:]] = load i32, ptr %{{[0-9]+}}, align 4 + // LLVM-NEXT: %[[#A:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#INS:]] = insertelement <2 x double> %[[#A]], double %[[#Y]], i32 %[[#X]] + // LLVM-NEXT: store <2 x double> %[[#INS]], ptr %{{[0-9]+}}, align 16 + + // Binary arithmetic operations + vd2 d = a + b; + // CIR: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = fadd <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + vd2 e = a - b; + // CIR: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + vd2 f = a * b; + // CIR: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + vd2 g = a / b; + // CIR: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + // LLVM: %{{[0-9]+}} = fdiv <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + + // Unary arithmetic operations + vd2 l = +a; + // CIR: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#VAL:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: store <2 x double> %[[#VAL]], ptr %{{[0-9]+}}, align 16 + vd2 m = -a; + // CIR: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#VAL:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#RES:]] = fneg <2 x double> %[[#VAL]] + // LLVM-NEXT: store <2 x double> %[[#RES]], ptr %{{[0-9]+}}, align 16 + + // Comparisons + vl2 o = a == b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = fcmp oeq <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64> + vl2 p = a != b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = fcmp une <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64> + vl2 q = a < b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = fcmp olt <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64> + vl2 r = a > b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = fcmp ogt <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64> + vl2 s = a <= b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, 
!cir.vector + // LLVM: %[[#RES:]] = fcmp ole <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64> + vl2 t = a >= b; + // CIR: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + // LLVM: %[[#RES:]] = fcmp oge <2 x double> %{{[0-9]+}}, %{{[0-9]+}} + // LLVM-NEXT: sext <2 x i1> %[[#RES:]] to <2 x i64> + + // __builtin_convertvector + vus2 w = __builtin_convertvector(a, vus2); + // CIR: %{{[0-9]+}} = cir.cast(float_to_int, %{{[0-9]+}} : !cir.vector), !cir.vector + // LLVM: %{{[0-9]+}} = fptoui <2 x double> %{{[0-9]+}} to <2 x i16> +} + +// CIR: cir.func {{@.*test_load.*}} +// LLVM: define dso_local void {{@.*test_load.*}} +void test_load() { + vi4 a = { 1, 2, 3, 4 }; + + vi2 b; + + b = a.wz; + // CIR: %[[#LOAD1:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#SHUFFLE1:]] = cir.vec.shuffle(%[[#LOAD1]], %[[#LOAD1]] : !cir.vector) [#cir.int<3> : !s32i, #cir.int<2> : !s32i] : !cir.vector + // CIR-NEXT: cir.store %[[#SHUFFLE1]], %{{[0-9]+}} : !cir.vector, !cir.ptr> + + // LLVM: %[[#LOAD1:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#SHUFFLE1:]] = shufflevector <4 x i32> %[[#LOAD1]], <4 x i32> %[[#LOAD1]], <2 x i32> + // LLVM-NEXT: store <2 x i32> %[[#SHUFFLE1]], ptr %{{[0-9]+}}, align 8 + + int one_elem_load = a.s2; + // CIR-NEXT: %[[#LOAD8:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#EXTRACT_INDEX:]] = cir.const #cir.int<2> : !s64i + // CIR-NEXT: %[[#EXTRACT1:]] = cir.vec.extract %[[#LOAD8]][%[[#EXTRACT_INDEX]] : !s64i] : !cir.vector + // CIR-NEXT: cir.store %[[#EXTRACT1]], %{{[0-9]+}} : !s32i, !cir.ptr + + // LLVM-NEXT: %[[#LOAD8:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#EXTRACT1:]] = extractelement <4 x i32> %[[#LOAD8]], i64 2 + // LLVM-NEXT: store i32 %[[#EXTRACT1]], ptr %{{[0-9]+}}, align 4 + +} + +// CIR: cir.func {{@.*test_store.*}} +// LLVM: define dso_local void {{@.*test_store.*}} +void test_store() { + vi4 a; + // CIR: %[[#PVECA:]] = cir.alloca !cir.vector + // LLVM: %[[#PVECA:]] = alloca <4 x i32> + + vi2 b = {1, 2}; + // CIR-NEXT: %[[#PVECB:]] = cir.alloca !cir.vector + // LLVM-NEXT: %[[#PVECB:]] = alloca <2 x i32> + + vi3 c = {}; + // CIR-NEXT: %[[#PVECC:]] = cir.alloca !cir.vector + // LLVM-NEXT: %[[#PVECC:]] = alloca <3 x i32> + + a.xy = b; + // CIR: %[[#LOAD4RHS:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#LOAD5LHS:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#SHUFFLE5:]] = cir.vec.shuffle(%[[#LOAD4RHS]], %[[#LOAD4RHS]] : !cir.vector) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector + // CIR-NEXT: %[[#SHUFFLE6:]] = cir.vec.shuffle(%[[#LOAD5LHS]], %[[#SHUFFLE5]] : !cir.vector) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector + // CIR-NEXT: cir.store %[[#SHUFFLE6]], %{{[0-9]+}} : !cir.vector, !cir.ptr> + + // LLVM: %[[#LOAD4RHS:]] = load <2 x i32>, ptr %{{[0-9]+}}, align 8 + // LLVM-NEXT: %[[#LOAD5LHS:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#SHUFFLE5:]] = shufflevector <2 x i32> %[[#LOAD4RHS]], <2 x i32> %[[#LOAD4RHS]], <4 x i32> + // LLVM-NEXT: %[[#SHUFFLE6:]] = shufflevector <4 x i32> %[[#LOAD5LHS]], <4 x i32> %[[#SHUFFLE5]], <4 x i32> + // LLVM-NEXT: store <4 x i32> %[[#SHUFFLE6]], ptr %{{[0-9]+}}, align 16 + + // load single element + a.s0 = 1; + // CIR-NEXT: cir.const #cir.int<1> + // CIR-NEXT: %[[#LOAD7:]] = cir.load %{{[0-9]+}} : !cir.ptr>, 
!cir.vector + // CIR-NEXT: %[[#INSERT_INDEX:]] = cir.const #cir.int<0> : !s64i + // CIR-NEXT: %[[#INSERT1:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOAD7]][%[[#INSERT_INDEX]] : !s64i] : !cir.vector + // CIR-NEXT: cir.store %[[#INSERT1]], %{{[0-9]+}} : !cir.vector, !cir.ptr> + + // LLVM-NEXT: %[[#LOAD7:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#INSERT1:]] = insertelement <4 x i32> %[[#LOAD7]], i32 1, i64 0 + // LLVM-NEXT: store <4 x i32> %[[#INSERT1]], ptr %{{[0-9]+}}, align 16 + + // extend length from 2 to 4, then merge two vectors + a.lo = b; + // CIR: %[[#VECB:]] = cir.load %[[#PVECB]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#VECA:]] = cir.load %[[#PVECA]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#EXTVECB:]] = cir.vec.shuffle(%[[#VECB]], %[[#VECB]] : !cir.vector) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector + // CIR-NEXT: %[[#RESULT:]] = cir.vec.shuffle(%[[#VECA]], %[[#EXTVECB]] : !cir.vector) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector + // CIR-NEXT: cir.store %[[#RESULT]], %[[#PVECA]] : !cir.vector, !cir.ptr> + + // LLVM: %[[#VECB:]] = load <2 x i32>, ptr %[[#PVECB]], align 8 + // LLVM-NEXT: %[[#VECA:]] = load <4 x i32>, ptr %[[#PVECA]], align 16 + // LLVM-NEXT: %[[#EXTVECB:]] = shufflevector <2 x i32> %[[#VECB]], <2 x i32> %[[#VECB]], <4 x i32> + // LLVM-NEXT: %[[#RESULT:]] = shufflevector <4 x i32> %[[#VECA]], <4 x i32> %[[#EXTVECB]], <4 x i32> + // LLVM-NEXT: store <4 x i32> %[[#RESULT]], ptr %[[#PVECA]], align 16 + + // OpenCL C Specification 6.3.7. Vector Components + // The suffixes .lo (or .even) and .hi (or .odd) for a 3-component vector type + // operate as if the 3-component vector type is a 4-component vector type with + // the value in the w component undefined. 
+ b = c.hi; + + // CIR-NEXT: %[[#VECC:]] = cir.load %[[#PVECC]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#HIPART:]] = cir.vec.shuffle(%[[#VECC]], %[[#VECC]] : !cir.vector) [#cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector + // CIR-NEXT: cir.store %[[#HIPART]], %[[#PVECB]] : !cir.vector, !cir.ptr> + + // LLVM-NEXT: %[[#VECC:]] = load <3 x i32>, ptr %[[#PVECC]], align 16 + // LLVM-NEXT: %[[#HIPART:]] = shufflevector <3 x i32> %[[#VECC]], <3 x i32> %[[#VECC]], <2 x i32> + // LLVM-NEXT: store <2 x i32> %[[#HIPART]], ptr %[[#PVECB]], align 8 + + // c.hi is c[2, 3], in which 3 should be ignored in CIRGen for store + c.hi = b; + + // CIR-NEXT: %[[#VECB:]] = cir.load %[[#PVECB]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#VECC:]] = cir.load %[[#PVECC]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#EXTVECB:]] = cir.vec.shuffle(%[[#VECB]], %[[#VECB]] : !cir.vector) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector + // CIR-NEXT: %[[#RESULT:]] = cir.vec.shuffle(%[[#VECC]], %[[#EXTVECB]] : !cir.vector) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector + // CIR-NEXT: cir.store %[[#RESULT]], %[[#PVECC]] : !cir.vector, !cir.ptr> + + // LLVM-NEXT: %[[#VECB:]] = load <2 x i32>, ptr %[[#PVECB]], align 8 + // LLVM-NEXT: %[[#VECC:]] = load <3 x i32>, ptr %[[#PVECC]], align 16 + // LLVM-NEXT: %[[#EXTVECB:]] = shufflevector <2 x i32> %[[#VECB]], <2 x i32> %[[#VECB]], <3 x i32> + // LLVM-NEXT: %[[#RESULT:]] = shufflevector <3 x i32> %[[#VECC]], <3 x i32> %[[#EXTVECB]], <3 x i32> + // LLVM-NEXT: store <3 x i32> %[[#RESULT]], ptr %[[#PVECC]], align 16 + +} + +// CIR: cir.func {{@.*test_build_lvalue.*}} +// LLVM: define dso_local void {{@.*test_build_lvalue.*}} +void test_build_lvalue() { + // special cases only + + vi4 *pv, v; + + // CIR-NEXT: %[[#ALLOCAPV:]] = cir.alloca !cir.ptr>, !cir.ptr>>, ["pv"] {alignment = 8 : i64} + // CIR-NEXT: %[[#ALLOCAV:]] = cir.alloca !cir.vector, !cir.ptr>, ["v"] {alignment = 16 : i64} + // CIR-NEXT: %[[#ALLOCAS:]] = cir.alloca !s32i, !cir.ptr, ["s", init] {alignment = 4 : i64} + // CIR-NEXT: %[[#ALLOCATMP:]] = cir.alloca !cir.vector, !cir.ptr>, ["tmp"] {alignment = 16 : i64} + // CIR-NEXT: %[[#ALLOCAR:]] = cir.alloca !s32i, !cir.ptr, ["r", init] {alignment = 4 : i64} + + // LLVM-NEXT: %[[#ALLOCAPV:]] = alloca ptr, i64 1, align 8 + // LLVM-NEXT: %[[#ALLOCAV:]] = alloca <4 x i32>, i64 1, align 16 + // LLVM-NEXT: %[[#ALLOCAS:]] = alloca i32, i64 1, align 4 + // LLVM-NEXT: %[[#ALLOCATMP:]] = alloca <4 x i32>, i64 1, align 16 + // LLVM-NEXT: %[[#ALLOCAR:]] = alloca i32, i64 1, align 4 + + pv->x = 99; + // CIR-NEXT: %[[#VAL:]] = cir.const #cir.int<99> : !s32i + // CIR-NEXT: %[[#PV:]] = cir.load %[[#ALLOCAPV]] : !cir.ptr>>, !cir.ptr> + // CIR-NEXT: %[[#V:]] = cir.load %[[#PV]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s64i + // CIR-NEXT: %[[#RESULT:]] = cir.vec.insert %[[#VAL]], %[[#V]][%[[#IDX]] : !s64i] : !cir.vector + // CIR-NEXT: cir.store %[[#RESULT]], %[[#PV]] : !cir.vector, !cir.ptr> + + // LLVM-NEXT: %[[#PV:]] = load ptr, ptr %[[#ALLOCAPV]], align 8 + // LLVM-NEXT: %[[#V:]] = load <4 x i32>, ptr %[[#PV]], align 16 + // LLVM-NEXT: %[[#RESULT:]] = insertelement <4 x i32> %[[#V]], i32 99, i64 0 + // LLVM-NEXT: store <4 x i32> %[[#RESULT]], ptr %[[#PV]], align 16 + + int s = (v+v).x; + + // CIR-NEXT: %[[#LOAD1:]] = cir.load %[[#ALLOCAV]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#LOAD2:]] = cir.load %[[#ALLOCAV]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#SUM:]] = cir.binop(add, 
%[[#LOAD1]], %[[#LOAD2]]) : !cir.vector + // CIR-NEXT: cir.store %[[#SUM]], %[[#ALLOCATMP]] : !cir.vector, !cir.ptr> + // CIR-NEXT: %[[#TMP:]] = cir.load %[[#ALLOCATMP]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s64i + // CIR-NEXT: %[[#RESULT:]] = cir.vec.extract %[[#TMP]][%[[#IDX]] : !s64i] : !cir.vector + // CIR-NEXT: cir.store %[[#RESULT]], %[[#ALLOCAS]] : !s32i, !cir.ptr + + // LLVM-NEXT: %[[#LOAD1:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#LOAD2:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16 + // LLVM-NEXT: %[[#SUM:]] = add <4 x i32> %[[#LOAD1]], %[[#LOAD2]] + // LLVM-NEXT: store <4 x i32> %[[#SUM]], ptr %[[#ALLOCATMP]], align 16 + // LLVM-NEXT: %[[#TMP:]] = load <4 x i32>, ptr %[[#ALLOCATMP]], align 16 + // LLVM-NEXT: %[[#RESULT:]] = extractelement <4 x i32> %[[#TMP]], i64 0 + // LLVM-NEXT: store i32 %[[#RESULT]], ptr %[[#ALLOCAS]], align 4 + + int r = v.xy.x; + // CIR-NEXT: %[[#V:]] = cir.load %[[#ALLOCAV]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s64i + // CIR-NEXT: %[[#RESULT:]] = cir.vec.extract %[[#V]][%[[#IDX]] : !s64i] : !cir.vector + // CIR-NEXT: cir.store %[[#RESULT]], %[[#ALLOCAR]] : !s32i, !cir.ptr + + // LLVM-NEXT: %[[#V:]] = load <4 x i32>, ptr %[[#ALLOCAV]], align 16 + // LLVM-NEXT: %[[#RESULT:]] = extractelement <4 x i32> %[[#V]], i64 0 + // LLVM-NEXT: store i32 %[[#RESULT]], ptr %[[#ALLOCAR]], align 4 + +} + +// CIR: cir.func {{@.*test_vec3.*}} +// LLVM: define dso_local void {{@.*test_vec3.*}} +void test_vec3() { + vi3 v = {}; + // CIR-NEXT: %[[#PV:]] = cir.alloca !cir.vector, !cir.ptr>, ["v"] {alignment = 16 : i64} + // CIR-NEXT: %[[#VVAL:]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector + // CIR-NEXT: cir.store %[[#VVAL]], %[[#PV]] : !cir.vector, !cir.ptr> + + // LLVM-NEXT: %[[#PV:]] = alloca <3 x i32>, i64 1, align 16 + // LLVM-NEXT: store <3 x i32> zeroinitializer, ptr %[[#PV]], align 16 + + v + 1; + // CIR-NEXT: %[[#PV4:]] = cir.cast(bitcast, %[[#PV]] : !cir.ptr>), !cir.ptr> + // CIR-NEXT: %[[#V4:]] = cir.load %[[#PV4]] : !cir.ptr>, !cir.vector + // CIR-NEXT: %[[#V3:]] = cir.vec.shuffle(%[[#V4]], %[[#V4]] : !cir.vector) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector + // CIR: %[[#RES:]] = cir.binop(add, %[[#V3]], %{{[0-9]+}}) : !cir.vector + + // LLVM-NEXT: %[[#V4:]] = load <4 x i32>, ptr %[[#PV:]], align 16 + // LLVM-NEXT: %[[#V3:]] = shufflevector <4 x i32> %[[#V4]], <4 x i32> %[[#V4]], <3 x i32> + // LLVM-NEXT: %[[#RES:]] = add <3 x i32> %[[#V3]], + +} diff --git a/clang/test/CIR/CodeGen/vectype-issized.c b/clang/test/CIR/CodeGen/vectype-issized.c new file mode 100644 index 000000000000..380ed7a13f28 --- /dev/null +++ b/clang/test/CIR/CodeGen/vectype-issized.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir -emit-cir -target-feature +neon %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir -emit-llvm -target-feature +neon %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +typedef __attribute__((neon_vector_type(8))) short c; +void d() { c a[8]; } + +// CIR-LABEL: d +// CIR: {{%.*}} = cir.alloca !cir.array x 8>, +// CIR-SAME: !cir.ptr x 8>>, ["a"] +// CIR-SAME: {alignment = 16 : i64} + +// LLVM-LABEL: d +// LLVM: {{%.*}} = alloca [8 x <8 x i16>], i64 1, align 16 diff --git a/clang/test/CIR/CodeGen/vectype.cpp 
b/clang/test/CIR/CodeGen/vectype.cpp new file mode 100644 index 000000000000..1f6076f7f49d --- /dev/null +++ b/clang/test/CIR/CodeGen/vectype.cpp @@ -0,0 +1,170 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +typedef int vi4 __attribute__((vector_size(16))); +typedef double vd2 __attribute__((vector_size(16))); +typedef long long vll2 __attribute__((vector_size(16))); +typedef unsigned short vus2 __attribute__((vector_size(4))); + +void vector_int_test(int x) { + + // Vector constant. + vi4 a = { 1, 2, 3, 4 }; + // CHECK: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector + + // Non-const vector initialization. + vi4 b = { x, 5, 6, x + 1 }; + // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}} : !s32i, !s32i, !s32i, !s32i) : !cir.vector + + // Incomplete vector initialization. + vi4 bb = { x, x + 1 }; + // CHECK: %[[#zero:]] = cir.const #cir.int<0> : !s32i + // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %[[#zero]], %[[#zero]] : !s32i, !s32i, !s32i, !s32i) : !cir.vector + + // Scalar to vector conversion, a.k.a. vector splat. Only valid as an + // operand of a binary operator, not as a regular conversion. + bb = a + 7; + // CHECK: %[[#seven:]] = cir.const #cir.int<7> : !s32i + // CHECK: %{{[0-9]+}} = cir.vec.splat %[[#seven]] : !s32i, !cir.vector + + // Vector to vector conversion + vd2 bbb = { }; + bb = (vi4)bbb; + // CHECK: %{{[0-9]+}} = cir.cast(bitcast, %{{[0-9]+}} : !cir.vector), !cir.vector + + // Extract element + int c = a[x]; + // CHECK: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + + // Insert element + a[x] = x; + // CHECK: %[[#LOADEDVI:]] = cir.load %[[#STORAGEVI:]] : !cir.ptr>, !cir.vector + // CHECK: %[[#UPDATEDVI:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVI]][%{{[0-9]+}} : !s32i] : !cir.vector + // CHECK: cir.store %[[#UPDATEDVI]], %[[#STORAGEVI]] : !cir.vector, !cir.ptr> + + // Compound assignment + a[x] += a[0]; + // CHECK: %[[#LOADCA1:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CHECK: %[[#RHSCA:]] = cir.vec.extract %[[#LOADCA1]][%{{[0-9]+}} : !s32i] : !cir.vector + // CHECK: %[[#LOADCAIDX2:]] = cir.load %{{[0-9]+}} : !cir.ptr, !s32i + // CHECK: %[[#LOADCAVEC3:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CHECK: %[[#LHSCA:]] = cir.vec.extract %[[#LOADCAVEC3]][%[[#LOADCAIDX2]] : !s32i] : !cir.vector + // CHECK: %[[#SUMCA:]] = cir.binop(add, %[[#LHSCA]], %[[#RHSCA]]) nsw : !s32i + // CHECK: %[[#LOADCAVEC4:]] = cir.load %{{[0-9]+}} : !cir.ptr>, !cir.vector + // CHECK: %[[#RESULTCAVEC:]] = cir.vec.insert %[[#SUMCA]], %[[#LOADCAVEC4]][%[[#LOADCAIDX2]] : !s32i] : !cir.vector + // CHECK: cir.store %[[#RESULTCAVEC]], %{{[0-9]+}} : !cir.vector, !cir.ptr> + + // Binary arithmetic operations + vi4 d = a + b; + // CHECK: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 e = a - b; + // CHECK: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 f = a * b; + // CHECK: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 g = a / b; + // CHECK: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 h = a % b; + // CHECK: %{{[0-9]+}} = cir.binop(rem, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 i = a & b; + // CHECK: %{{[0-9]+}} = cir.binop(and, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 j = a | b; + // CHECK: %{{[0-9]+}} = 
cir.binop(or, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vi4 k = a ^ b; + // CHECK: %{{[0-9]+}} = cir.binop(xor, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + + // Unary arithmetic operations + vi4 l = +a; + // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 m = -a; + // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 n = ~a; + // CHECK: %{{[0-9]+}} = cir.unary(not, %{{[0-9]+}}) : !cir.vector, !cir.vector + + // Ternary conditional operator + vi4 tc = a ? b : d; + // CHECK: %{{[0-9]+}} = cir.vec.ternary(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + + // Comparisons + vi4 o = a == b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 p = a != b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 q = a < b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 r = a > b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 s = a <= b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vi4 t = a >= b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + + // __builtin_shufflevector + vi4 u = __builtin_shufflevector(a, b, 7, 5, 3, 1); + // CHECK: %{{[0-9]+}} = cir.vec.shuffle(%{{[0-9]+}}, %{{[0-9]+}} : !cir.vector) [#cir.int<7> : !s64i, #cir.int<5> : !s64i, #cir.int<3> : !s64i, #cir.int<1> : !s64i] : !cir.vector + vi4 v = __builtin_shufflevector(a, b); + // CHECK: %{{[0-9]+}} = cir.vec.shuffle.dynamic %{{[0-9]+}} : !cir.vector, %{{[0-9]+}} : !cir.vector +} + +void vector_double_test(int x, double y) { + // Vector constant. + vd2 a = { 1.5, 2.5 }; + // CHECK: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.fp<1.500000e+00> : !cir.double, #cir.fp<2.500000e+00> : !cir.double]> : !cir.vector + + // Non-const vector initialization. + vd2 b = { y, y + 1.0 }; + // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}} : !cir.double, !cir.double) : !cir.vector + + // Incomplete vector initialization + vd2 bb = { y }; + // CHECK: [[#dzero:]] = cir.const #cir.fp<0.000000e+00> : !cir.double + // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %[[#dzero]] : !cir.double, !cir.double) : !cir.vector + + // Scalar to vector conversion, a.k.a. vector splat. Only valid as an + // operand of a binary operator, not as a regular conversion. 
+ bb = a + 2.5; + // CHECK: %[[#twohalf:]] = cir.const #cir.fp<2.500000e+00> : !cir.double + // CHECK: %{{[0-9]+}} = cir.vec.splat %[[#twohalf]] : !cir.double, !cir.vector + + // Extract element + double c = a[x]; + // CHECK: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector + + // Insert element + a[x] = y; + // CHECK: %[[#LOADEDVF:]] = cir.load %[[#STORAGEVF:]] : !cir.ptr>, !cir.vector + // CHECK: %[[#UPDATEDVF:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVF]][%{{[0-9]+}} : !s32i] : !cir.vector + // CHECK: cir.store %[[#UPDATEDVF]], %[[#STORAGEVF]] : !cir.vector, !cir.ptr> + + // Binary arithmetic operations + vd2 d = a + b; + // CHECK: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vd2 e = a - b; + // CHECK: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vd2 f = a * b; + // CHECK: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + vd2 g = a / b; + // CHECK: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector + + // Unary arithmetic operations + vd2 l = +a; + // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector, !cir.vector + vd2 m = -a; + // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector, !cir.vector + + // Comparisons + vll2 o = a == b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vll2 p = a != b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vll2 q = a < b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vll2 r = a > b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vll2 s = a <= b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + vll2 t = a >= b; + // CHECK: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector, !cir.vector + + // __builtin_convertvector + vus2 w = __builtin_convertvector(a, vus2); + // CHECK: %{{[0-9]+}} = cir.cast(float_to_int, %{{[0-9]+}} : !cir.vector), !cir.vector +} diff --git a/clang/test/CIR/CodeGen/visibility-attribute.c b/clang/test/CIR/CodeGen/visibility-attribute.c new file mode 100644 index 000000000000..549f05d052b8 --- /dev/null +++ b/clang/test/CIR/CodeGen/visibility-attribute.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +extern int glob_default; +// CIR: cir.global "private" external @glob_default : !s32i +// LLVM: @glob_default = external global i32 + +extern int __attribute__((__visibility__("hidden"))) glob_hidden; +// CIR: cir.global "private" hidden external @glob_hidden : !s32i +// LLVM: @glob_hidden = external hidden global i32 + +extern int __attribute__((__visibility__("protected"))) glob_protected; +// CIR: cir.global "private" protected external @glob_protected : !s32i +// LLVM: @glob_protected = external protected global i32 + +int call_glob() +{ + return glob_default + glob_hidden + glob_protected; +} + +void foo_default(); +// CIR: cir.func no_proto private @foo_default(...) +// LLVM: declare {{.*}} void @foo_default(...) + +void __attribute__((__visibility__("hidden"))) foo_hidden(); +// CIR: cir.func no_proto private hidden @foo_hidden(...) +// LLVM: declare {{.*}} hidden void @foo_hidden(...) 
+ +void __attribute__((__visibility__("protected"))) foo_protected(); +// CIR: cir.func no_proto private protected @foo_protected(...) +// LLVM: declare {{.*}} protected void @foo_protected(...) + +void call_foo() +{ + foo_default(); + foo_hidden(); + foo_protected(); +} diff --git a/clang/test/CIR/CodeGen/vla.c b/clang/test/CIR/CodeGen/vla.c new file mode 100644 index 000000000000..d54c1300bd8d --- /dev/null +++ b/clang/test/CIR/CodeGen/vla.c @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s + +// CHECK: cir.func @f0(%arg0: !s32i +// CHECK: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr, ["len", init] {alignment = 4 : i64} +// CHECK: [[TMP1:%.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["saved_stack"] {alignment = 8 : i64} +// CHECK: cir.store %arg0, [[TMP0]] : !s32i, !cir.ptr +// CHECK: [[TMP2:%.*]] = cir.load [[TMP0]] : !cir.ptr, !s32i +// CHECK: [[TMP3:%.*]] = cir.cast(integral, [[TMP2]] : !s32i), !u64i +// CHECK: [[TMP4:%.*]] = cir.stack_save : !cir.ptr +// CHECK: cir.store [[TMP4]], [[TMP1]] : !cir.ptr, !cir.ptr> +// CHECK: [[TMP5:%.*]] = cir.alloca !s32i, !cir.ptr, [[TMP3]] : !u64i, ["vla"] {alignment = 16 : i64} +// CHECK: [[TMP6:%.*]] = cir.load [[TMP1]] : !cir.ptr>, !cir.ptr +// CHECK: cir.stack_restore [[TMP6]] : !cir.ptr +void f0(int len) { + int a[len]; +} + +// CHECK: cir.func @f1 +// CHECK-NOT: cir.stack_save +// CHECK-NOT: cir.stack_restore +// CHECK: cir.return +int f1(int n) { + return sizeof(int[n]); +} + +// CHECK: cir.func @f2 +// CHECK: cir.stack_save +// DONT_CHECK: cir.stack_restore +// CHECK: cir.return +int f2(int x) { + int vla[x]; + return vla[x-1]; +} + +// CHECK: cir.func @f3 +// CHECK: cir.stack_save +// CHECK: cir.stack_restore +// CHECK: cir.return +void f3(int count) { + int a[count]; + + do { } while (0); + if (a[0] != 3) {} +} + + +// CHECK: cir.func @f4 +// CHECK-NOT: cir.stack_save +// CHECK-NOT: cir.stack_restore +// CHECK: cir.return +void f4(int count) { + // Make sure we emit sizes correctly in some obscure cases + int (*a[5])[count]; + int (*b)[][count]; +} + +// FIXME(cir): the test below is commented out because the stack_restore operation +// is not emitted for the if branch +// void f5(unsigned x) { +// while (1) { +// char s[x]; +// if (x > 5) //: stack restore here is missed +// break; +// } +// } + +// Check no errors happen +void function1(short width, int data[][width]) {} +void function2(short width, int data[][width][width]) {} +void f6(void) { + int bork[4][13][15]; + + function1(1, bork[2]); + function2(1, bork); +} + +static int GLOB; +int f7(int n) +{ + GLOB = 0; + char b[1][n+3]; + + __typeof__(b[GLOB++]) c; + return GLOB; +} + +double f8(int n, double (*p)[n][5]) { + return p[1][2][3]; +} + +int f9(unsigned n, char (*p)[n][n+1][6]) { + __typeof(p) p2 = (p + n/2) - n/4; + + return p2 - p; +} diff --git a/clang/test/CIR/CodeGen/volatile.cpp b/clang/test/CIR/CodeGen/volatile.cpp new file mode 100644 index 000000000000..5b8c13334ecf --- /dev/null +++ b/clang/test/CIR/CodeGen/volatile.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +int test_load(volatile int *ptr) { + return *ptr; +} + +// CHECK: cir.func @_Z9test_loadPVi +// CHECK: %{{.+}} = cir.load volatile + +void test_store(volatile int *ptr) { + *ptr = 42; +} + +// CHECK: cir.func @_Z10test_storePVi +// CHECK: cir.store volatile + +struct Foo { + int x; + volatile int y; + volatile int z: 4; +}; + +int test_load_field1(volatile 
Foo *ptr) { + return ptr->x; +} + +// CHECK: cir.func @_Z16test_load_field1PV3Foo +// CHECK: %[[MemberAddr:.*]] = cir.get_member +// CHECK: %{{.+}} = cir.load volatile %[[MemberAddr]] + +int test_load_field2(Foo *ptr) { + return ptr->y; +} + +// CHECK: cir.func @_Z16test_load_field2P3Foo +// CHECK: %[[MemberAddr:.+]] = cir.get_member +// CHECK: %{{.+}} = cir.load volatile %[[MemberAddr]] + +int test_load_field3(Foo *ptr) { + return ptr->z; +} + +// CHECK: cir.func @_Z16test_load_field3P3Foo +// CHECK: %[[MemberAddr:.+]] = cir.get_member +// CHECK: %{{.+}} = cir.get_bitfield(#bfi_z, %[[MemberAddr:.+]] {is_volatile} + +void test_store_field1(volatile Foo *ptr) { + ptr->x = 42; +} + +// CHECK: cir.func @_Z17test_store_field1PV3Foo +// CHECK: %[[MemberAddr:.+]] = cir.get_member +// CHECK: cir.store volatile %{{.+}}, %[[MemberAddr]] + +void test_store_field2(Foo *ptr) { + ptr->y = 42; +} + +// CHECK: cir.func @_Z17test_store_field2P3Foo +// CHECK: %[[MemberAddr:.+]] = cir.get_member +// CHECK: cir.store volatile %{{.+}}, %[[MemberAddr]] + +void test_store_field3(Foo *ptr) { + ptr->z = 4; +} + +// CHECK: cir.func @_Z17test_store_field3P3Foo +// CHECK: %[[MemberAddr:.+]] = cir.get_member +// CHECK: cir.set_bitfield(#bfi_z, %[[MemberAddr:.+]] : !cir.ptr, %1 : !s32i) {is_volatile} diff --git a/clang/test/CIR/CodeGen/vtable-rtti.cpp b/clang/test/CIR/CodeGen/vtable-rtti.cpp new file mode 100644 index 000000000000..7e684df2bb58 --- /dev/null +++ b/clang/test/CIR/CodeGen/vtable-rtti.cpp @@ -0,0 +1,103 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fno-rtti -mconstructor-aliases -clangir-disable-emit-cxx-default -emit-cir %s -o %t2.cir +// RUN: FileCheck --input-file=%t2.cir --check-prefix=RTTI_DISABLED %s + +class A +{ +public: + A() noexcept {} + A(const A&) noexcept = default; + + virtual ~A() noexcept; + virtual const char* quack() const noexcept; +}; + +class B : public A +{ +public: + virtual ~B() noexcept {} +}; + +// Type info B. +// CHECK: ![[TypeInfoB:ty_.*]] = !cir.struct>, !cir.ptr>, !cir.ptr>}> + +// vtable for A type +// CHECK: ![[VTableTypeA:ty_.*]] = !cir.struct> x 5>}> +// RTTI_DISABLED: ![[VTableTypeA:ty_.*]] = !cir.struct> x 5>}> + +// Class A +// CHECK: ![[ClassA:ty_.*]] = !cir.struct ()>>>} #cir.record.decl.ast> + +// Class B +// CHECK: ![[ClassB:ty_.*]] = !cir.struct ()>>>} #cir.record.decl.ast>}> +// RTTI_DISABLED: ![[ClassB:ty_.*]] = !cir.struct ()>>>} #cir.record.decl.ast>}> + +// B ctor => @B::B() +// Calls @A::A() and initialize __vptr with address of B's vtable. 
+// +// CHECK: cir.func linkonce_odr @_ZN1BC2Ev(%arg0: !cir.ptr +// RTTI_DISABLED: cir.func linkonce_odr @_ZN1BC2Ev(%arg0: !cir.ptr + +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %2 = cir.cast(bitcast, %1 : !cir.ptr), !cir.ptr +// CHECK: cir.call @_ZN1AC2Ev(%2) : (!cir.ptr) -> () +// CHECK: %3 = cir.vtable.address_point(@_ZTV1B, vtable_index = 0, address_point_index = 2) : !cir.ptr>> +// CHECK: %4 = cir.cast(bitcast, %1 : !cir.ptr), !cir.ptr>>> +// CHECK: cir.store %3, %4 : !cir.ptr>>, !cir.ptr>>> +// CHECK: cir.return +// CHECK: } + +// foo - zero initialize object B and call ctor (@B::B()) +// +// CHECK: cir.func @_Z3foov() +// CHECK: cir.scope { +// CHECK: %0 = cir.alloca !ty_B, !cir.ptr, ["agg.tmp.ensured"] {alignment = 8 : i64} +// CHECK: %1 = cir.const #cir.zero : ![[ClassB]] +// CHECK: cir.store %1, %0 : ![[ClassB]], !cir.ptr +// CHECK: cir.call @_ZN1BC2Ev(%0) : (!cir.ptr) -> () +// CHECK: } +// CHECK: cir.return +// CHECK: } + +// Vtable definition for A +// CHECK: cir.global "private" external @_ZTV1A : ![[VTableTypeA]] {alignment = 8 : i64} + +// A ctor => @A::A() +// Calls @A::A() and initialize __vptr with address of A's vtable +// +// CHECK: cir.func linkonce_odr @_ZN1AC2Ev(%arg0: !cir.ptr +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} +// CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// CHECK: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK: %2 = cir.vtable.address_point(@_ZTV1A, vtable_index = 0, address_point_index = 2) : !cir.ptr>> +// CHECK: %3 = cir.cast(bitcast, %1 : !cir.ptr), !cir.ptr>>> +// CHECK: cir.store %2, %3 : !cir.ptr>>, !cir.ptr>>> +// CHECK: cir.return +// CHECK: } + +// vtable for B +// CHECK: cir.global linkonce_odr @_ZTV1B = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI1B> : !cir.ptr, #cir.global_view<@_ZN1BD2Ev> : !cir.ptr, #cir.global_view<@_ZN1BD0Ev> : !cir.ptr, #cir.global_view<@_ZNK1A5quackEv> : !cir.ptr]> : !cir.array x 5>}> : ![[VTableTypeA]] +// RTTI_DISABLED: cir.global linkonce_odr @_ZTV1B = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.ptr : !cir.ptr, #cir.global_view<@_ZN1BD2Ev> : !cir.ptr, #cir.global_view<@_ZN1BD0Ev> : !cir.ptr, #cir.global_view<@_ZNK1A5quackEv> : !cir.ptr]> : !cir.array x 5>}> : ![[VTableTypeA]] + +// vtable for __cxxabiv1::__si_class_type_info +// CHECK: cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr> +// RTTI_DISABLED-NOT: cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr> + +// typeinfo name for B +// CHECK: cir.global linkonce_odr comdat @_ZTS1B = #cir.const_array<"1B" : !cir.array> : !cir.array {alignment = 1 : i64} +// RTTI_DISABLED-NOT: cir.global linkonce_odr @_ZTS1B + +// typeinfo for A +// CHECK: cir.global "private" constant external @_ZTI1A : !cir.ptr +// RTTI_DISABLED-NOT: cir.global "private" constant external @_ZTI1A : !cir.ptr + +// typeinfo for B +// CHECK: cir.global constant external @_ZTI1B = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS1B> : !cir.ptr, #cir.global_view<@_ZTI1A> : !cir.ptr}> : ![[TypeInfoB]] +// RTTI_DISABLED-NOT: cir.global constant external @_ZTI1B + +// Checks for dtors in dtors.cpp + +void foo() { B(); } diff --git a/clang/test/CIR/CodeGen/weak.c b/clang/test/CIR/CodeGen/weak.c new file mode 100644 index 
000000000000..398ac47c73f9 --- /dev/null +++ b/clang/test/CIR/CodeGen/weak.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +extern void B (void); +static __typeof(B) A __attribute__ ((__weakref__("B"))); + +void active (void) +{ + A(); +} + +// LLVM: @y = weak_odr global +// LLVM: @x = weak global + +// CIR: cir.func extern_weak private @B() +// CIR: cir.func @active() +// CIR-NEXT: cir.call @B() : () -> () + +// LLVM: declare !dbg !{{.}} extern_weak void @B() +// LLVM: define dso_local void @active() +// LLVM-NEXT: call void @B() + +int __attribute__((selectany)) y; +// CIR: cir.global weak_odr comdat @y + +int __attribute__((weak)) x; +// CIR: cir.global weak diff --git a/clang/test/CIR/CodeGen/wide-string.cpp b/clang/test/CIR/CodeGen/wide-string.cpp new file mode 100644 index 000000000000..b02380041ce1 --- /dev/null +++ b/clang/test/CIR/CodeGen/wide-string.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +const char16_t *test_utf16() { + return u"你好世界"; +} + +// CHECK: cir.global "private" constant internal dsolocal @{{.+}} = #cir.const_array<[#cir.int<20320> : !u16i, #cir.int<22909> : !u16i, #cir.int<19990> : !u16i, #cir.int<30028> : !u16i, #cir.int<0> : !u16i]> : !cir.array + +const char32_t *test_utf32() { + return U"你好世界"; +} + +// CHECK: cir.global "private" constant internal dsolocal @{{.+}} = #cir.const_array<[#cir.int<20320> : !u32i, #cir.int<22909> : !u32i, #cir.int<19990> : !u32i, #cir.int<30028> : !u32i, #cir.int<0> : !u32i]> : !cir.array + +const char16_t *test_zero16() { + return u"\0\0\0\0"; +} + +// CHECK: cir.global "private" constant internal dsolocal @{{.+}} = #cir.zero : !cir.array + +const char32_t *test_zero32() { + return U"\0\0\0\0"; +} + +// CHECK: cir.global "private" constant internal dsolocal @{{.+}} = #cir.zero : !cir.array diff --git a/clang/test/CIR/IR/address-space.cir b/clang/test/CIR/IR/address-space.cir new file mode 100644 index 000000000000..176cc6ca8c62 --- /dev/null +++ b/clang/test/CIR/IR/address-space.cir @@ -0,0 +1,41 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int + +module { + // CHECK: @test_format1(%arg0: !cir.ptr)>) + cir.func @test_format1(%arg0: !cir.ptr)>) { + cir.return + } + + // CHECK: @test_format2(%arg0: !cir.ptr) + cir.func @test_format2(%arg0: !cir.ptr) { + cir.return + } + + // CHECK: @test_format3(%arg0: !cir.ptr) + cir.func @test_format3(%arg0: !cir.ptr) { + cir.return + } + + // CHECK: @test_format4(%arg0: !cir.ptr) + cir.func @test_format4(%arg0: !cir.ptr) { + cir.return + } + + // CHECK: @test_format5(%arg0: !cir.ptr) + cir.func @test_format5(%arg0: !cir.ptr) { + cir.return + } + + // CHECK: @test_format6(%arg0: !cir.ptr) + cir.func @test_format6(%arg0: !cir.ptr) { + cir.return + } + + // CHECK: @test_format7(%arg0: !cir.ptr) + cir.func @test_format7(%arg0: !cir.ptr) { + cir.return + } +} diff --git a/clang/test/CIR/IR/aliases.cir b/clang/test/CIR/IR/aliases.cir new file mode 100644 index 000000000000..6d2fd8190464 --- /dev/null +++ b/clang/test/CIR/IR/aliases.cir @@ -0,0 +1,15 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +module { + // 
CHECK: @testAnonRecordsAlias + cir.func @testAnonRecordsAlias() { + // CHECK: cir.alloca !ty_anon_struct, !cir.ptr + %0 = cir.alloca !cir.struct}>, !cir.ptr}>>, ["A"] + // CHECK: cir.alloca !ty_anon_struct1, !cir.ptr + %1 = cir.alloca !cir.struct}>, !cir.ptr}>>, ["B"] + // CHECK: cir.alloca !ty_anon_union, !cir.ptr + %2 = cir.alloca !cir.struct}>, !cir.ptr}>>, ["C"] + cir.return + } +} diff --git a/clang/test/CIR/IR/alloca.cir b/clang/test/CIR/IR/alloca.cir new file mode 100644 index 000000000000..3c5b7ab6036e --- /dev/null +++ b/clang/test/CIR/IR/alloca.cir @@ -0,0 +1,21 @@ +// Test the CIR operations can parse and print correctly (roundtrip) + +// RUN: cir-opt %s | cir-opt | FileCheck %s +!s32i = !cir.int +!u64i = !cir.int + +module { + cir.func @foo(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, %arg0 : !s32i, ["tmp"] {alignment = 16 : i64} + cir.return + } +} + +//CHECK: module { + +//CHECK-NEXT: cir.func @foo(%arg0: !s32i) { +//CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, %arg0 : !s32i, ["tmp"] {alignment = 16 : i64} +//CHECK-NEXT: cir.return +//CHECK-NEXT: } + +//CHECK: } diff --git a/clang/test/CIR/IR/annotations.cir b/clang/test/CIR/IR/annotations.cir new file mode 100644 index 000000000000..c1486e35aa71 --- /dev/null +++ b/clang/test/CIR/IR/annotations.cir @@ -0,0 +1,31 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int +module attributes {cir.global_annotations = +#cir], +["foo", #cir.annotation], +["bar", #cir.annotation], +["bar", #cir.annotation]]>} +{ +cir.global external @a = #cir.int<0> : !s32i [#cir.annotation] +cir.func @foo() attributes {annotations = [#cir.annotation]} { + cir.return +} +cir.func @bar() attributes {annotations = [#cir.annotation, #cir.annotation]} { + cir.return +} +} + +// CHECK: module attributes {cir.global_annotations = #cir], +// CHECK-SAME: ["foo", #cir.annotation], +// CHECK-SAME: ["bar", #cir.annotation], +// CHECK-SAME: ["bar", #cir.annotation]]>} +// CHECK: cir.global external @a = #cir.int<0> : !s32i +// CHECK-SAME: [#cir.annotation] +// CHECK: cir.func @foo() +// CHECK-SAME: [#cir.annotation] +// CHECK: cir.func @bar() +// CHECK-SAME: [#cir.annotation, +// CHECK-SAME: #cir.annotation] diff --git a/clang/test/CIR/IR/array.cir b/clang/test/CIR/IR/array.cir new file mode 100644 index 000000000000..b69439924a41 --- /dev/null +++ b/clang/test/CIR/IR/array.cir @@ -0,0 +1,13 @@ +// RUN: cir-opt %s | cir-opt | FileCheck %s + +!u32i = !cir.int + +module { + cir.func @arrays() { + %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] + cir.return + } +} + +// CHECK: cir.func @arrays() { +// CHECK-NEXT: %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] diff --git a/clang/test/CIR/IR/await.cir b/clang/test/CIR/IR/await.cir new file mode 100644 index 000000000000..c62e6b7b88b6 --- /dev/null +++ b/clang/test/CIR/IR/await.cir @@ -0,0 +1,22 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +cir.func coroutine @checkPrintParse(%arg0 : !cir.bool) { + cir.await(user, ready : { + cir.condition(%arg0) + }, suspend : { + cir.yield + }, resume : { + cir.yield + },) + cir.return +} + +// CHECK: cir.func coroutine @checkPrintParse +// CHECK: cir.await(user, ready : { +// CHECK: cir.condition(%arg0) +// CHECK: }, suspend : { +// CHECK: cir.yield +// CHECK: }, resume : { +// CHECK: cir.yield +// CHECK: },) diff --git a/clang/test/CIR/IR/bit.cir b/clang/test/CIR/IR/bit.cir new file mode 100644 index 000000000000..7a6d4d2e4bdb --- /dev/null +++ b/clang/test/CIR/IR/bit.cir @@ -0,0 +1,75 @@ +// RUN: 
cir-opt %s | cir-opt | FileCheck %s + +!s8i = !cir.int +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!u8i = !cir.int +!u16i = !cir.int +!u32i = !cir.int +!u64i = !cir.int + +module { + cir.func @test() { + %s8 = cir.const #cir.int<1> : !s8i + %s16 = cir.const #cir.int<1> : !s16i + %s32 = cir.const #cir.int<1> : !s32i + %s64 = cir.const #cir.int<1> : !s64i + %u8 = cir.const #cir.int<1> : !u8i + %u16 = cir.const #cir.int<1> : !u16i + %u32 = cir.const #cir.int<1> : !u32i + %u64 = cir.const #cir.int<1> : !u64i + + %2 = cir.bit.clrsb(%s32 : !s32i) : !s32i + %3 = cir.bit.clrsb(%s64 : !s64i) : !s32i + + %4 = cir.bit.clz(%u16 : !u16i) : !s32i + %5 = cir.bit.clz(%u32 : !u32i) : !s32i + %6 = cir.bit.clz(%u64 : !u64i) : !s32i + + %7 = cir.bit.ctz(%u16 : !u16i) : !s32i + %8 = cir.bit.ctz(%u32 : !u32i) : !s32i + %9 = cir.bit.ctz(%u64 : !u64i) : !s32i + + %10 = cir.bit.ffs(%s32 : !s32i) : !s32i + %11 = cir.bit.ffs(%s64 : !s64i) : !s32i + + %12 = cir.bit.parity(%u32 : !u32i) : !s32i + %13 = cir.bit.parity(%u64 : !u64i) : !s32i + + %14 = cir.bit.popcount(%u16 : !u16i) : !s32i + %15 = cir.bit.popcount(%u32 : !u32i) : !s32i + %16 = cir.bit.popcount(%u64 : !u64i) : !s32i + + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: cir.func @test() { +// CHECK-NEXT: %0 = cir.const #cir.int<1> : !s8i +// CHECK-NEXT: %1 = cir.const #cir.int<1> : !s16i +// CHECK-NEXT: %2 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %3 = cir.const #cir.int<1> : !s64i +// CHECK-NEXT: %4 = cir.const #cir.int<1> : !u8i +// CHECK-NEXT: %5 = cir.const #cir.int<1> : !u16i +// CHECK-NEXT: %6 = cir.const #cir.int<1> : !u32i +// CHECK-NEXT: %7 = cir.const #cir.int<1> : !u64i +// CHECK-NEXT: %8 = cir.bit.clrsb(%2 : !s32i) : !s32i +// CHECK-NEXT: %9 = cir.bit.clrsb(%3 : !s64i) : !s32i +// CHECK-NEXT: %10 = cir.bit.clz(%5 : !u16i) : !s32i +// CHECK-NEXT: %11 = cir.bit.clz(%6 : !u32i) : !s32i +// CHECK-NEXT: %12 = cir.bit.clz(%7 : !u64i) : !s32i +// CHECK-NEXT: %13 = cir.bit.ctz(%5 : !u16i) : !s32i +// CHECK-NEXT: %14 = cir.bit.ctz(%6 : !u32i) : !s32i +// CHECK-NEXT: %15 = cir.bit.ctz(%7 : !u64i) : !s32i +// CHECK-NEXT: %16 = cir.bit.ffs(%2 : !s32i) : !s32i +// CHECK-NEXT: %17 = cir.bit.ffs(%3 : !s64i) : !s32i +// CHECK-NEXT: %18 = cir.bit.parity(%6 : !u32i) : !s32i +// CHECK-NEXT: %19 = cir.bit.parity(%7 : !u64i) : !s32i +// CHECK-NEXT: %20 = cir.bit.popcount(%5 : !u16i) : !s32i +// CHECK-NEXT: %21 = cir.bit.popcount(%6 : !u32i) : !s32i +// CHECK-NEXT: %22 = cir.bit.popcount(%7 : !u64i) : !s32i +// CHECK-NEXT: cir.return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/IR/branch.cir b/clang/test/CIR/IR/branch.cir new file mode 100644 index 000000000000..7f418908a94c --- /dev/null +++ b/clang/test/CIR/IR/branch.cir @@ -0,0 +1,21 @@ +// RUN: cir-opt %s | FileCheck %s + +cir.func @test_branch_parsing(%arg0: !cir.bool) { + // CHECK: cir.br ^bb1 + cir.br ^bb1 +^bb1: + // CHECK: cir.br ^bb2(%arg0 : !cir.bool) + cir.br ^bb2(%arg0 : !cir.bool) +// CHECK: ^bb2(%0: !cir.bool): +^bb2(%x: !cir.bool): + cir.return +} + +cir.func @test_conditional_branch_parsing(%arg0 : !cir.bool) { + // CHECK: cir.brcond %arg0 ^bb1, ^bb2 + cir.brcond %arg0 ^bb1, ^bb2 +^bb1: + cir.return +^bb2: + cir.return +} diff --git a/clang/test/CIR/IR/call-op-call-conv.cir b/clang/test/CIR/IR/call-op-call-conv.cir new file mode 100644 index 000000000000..b47e1226e10b --- /dev/null +++ b/clang/test/CIR/IR/call-op-call-conv.cir @@ -0,0 +1,27 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int +!fnptr = !cir.ptr> + 
+module { + cir.func @my_add(%a: !s32i, %b: !s32i) -> !s32i cc(spir_function) { + %c = cir.binop(add, %a, %b) : !s32i + cir.return %c : !s32i + } + + cir.func @ind(%fnptr: !fnptr, %a : !s32i) { + %1 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_kernel) + %2 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_function) + + %3 = cir.try_call @my_add(%1, %2) ^continue, ^landing_pad : (!s32i, !s32i) -> !s32i cc(spir_function) + ^continue: + cir.br ^landing_pad + ^landing_pad: + cir.return + } +} + +// CHECK: %{{[0-9]+}} = cir.call %arg0(%arg1) : (!cir.ptr>, !s32i) -> !s32i cc(spir_kernel) +// CHECK: %{{[0-9]+}} = cir.call %arg0(%arg1) : (!cir.ptr>, !s32i) -> !s32i cc(spir_function) +// CHECK: %{{[0-9]+}} = cir.try_call @my_add(%{{[0-9]+}}, %{{[0-9]+}}) ^{{.+}}, ^{{.+}} : (!s32i, !s32i) -> !s32i cc(spir_function) diff --git a/clang/test/CIR/IR/call.cir b/clang/test/CIR/IR/call.cir new file mode 100644 index 000000000000..0b1fc68622f8 --- /dev/null +++ b/clang/test/CIR/IR/call.cir @@ -0,0 +1,31 @@ +// RUN: cir-opt %s | FileCheck %s + +!s32i = !cir.int +!fnptr = !cir.ptr> + +#fn_attr = #cir, optnone = #cir.optnone})> +#fn_attr1 = #cir + +module { + // Excerpt of std::array::operator[](unsigned long) + cir.func linkonce_odr @_ZNSt5arrayIiLm8192EEixEm(%arg0: !s32i) -> !s32i extra(#fn_attr) { + cir.return %arg0 : !s32i + } + + cir.func private @my_add(%a: !s32i, %b: !s32i) -> !s32i cc(spir_function) extra(#fn_attr) + + cir.func @ind(%fnptr: !fnptr, %a : !s32i) { + %r = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i +// CHECK: %0 = cir.call %arg0(%arg1) : (!cir.ptr>, !s32i) -> !s32i + // Check parse->pretty-print round-trip on extra() attribute + %7 = cir.call @_ZNSt5arrayIiLm8192EEixEm(%a) : (!s32i) -> !s32i extra(#fn_attr1) +// CHECK: %1 = cir.call @_ZNSt5arrayIiLm8192EEixEm(%arg1) : (!s32i) -> !s32i extra(#fn_attr1) + // Frankenstein's example from clang/test/CIR/Lowering/call-op-call-conv.cir + %3 = cir.try_call @my_add(%r, %7) ^continue, ^landing_pad : (!s32i, !s32i) -> !s32i cc(spir_function) extra(#fn_attr1) +// CHECK: %2 = cir.try_call @my_add(%0, %1) ^bb1, ^bb2 : (!s32i, !s32i) -> !s32i cc(spir_function) extra(#fn_attr1) + ^continue: + cir.br ^landing_pad + ^landing_pad: + cir.return + } +} diff --git a/clang/test/CIR/IR/cast.cir b/clang/test/CIR/IR/cast.cir new file mode 100644 index 000000000000..8523439a7c2c --- /dev/null +++ b/clang/test/CIR/IR/cast.cir @@ -0,0 +1,33 @@ +// RUN: cir-opt %s | cir-opt | FileCheck %s +!s32i = !cir.int + +module { + cir.func @yolo(%arg0 : !s32i) { + %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] + %a = cir.cast (int_to_bool, %arg0 : !s32i), !cir.bool + + %3 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr + %4 = cir.const #cir.int<0> : !s32i + cir.return + } + + cir.func @bitcast(%p: !cir.ptr) { + %2 = cir.cast(bitcast, %p : !cir.ptr), !cir.ptr + cir.return + } + + cir.func @addrspace_cast(%arg0: !cir.ptr) { + %0 = cir.cast(address_space, %arg0 : !cir.ptr), !cir.ptr)> + cir.return + } +} + +// CHECK: cir.func @yolo(%arg0: !s32i) +// CHECK: %1 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool +// CHECK: %2 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr + +// CHECK: cir.func @bitcast +// CHECK: %0 = cir.cast(bitcast, %arg0 : !cir.ptr), !cir.ptr + +// CHECK: cir.func @addrspace_cast +// CHECK: %0 = cir.cast(address_space, %arg0 : !cir.ptr), !cir.ptr)> diff --git a/clang/test/CIR/IR/cir-ops.cir b/clang/test/CIR/IR/cir-ops.cir new file mode 100644 index 000000000000..73a8de8c40cd --- /dev/null +++ 
b/clang/test/CIR/IR/cir-ops.cir @@ -0,0 +1,105 @@ +// Test the CIR operations can parse and print correctly (roundtrip) + +// RUN: cir-opt %s | cir-opt | FileCheck %s +!s32i = !cir.int +!s8i = !cir.int +!u64i = !cir.int + +module { + cir.func @foo(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store align(1) atomic(seq_cst) %arg0, %0 : !s32i, !cir.ptr + %1 = cir.load %0 : !cir.ptr, !s32i + %2 = cir.load align(1) atomic(seq_cst) %0 : !cir.ptr, !s32i + cir.return %1 : !s32i + } + + cir.func @f3() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] + %1 = cir.const #cir.int<3> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !s32i + cir.return %2 : !s32i + } + + cir.func @if0(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + cir.store %arg0, %1 : !s32i, !cir.ptr + %2 = cir.const #cir.int<0> : !s32i + cir.store %2, %0 : !s32i, !cir.ptr + %3 = cir.load %1 : !cir.ptr, !s32i + %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool + cir.if %4 { + %6 = cir.const #cir.int<3> : !s32i + cir.store %6, %0 : !s32i, !cir.ptr + } else { + %6 = cir.const #cir.int<4> : !s32i + cir.store %6, %0 : !s32i, !cir.ptr + } + %5 = cir.load %0 : !cir.ptr, !s32i + cir.return %5 : !s32i + } + + cir.func @s0() { + %0 = cir.alloca !s32i, !cir.ptr, ["x"] {alignment = 4 : i64} + cir.scope { + %1 = cir.alloca !s32i, !cir.ptr, ["y"] {alignment = 4 : i64} + } + cir.return + } + + cir.func @os() { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["m", init] {alignment = 8 : i64} + %3 = cir.load %0 : !cir.ptr>, !cir.ptr + %4 = cir.objsize(%3 : , max) -> !u64i + %5 = cir.objsize(%3 : , min) -> !u64i + cir.return + } +} + +// CHECK: module { + +// CHECK-NEXT: cir.func @foo(%arg0: !s32i) -> !s32i { +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["x", init] +// CHECK-NEXT: cir.store %arg0, %0 : !s32i, !cir.ptr +// CHECK-NEXT: cir.store align(1) atomic(seq_cst) %arg0, %0 : !s32i, !cir.ptr +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: %2 = cir.load align(1) atomic(seq_cst) %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %1 : !s32i +// CHECK-NEXT: } + +// CHECK-NEXT: cir.func @f3() -> !s32i { +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["x", init] +// CHECK-NEXT: %1 = cir.const #cir.int<3> : !s32i +// CHECK-NEXT: cir.store %1, %0 : !s32i, !cir.ptr +// CHECK-NEXT: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK-NEXT: cir.return %2 : !s32i +// CHECK-NEXT: } + +// CHECK: @if0(%arg0: !s32i) -> !s32i { +// CHECK: %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool +// CHECK-NEXT: cir.if %4 { +// CHECK-NEXT: %6 = cir.const #cir.int<3> : !s32i +// CHECK-NEXT: cir.store %6, %0 : !s32i, !cir.ptr +// CHECK-NEXT: } else { +// CHECK-NEXT: %6 = cir.const #cir.int<4> : !s32i +// CHECK-NEXT: cir.store %6, %0 : !s32i, !cir.ptr +// CHECK-NEXT: } + +// CHECK: cir.func @s0() { +// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["x"] {alignment = 4 : i64} +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["y"] {alignment = 4 : i64} +// CHECK-NEXT: } + +// CHECK: cir.func @os() { +// CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["m", init] {alignment = 8 : i64} +// CHECK-NEXT: %1 = cir.load %0 : !cir.ptr>, !cir.ptr +// CHECK-NEXT: %2 = cir.objsize(%1 : , max) -> !u64i +// CHECK-NEXT: %3 = cir.objsize(%1 : , min) -> !u64i +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// CHECK: } diff --git 
a/clang/test/CIR/IR/constptrattr.cir b/clang/test/CIR/IR/constptrattr.cir new file mode 100644 index 000000000000..21e14283b320 --- /dev/null +++ b/clang/test/CIR/IR/constptrattr.cir @@ -0,0 +1,10 @@ +// RUN: cir-opt %s | FileCheck %s + +!s32i = !cir.int + +cir.global external @const_ptr = #cir.ptr<4660 : i64> : !cir.ptr +// CHECK: cir.global external @const_ptr = #cir.ptr<4660 : i64> : !cir.ptr +cir.global external @signed_ptr = #cir.ptr<-1 : i64> : !cir.ptr +// CHECK: cir.global external @signed_ptr = #cir.ptr<-1 : i64> : !cir.ptr +cir.global external @null_ptr = #cir.ptr : !cir.ptr +// CHECK: cir.global external @null_ptr = #cir.ptr : !cir.ptr diff --git a/clang/test/CIR/IR/copy.cir b/clang/test/CIR/IR/copy.cir new file mode 100644 index 000000000000..9a689036985e --- /dev/null +++ b/clang/test/CIR/IR/copy.cir @@ -0,0 +1,9 @@ +// RUN: cir-opt %s + +!s32i = !cir.int +module { + cir.func @shouldParseCopyOp(%arg0 : !cir.ptr, %arg1 : !cir.ptr) { + cir.copy %arg0 to %arg1 : !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/IR/data-member-ptr.cir b/clang/test/CIR/IR/data-member-ptr.cir new file mode 100644 index 000000000000..7078510bc594 --- /dev/null +++ b/clang/test/CIR/IR/data-member-ptr.cir @@ -0,0 +1,32 @@ +// RUN: cir-opt %s | cir-opt | FileCheck %s + +!s32i = !cir.int +!ty_Foo = !cir.struct + +module { + cir.func @null_member() { + %0 = cir.const #cir.data_member : !cir.data_member + cir.return + } + + cir.func @get_runtime_member(%arg0: !cir.ptr) { + %0 = cir.const #cir.data_member<0> : !cir.data_member + %1 = cir.get_runtime_member %arg0[%0 : !cir.data_member] : !cir.ptr -> !cir.ptr + cir.return + } +} + +// CHECK: module { + +// CHECK-NEXT: cir.func @null_member() { +// CHECK-NEXT: %0 = cir.const #cir.data_member : !cir.data_member +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// CHECK-NEXT: cir.func @get_runtime_member(%arg0: !cir.ptr) { +// CHECK-NEXT: %0 = cir.const #cir.data_member<0> : !cir.data_member +// CHECK-NEXT: %1 = cir.get_runtime_member %arg0[%0 : !cir.data_member] : !cir.ptr -> !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } + +// CHECK: } diff --git a/clang/test/CIR/IR/do-while.cir b/clang/test/CIR/IR/do-while.cir new file mode 100644 index 000000000000..6664b4cfe4bf --- /dev/null +++ b/clang/test/CIR/IR/do-while.cir @@ -0,0 +1,18 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +cir.func @testPrintingAndParsing (%arg0 : !cir.bool) -> !cir.void { + cir.do { + cir.yield + } while { + cir.condition(%arg0) + } + cir.return +} + +// CHECK: testPrintingAndParsing +// CHECK: cir.do { +// CHECK: cir.yield +// CHECK: } while { +// CHECK: cir.condition(%arg0) +// CHECK: } diff --git a/clang/test/CIR/IR/exceptions.cir b/clang/test/CIR/IR/exceptions.cir new file mode 100644 index 000000000000..25aa67357c7d --- /dev/null +++ b/clang/test/CIR/IR/exceptions.cir @@ -0,0 +1,69 @@ +// RUN: cir-opt %s | FileCheck %s +// XFAIL: * + +!s32i = !cir.int +!s8i = !cir.int +!u64i = !cir.int +!u8i = !cir.int +!void = !cir.void + +module { + // Generated from clang/test/CIR/CodeGen/try-catch.cpp + cir.func @_Z2tcv() -> !u64i { + %0 = cir.alloca !u64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %3 = cir.alloca !u64i, !cir.ptr, ["z"] {alignment = 8 : i64} + %4 = cir.const #cir.int<50> : !s32i + cir.store %4, %1 : !s32i, !cir.ptr + %5 = cir.const #cir.int<3> : !s32i + cir.store %5, %2 : !s32i, !cir.ptr + cir.scope 
{ + %8 = cir.alloca !cir.ptr, !cir.ptr>, ["msg"] {alignment = 8 : i64} + %9 = cir.alloca !s32i, !cir.ptr, ["idx"] {alignment = 4 : i64} + cir.try { + %10 = cir.alloca !cir.ptr, !cir.ptr>, ["__exception_ptr"] {alignment = 1 : i64} + %11 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %12 = cir.const #cir.int<4> : !s32i + cir.store %12, %11 : !s32i, !cir.ptr + %13 = cir.load %1 : !cir.ptr, !s32i + %14 = cir.load %2 : !cir.ptr, !s32i + %15 = cir.try_call exception(%10) @_Z8divisionii(%13, %14) : (!s32i, !s32i) -> !cir.double + %16 = cir.cast(float_to_int, %15 : !cir.double), !u64i + cir.store %16, %3 : !u64i, !cir.ptr + %17 = cir.load %11 : !cir.ptr, !s32i + %18 = cir.unary(inc, %17) : !s32i, !s32i + cir.store %18, %11 : !s32i, !cir.ptr + %19 = cir.load %10 : !cir.ptr>, !cir.ptr + cir.yield + } catch [type #cir.global_view<@_ZTIi> : !cir.ptr { + %10 = cir.catch_param -> !cir.ptr + %11 = cir.load %10 : !cir.ptr, !s32i + cir.store %11, %9 : !s32i, !cir.ptr + %12 = cir.const #cir.int<98> : !s32i + %13 = cir.cast(integral, %12 : !s32i), !u64i + cir.store %13, %3 : !u64i, !cir.ptr + %14 = cir.load %9 : !cir.ptr, !s32i + %15 = cir.unary(inc, %14) : !s32i, !s32i + cir.store %15, %9 : !s32i, !cir.ptr + cir.yield + }, type #cir.global_view<@_ZTIPKc> : !cir.ptr { + %10 = cir.catch_param -> !cir.ptr + cir.store %10, %8 : !cir.ptr, !cir.ptr> + %11 = cir.const #cir.int<99> : !s32i + %12 = cir.cast(integral, %11 : !s32i), !u64i + cir.store %12, %3 : !u64i, !cir.ptr + %13 = cir.load %8 : !cir.ptr>, !cir.ptr + %14 = cir.const #cir.int<0> : !s32i + %15 = cir.ptr_stride(%13 : !cir.ptr, %14 : !s32i), !cir.ptr + cir.yield + }, #cir.unwind { + cir.resume + }] + } + %6 = cir.load %3 : !cir.ptr, !u64i + cir.store %6, %0 : !u64i, !cir.ptr + %7 = cir.load %0 : !cir.ptr, !u64i + cir.return %7 : !u64i + } +} \ No newline at end of file diff --git a/clang/test/CIR/IR/for.cir b/clang/test/CIR/IR/for.cir new file mode 100644 index 000000000000..62b82976cc68 --- /dev/null +++ b/clang/test/CIR/IR/for.cir @@ -0,0 +1,22 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +cir.func @testPrintingParsing(%arg0 : !cir.bool) { + cir.for : cond { + cir.condition(%arg0) + } body { + cir.yield + } step { + cir.yield + } + cir.return +} + +// CHECK: @testPrintingParsing +// CHECK: cir.for : cond { +// CHECK: cir.condition(%arg0) +// CHECK: } body { +// CHECK: cir.yield +// CHECK: } step { +// CHECK: cir.yield +// CHECK: } diff --git a/clang/test/CIR/IR/func-call-conv.cir b/clang/test/CIR/IR/func-call-conv.cir new file mode 100644 index 000000000000..331b8fa23d7a --- /dev/null +++ b/clang/test/CIR/IR/func-call-conv.cir @@ -0,0 +1,27 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int + +#fn_attr = #cir})> + +module { + // CHECK: cir.func @foo() { + cir.func @foo() cc(c) { + cir.return + } + + // CHECK: cir.func @bar() cc(spir_kernel) + cir.func @bar() cc(spir_kernel) { + cir.return + } + + // CHECK: cir.func @bar_alias() alias(@bar) cc(spir_kernel) + cir.func @bar_alias() alias(@bar) cc(spir_kernel) + + // CHECK: cir.func @baz() cc(spir_function) extra(#fn_attr) + cir.func @baz() cc(spir_function) extra(#fn_attr) { + cir.return + } +} + diff --git a/clang/test/CIR/IR/func-dsolocal-parser.cir b/clang/test/CIR/IR/func-dsolocal-parser.cir new file mode 100644 index 000000000000..9737279ce144 --- /dev/null +++ b/clang/test/CIR/IR/func-dsolocal-parser.cir @@ -0,0 +1,13 @@ +// RUN: cir-opt %s | FileCheck %s + +!s32i = !cir.int +#fn_attr = #cir, nothrow = 
#cir.nothrow, optnone = #cir.optnone})> +module { + cir.func dsolocal @foo(%arg0: !s32i ) extra(#fn_attr) { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.return + } +} + +// CHECK: cir.func @foo(%arg0: !s32i) extra(#fn_attr) diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir new file mode 100644 index 000000000000..a1468e6462f4 --- /dev/null +++ b/clang/test/CIR/IR/func.cir @@ -0,0 +1,56 @@ +// RUN: cir-opt %s | FileCheck %s +!s32i = !cir.int +!u8i = !cir.int +module { + cir.func @l0() { + cir.return + } + + // Should print/parse function aliases. + // CHECK: cir.func @l1() alias(@l0) + cir.func @l1() alias(@l0) + + // Should print/parse variadic function types. + // CHECK: cir.func private @variadic(!s32i, ...) -> !s32i + cir.func private @variadic(!s32i, ...) -> !s32i + + // Should accept call with only the required parameters. + cir.func @variadic_call_1(%0: !s32i) -> !s32i { + %9 = cir.call @variadic(%0) : (!s32i) -> !s32i + cir.return %9 : !s32i + } + + // Should accept calls with variadic parameters. + cir.func @variadic_call_2(%0: !s32i, %1: !s32i, %2: !u8i) -> !s32i { + %9 = cir.call @variadic(%0, %1, %2) : (!s32i, !s32i, !u8i) -> !s32i + cir.return %9 : !s32i + } + + // Should parse custom assembly format. + cir.func @parse_func_type() -> () { + %1 = cir.alloca !cir.ptr>, !cir.ptr>>, ["fn", init] {alignment = 8 : i64} + cir.return + } + + // Should drop void return types. + // CHECK: cir.func @parse_explicit_void_func() { + cir.func @parse_explicit_void_func() -> !cir.void { + cir.return + } + + // Should print/parse omitted void return type. + // CHECK: cir.func @parse_func_type_with_omitted_void() { + cir.func @parse_func_type_with_omitted_void() { + cir.return + } + + // Should print/parse variadic no-proto functions. + // CHECK: cir.func no_proto private @no_proto(...) -> !s32i + cir.func no_proto private @no_proto(...) -> !s32i + + // Should print/parse argument and result attributes. + // CHECK: cir.func @parse_arg_res_attrs(%arg0: !u8i {cir.zeroext}) -> (!u8i {cir.zeroext}) { + cir.func @parse_arg_res_attrs(%0: !u8i {cir.zeroext}) -> (!u8i {cir.zeroext}) { + cir.return %0 : !u8i + } +} diff --git a/clang/test/CIR/IR/getmember.cir b/clang/test/CIR/IR/getmember.cir new file mode 100644 index 000000000000..d9cecc0dea9c --- /dev/null +++ b/clang/test/CIR/IR/getmember.cir @@ -0,0 +1,24 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!u16i = !cir.int +!u32i = !cir.int + +!ty_Class = !cir.struct +!ty_Incomplete = !cir.struct +!ty_Struct = !cir.struct + +module { + cir.func @shouldGetStructMember(%arg0 : !cir.ptr) { + // CHECK: cir.get_member %arg0[1] {name = "test"} : !cir.ptr -> !cir.ptr + %0 = cir.get_member %arg0[1] {name = "test"} : !cir.ptr -> !cir.ptr + cir.return + } + + // FIXME: remove bypass once codegen for CIR class records is patched. 
+ cir.func @shouldBypassMemberTypeCheckForClassRecords(%arg0 : !cir.ptr) { + // CHECK: cir.get_member %arg0[1] {name = "test"} : !cir.ptr -> !cir.ptr> + %0 = cir.get_member %arg0[1] {name = "test"} : !cir.ptr -> !cir.ptr> + cir.return + } +} diff --git a/clang/test/CIR/IR/global.cir b/clang/test/CIR/IR/global.cir new file mode 100644 index 000000000000..cb75684886af --- /dev/null +++ b/clang/test/CIR/IR/global.cir @@ -0,0 +1,114 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +!s8i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!ty_Init = !cir.struct +module { + cir.global external @a = #cir.int<3> : !s32i + cir.global external @rgb = #cir.const_array<[#cir.int<0> : !s8i, #cir.int<-23> : !s8i, #cir.int<33> : !s8i] : !cir.array> + cir.global external @b = #cir.const_array<"example\00" : !cir.array> + cir.global external @rgb2 = #cir.const_struct<{#cir.int<0> : !s8i, #cir.int<5> : !s64i, #cir.ptr : !cir.ptr}> : !cir.struct}> + cir.global "private" constant internal @".str" : !cir.array {alignment = 1 : i64} + cir.global "private" internal @c : !s32i + cir.global "private" constant internal @".str2" = #cir.const_array<"example\00" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global external @s = #cir.global_view<@".str2"> : !cir.ptr + cir.func @use_global() { + %0 = cir.get_global @a : !cir.ptr + cir.return + } + cir.global external @table = #cir.global_view<@s> : !cir.ptr + cir.global external @elt_ptr = #cir.global_view<@rgb, [1]> : !cir.ptr + cir.global external @table_of_ptrs = #cir.const_array<[#cir.global_view<@rgb, [1]> : !cir.ptr] : !cir.array x 1>> + + // Note MLIR requires "private" for global declarations, should get + // rid of this somehow in favor of clarity? + cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr + cir.global "private" constant external @type_info_A : !cir.ptr + cir.global constant external @type_info_name_B = #cir.const_array<"1B\00" : !cir.array> + + cir.global external @type_info_B = #cir.typeinfo<{ + #cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2]> : !cir.ptr, + #cir.global_view<@type_info_name_B> : !cir.ptr, + #cir.global_view<@type_info_A> : !cir.ptr}> + : !cir.struct, !cir.ptr, !cir.ptr}> + cir.func private @_ZN4InitC1Eb(!cir.ptr, !s8i) + cir.func private @_ZN4InitD1Ev(!cir.ptr) + cir.global "private" internal @_ZL8__ioinit = ctor : !ty_Init { + %0 = cir.get_global @_ZL8__ioinit : !cir.ptr + %1 = cir.const #cir.int<3> : !s8i + cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr, !s8i) -> () + } dtor { + %0 = cir.get_global @_ZL8__ioinit : !cir.ptr + cir.call @_ZN4InitD1Ev(%0) : (!cir.ptr) -> () + } + + cir.func @f31() global_ctor { + cir.return + } + + cir.func @f32() global_ctor(777) { + cir.return + } + + cir.func @f33() global_dtor { + cir.return + } + + cir.func @f34() global_dtor(777) { + cir.return + } + + cir.global external tls_dyn @model0 = #cir.int<0> : !s32i + cir.global external tls_local_dyn @model1 = #cir.int<0> : !s32i + cir.global external tls_init_exec @model2 = #cir.int<0> : !s32i + cir.global external tls_local_exec @model3 = #cir.int<0> : !s32i + + cir.global "private" external tls_dyn @batata : !s32i + cir.func @f35() { + %0 = cir.get_global thread_local @batata : !cir.ptr + cir.return + } + + cir.global external addrspace(offload_global) @addrspace1 = #cir.int<1> : !s32i + cir.global "private" internal addrspace(offload_local) @addrspace2 : !s32i + cir.global external addrspace(target<1>) @addrspace3 = #cir.int<3> : !s32i +} + +// CHECK: cir.global external @a 
= #cir.int<3> : !s32i +// CHECK: cir.global external @rgb = #cir.const_array<[#cir.int<0> : !s8i, #cir.int<-23> : !s8i, #cir.int<33> : !s8i]> : !cir.array +// CHECK: cir.global external @b = #cir.const_array<"example\00" : !cir.array> +// CHECK: cir.global "private" constant internal @".str" : !cir.array {alignment = 1 : i64} +// CHECK: cir.global "private" internal @c : !s32i +// CHECK: cir.global "private" constant internal @".str2" = #cir.const_array<"example\00" : !cir.array> : !cir.array {alignment = 1 : i64} +// CHECK: cir.global external @s = #cir.global_view<@".str2"> : !cir.ptr + + +// CHECK: cir.func @use_global() +// CHECK-NEXT: %0 = cir.get_global @a : !cir.ptr + +// CHECK: cir.global "private" internal @_ZL8__ioinit = ctor : !ty_Init { +// CHECK-NEXT: %0 = cir.get_global @_ZL8__ioinit : !cir.ptr +// CHECK-NEXT: %1 = cir.const #cir.int<3> : !s8i +// CHECK-NEXT: cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr, !s8i) -> () +// CHECK-NEXT: } + +// CHECK: cir.func @f31() global_ctor +// CHECK: cir.func @f32() global_ctor(777) +// CHECK: cir.func @f33() global_dtor +// CHECK: cir.func @f34() global_dtor(777) + +// CHECK: cir.global external tls_dyn @model0 = #cir.int<0> : !s32i +// CHECK: cir.global external tls_local_dyn @model1 = #cir.int<0> : !s32i +// CHECK: cir.global external tls_init_exec @model2 = #cir.int<0> : !s32i +// CHECK: cir.global external tls_local_exec @model3 = #cir.int<0> : !s32i + +// CHECK: cir.global "private" external tls_dyn @batata : !s32i +// CHECK: cir.func @f35() { +// CHECK: %0 = cir.get_global thread_local @batata : !cir.ptr +// CHECK: cir.return +// CHECK: } + +// CHECK: cir.global external addrspace(offload_global) @addrspace1 = #cir.int<1> : !s32i +// CHECK: cir.global "private" internal addrspace(offload_local) @addrspace2 : !s32i +// CHECK: cir.global external addrspace(target<1>) @addrspace3 = #cir.int<3> : !s32i diff --git a/clang/test/CIR/IR/inlineAttr.cir b/clang/test/CIR/IR/inlineAttr.cir new file mode 100644 index 000000000000..3d51efd2b258 --- /dev/null +++ b/clang/test/CIR/IR/inlineAttr.cir @@ -0,0 +1,15 @@ +// RUN: cir-opt %s | FileCheck %s -check-prefix=CIR +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR + +#fn_attr = #cir})> + +module { + cir.func @l0() extra(#fn_attr) { + cir.return + } +} + +// CIR: #fn_attr = #cir})> +// CIR: cir.func @l0() extra(#fn_attr) { + +// MLIR: llvm.func @l0() attributes {cir.extra_attrs = #fn_attr, global_visibility = #cir} diff --git a/clang/test/CIR/IR/int.cir b/clang/test/CIR/IR/int.cir new file mode 100644 index 000000000000..af972da9340b --- /dev/null +++ b/clang/test/CIR/IR/int.cir @@ -0,0 +1,39 @@ +// module { +// cir.global external @a = #cir.int<255> : !cir.int +// } + +// RUN: cir-opt %s | FileCheck %s +!s8i = !cir.int +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int + +!u8i = !cir.int +!u16i = !cir.int +!u32i = !cir.int +!u64i = !cir.int + +cir.func @validIntTypesAndAttributes() -> () { + + %1 = cir.const #cir.int<-128> : !cir.int + %2 = cir.const #cir.int<127> : !cir.int + %3 = cir.const #cir.int<255> : !cir.int + + %4 = cir.const #cir.int<-32768> : !cir.int + %5 = cir.const #cir.int<32767> : !cir.int + %6 = cir.const #cir.int<65535> : !cir.int + + %7 = cir.const #cir.int<-2147483648> : !cir.int + %8 = cir.const #cir.int<2147483647> : !cir.int + %9 = cir.const #cir.int<4294967295> : !cir.int + + // FIXME: MLIR is emitting a "too large" error for this one. Not sure why. 
+ // %10 = cir.const #cir.int<-9223372036854775808> : !cir.int + %11 = cir.const #cir.int<9223372036854775807> : !cir.int + %12 = cir.const #cir.int<18446744073709551615> : !cir.int + + cir.return +} + +// No need to check stuff. If it parses, it's fine. +// CHECK: cir.func @validIntTypesAndAttributes() diff --git a/clang/test/CIR/IR/invalid-annotations.cir b/clang/test/CIR/IR/invalid-annotations.cir new file mode 100644 index 000000000000..d7de2d5c5602 --- /dev/null +++ b/clang/test/CIR/IR/invalid-annotations.cir @@ -0,0 +1,32 @@ +// Test attempt to construct ill-formed global annotations +// RUN: cir-opt %s -verify-diagnostics -split-input-file + + +// expected-error @below {{invalid kind of attribute specified}} +// expected-error @below {{failed to parse AnnotationAttr parameter 'name' which is to be a `StringAttr`}} +cir.global external @a = #cir.ptr : !cir.ptr [#cir.annotation] + +// ----- + +// expected-error @below {{GlobalAnnotationValuesAttr should at least have one annotation}} +module attributes {cir.global_annotations = #cir} {} + +// ----- + +// expected-error @below {{Element of GlobalAnnotationValuesAttr annotations array must be an array}} +module attributes {cir.global_annotations = #cir} {} + +// ----- + +// expected-error @below {{Element of GlobalAnnotationValuesAttr annotations array must be a 2-element array}} +module attributes {cir.global_annotations = #cir} {} + +// ----- + +// expected-error @below {{Element of GlobalAnnotationValuesAttr annotationsarray must start with a string}} +module attributes {cir.global_annotations = #cir} {} + +// ----- + +// expected-error @below {{The second element of GlobalAnnotationValuesAttrannotations array element must be of type AnnotationValueAttr}} +module attributes {cir.global_annotations = #cir} {} diff --git a/clang/test/CIR/IR/invalid-llvm-intrinsic.cir b/clang/test/CIR/IR/invalid-llvm-intrinsic.cir new file mode 100644 index 000000000000..38b53a4e1b30 --- /dev/null +++ b/clang/test/CIR/IR/invalid-llvm-intrinsic.cir @@ -0,0 +1,11 @@ +// Test attempt to construct ill-formed global annotations +// RUN: cir-opt %s -verify-diagnostics + +!s32i = !cir.int +!s64i = !cir.int +cir.func @foo() { + %a = cir.alloca !s32i, !cir.ptr, ["a"] {alignment = 4 : i64} + // expected-error @below {{'cir.llvm.intrinsic' op intrinsic name must start with 'llvm.'}} + %i = cir.llvm.intrinsic "ll.aarch64.ldxr" %a : (!cir.ptr) -> !s64i + cir.return +} diff --git a/clang/test/CIR/IR/invalid-opencl-vec-type-hint.cir b/clang/test/CIR/IR/invalid-opencl-vec-type-hint.cir new file mode 100644 index 000000000000..9e57ad793bf8 --- /dev/null +++ b/clang/test/CIR/IR/invalid-opencl-vec-type-hint.cir @@ -0,0 +1,7 @@ +// RUN: cir-opt %s -verify-diagnostics -allow-unregistered-dialect + +// expected-error@+1 {{vec_type_hint must be a type from the CIR or LLVM dialect}} +#fn_attr = #cir.cl.kernel_metadata< + vec_type_hint = !tensor<7xi8>, + vec_type_hint_signedness = 0 +> diff --git a/clang/test/CIR/IR/invalid.cir b/clang/test/CIR/IR/invalid.cir new file mode 100644 index 000000000000..a28569ac0b46 --- /dev/null +++ b/clang/test/CIR/IR/invalid.cir @@ -0,0 +1,1330 @@ +// Test attempts to build bogus CIR +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +!u32i = !cir.int + +cir.func @p0() { + // expected-error @below {{invalid kind of type specified}} + %1 = cir.const #cir.ptr : !u32i + cir.return +} + +// ----- + +!u32i = !cir.int +// expected-error@+2 {{invalid kind of type specified}} +cir.func @b0() { + %1 = cir.const #cir.bool : !u32i + cir.return +} 
+ +// ----- + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool +!u32i = !cir.int +cir.func @if0() { + %0 = cir.const #true + // expected-error@+1 {{'cir.if' op region control flow edge from Region #0 to parent results: source has 1 operands, but target successor needs 0}} + cir.if %0 { + %6 = cir.const #cir.int<3> : !u32i + cir.yield %6 : !u32i + } + cir.return +} + +// ----- + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool +cir.func @yield0() { + %0 = cir.const #true + cir.if %0 { // expected-error {{custom op 'cir.if' multi-block region must not omit terminator}} + cir.br ^a + ^a: + } + cir.return +} + +// ----- + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool +cir.func @yieldbreak() { + %0 = cir.const #true + cir.if %0 { + cir.break // expected-error {{op must be within a loop or switch}} + } + cir.return +} + +// ----- + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool +cir.func @yieldcontinue() { + %0 = cir.const #true + cir.if %0 { + cir.continue // expected-error {{op must be within a loop}} + } + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @s0() { + %1 = cir.const #cir.int<2> : !s32i + cir.switch (%1 : !s32i) [ + case (equal, 5) { // expected-error {{custom op 'cir.switch' case regions must be explicitly terminated}} + %2 = cir.const #cir.int<3> : !s32i + } + ] + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @s1() { + %1 = cir.const #cir.int<2> : !s32i + cir.switch (%1 : !s32i) [ + case (equal, 5) { + } + ] // expected-error {{case region shall not be empty}} + cir.return +} + +// ----- + +cir.func @badstride(%x: !cir.ptr>) { + %idx = cir.const #cir.int<2> : !cir.int + %4 = cir.ptr_stride(%x : !cir.ptr>, %idx : !cir.int), !cir.ptr // expected-error {{requires the same type for first operand and result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast0(%arg0: !u32i) { + %1 = cir.cast(int_to_bool, %arg0 : !u32i), !u32i // expected-error {{requires !cir.bool type for result}} + cir.return +} + +// ----- + +cir.func @cast1(%arg1: !cir.float) { + %1 = cir.cast(int_to_bool, %arg1 : !cir.float), !cir.bool // expected-error {{requires !cir.int type for source}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast2(%p: !cir.ptr) { + %2 = cir.cast(array_to_ptrdecay, %p : !cir.ptr), !cir.ptr // expected-error {{requires !cir.array pointee}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast3(%p: !cir.ptr) { + %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] + %2 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr // expected-error {{requires same type for array element and pointee result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast4(%p: !cir.ptr) { + %2 = cir.cast(bitcast, %p : !cir.ptr), !u32i // expected-error {{requires !cir.ptr or !cir.vector type for source and result}} + cir.return +} + +// ----- + +cir.func @cast5(%p: !cir.float) { + %2 = cir.cast(bool_to_float, %p : !cir.float), !cir.float // expected-error {{requires !cir.bool type for source}} + cir.return +} + +// ----- + +cir.func @cast6(%p: !cir.bool) { + %2 = cir.cast(bool_to_float, %p : !cir.bool), !cir.int // expected-error {{requires !cir.float type for result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast7(%p: !cir.ptr) { + %2 = cir.cast(ptr_to_bool, %p : !cir.ptr), !u32i // expected-error {{requires !cir.bool type for result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast8(%p: !u32i) { + %2 = cir.cast(ptr_to_bool, %p : !u32i), !cir.bool // 
expected-error {{requires !cir.ptr type for source}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast9(%p : !u32i) { + %2 = cir.cast(integral, %p : !u32i), !cir.float // expected-error {{requires !cir.int type for result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast10(%p : !cir.float) { + %2 = cir.cast(integral, %p : !cir.float), !u32i // expected-error {{requires !cir.int type for source}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast11(%p : !cir.float) { + %2 = cir.cast(floating, %p : !cir.float), !u32i // expected-error {{requires !cir.float type for source and result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast12(%p : !u32i) { + %2 = cir.cast(floating, %p : !u32i), !cir.float // expected-error {{requires !cir.float type for source and result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast13(%p : !u32i) { + %2 = cir.cast(float_to_int, %p : !u32i), !u32i // expected-error {{requires !cir.float type for source}} + cir.return +} + +// ----- + +cir.func @cast14(%p : !cir.float) { + %2 = cir.cast(float_to_int, %p : !cir.float), !cir.float // expected-error {{requires !cir.int type for result}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @cast15(%p : !cir.ptr) { + %2 = cir.cast(int_to_ptr, %p : !cir.ptr), !cir.ptr // expected-error {{requires !cir.int type for source}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @cast16(%p : !u64i) { + %2 = cir.cast(int_to_ptr, %p : !u64i), !u64i // expected-error {{requires !cir.ptr type for result}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @cast17(%p : !u64i) { + %2 = cir.cast(ptr_to_int, %p : !u64i), !u64i // expected-error {{requires !cir.ptr type for source}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @cast18(%p : !cir.ptr) { + %2 = cir.cast(ptr_to_int, %p : !cir.ptr), !cir.ptr // expected-error {{requires !cir.int type for result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast19(%p : !u32i) { + %2 = cir.cast(float_to_bool, %p : !u32i), !cir.bool // expected-error {{requires !cir.float type for source}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast20(%p : !cir.float) { + %2 = cir.cast(float_to_bool, %p : !cir.float), !u32i // expected-error {{requires !cir.bool type for result}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast21(%p : !u32i) { + %2 = cir.cast(bool_to_int, %p : !u32i), !u32i // expected-error {{requires !cir.bool type for source}} + cir.return +} + +// ----- + +cir.func @cast22(%p : !cir.bool) { + %2 = cir.cast(bool_to_int, %p : !cir.bool), !cir.float // expected-error {{requires !cir.int type for result}} + cir.return +} + +// ----- + +cir.func @cast23(%p : !cir.bool) { + %2 = cir.cast(int_to_float, %p : !cir.bool), !cir.float // expected-error {{requires !cir.int type for source}} + cir.return +} + +// ----- + +!u32i = !cir.int +cir.func @cast24(%p : !u32i) { + %2 = cir.cast(int_to_float, %p : !u32i), !cir.bool // expected-error {{requires !cir.float type for result}} + cir.return +} + +// ----- + +!u32i = !cir.int +!u64i = !cir.int +cir.func @cast25(%p : !cir.ptr)>) { + %0 = cir.cast(address_space, %p : !cir.ptr)>), !cir.ptr)> // expected-error {{requires two types differ in addrspace only}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @cast26(%p : !cir.ptr)>) { + %0 = cir.cast(address_space, %p : !cir.ptr)>), !u64i // expected-error {{requires !cir.ptr type for source and result}} + cir.return +} + +// ----- + +!u64i = 
!cir.int +cir.func @cast27(%p : !u64i) { + %0 = cir.cast(address_space, %p : !u64i), !cir.ptr)> // expected-error {{requires !cir.ptr type for source and result}} + cir.return +} + +// ----- + +!u32i = !cir.int +!u8i = !cir.int +module { + // expected-error@+1 {{constant array element should match array element type}} + cir.global external @a = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<23> : !u8i, #cir.int<33> : !u8i] : !cir.array> +} + +// ----- + +!u8i = !cir.int +module { + // expected-error@+1 {{constant array size should match type size}} + cir.global external @a = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<23> : !u8i, #cir.int<33> : !u8i] : !cir.array> +} + +// ----- + +!u32i = !cir.int +module { + // expected-error@+1 {{constant array element for string literals expects !cir.int element type}} + cir.global external @b = #cir.const_array<"example\00" : !cir.array> +} + +// ----- + +module { + // expected-error@+1 {{expected type declaration for string literal}} + cir.global "private" constant external @".str2" = #cir.const_array<"example\00"> {alignment = 1 : i64} +} + +// ----- + +!u32i = !cir.int +module { + // expected-error@+1 {{expected string or keyword containing one of the following enum values for attribute 'linkage' [external, available_externally, linkonce, linkonce_odr, weak, weak_odr, internal, cir_private, extern_weak, common]}} + cir.global @a = #cir.const_array<[0 : !u8i, -23 : !u8i, 33 : !u8i] : !cir.array> +} + +// ----- + +!u32i = !cir.int +module { + cir.global "private" external @v = #cir.int<3> : !u32i // expected-error {{private visibility not allowed with 'external' linkage}} +} + +// ----- + +!u32i = !cir.int +module { + cir.global "public" internal @v = #cir.int<3> : !u32i // expected-error {{public visibility not allowed with 'internal' linkage}} +} + +// ----- + +!u32i = !cir.int +module { + cir.global external @v = #cir.zero : !u32i // expected-error {{zero expects struct or array type}} +} + +// ----- + +!s32i = !cir.int +cir.func @vec_op_size() { + %0 = cir.const #cir.int<1> : !s32i + %1 = cir.vec.create(%0 : !s32i) : !cir.vector // expected-error {{'cir.vec.create' op operand count of 1 doesn't match vector type '!cir.vector x 2>' element count of 2}} + cir.return +} + +// ----- + +!s32i = !cir.int +!u32i = !cir.int +cir.func @vec_op_type() { + %0 = cir.const #cir.int<1> : !s32i + %1 = cir.const #cir.int<2> : !u32i + %2 = cir.vec.create(%0, %1 : !s32i, !u32i) : !cir.vector // expected-error {{'cir.vec.create' op operand type '!cir.int' doesn't match vector element type '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_extract_non_int_idx() { + %0 = cir.const 1.5e+00 : f64 + %1 = cir.const #cir.int<0> : !s32i + %2 = cir.vec.create(%1, %1 : !s32i, !s32i) : !cir.vector + %3 = cir.vec.extract %2[%0 : f64] : !cir.vector // expected-error {{expected '<'}} + cir.return +} + +// ----- + +!s32i = !cir.int +!u32i = !cir.int +cir.func @vec_extract_bad_type() { + %0 = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + %2 = cir.vec.create(%1, %1 : !s32i, !s32i) : !cir.vector + %3 = cir.vec.extract %2[%1 : !s32i] : !cir.vector // expected-note {{prior use here}} + cir.store %3, %0 : !u32i, !cir.ptr // expected-error {{use of value '%3' expects different type than prior uses: '!cir.int' vs '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_extract_non_vector() { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.vec.extract %0[%0 : !s32i] : !s32i // 
expected-error {{custom op 'cir.vec.extract' 'vec' must be CIR vector type, but got '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +!u32i = !cir.int +cir.func @vec_insert_bad_type() { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector + %2 = cir.const #cir.int<0> : !u32i // expected-note {{prior use here}} + %3 = cir.vec.insert %2, %1[%0 : !s32i] : !cir.vector // expected-error {{use of value '%2' expects different type than prior uses: '!cir.int' vs '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_insert_non_vector() { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.vec.insert %0, %0[%0 : !s32i] : !s32i // expected-error {{custom op 'cir.vec.insert' 'vec' must be CIR vector type, but got '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_ternary_non_vector1() { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector + %2 = cir.vec.ternary(%0, %1, %1) : !s32i, !cir.vector // expected-error {{'cir.vec.ternary' op operand #0 must be !cir.vector of !cir.int, but got '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_ternary_non_vector2() { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector + %2 = cir.vec.ternary(%1, %0, %0) : !cir.vector, !s32i // expected-error {{'cir.vec.ternary' op operand #1 must be CIR vector type, but got '!cir.int'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_ternary_different_size() { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector + %2 = cir.vec.create(%0, %0, %0, %0 : !s32i, !s32i, !s32i, !s32i) : !cir.vector + %3 = cir.vec.ternary(%1, %2, %2) : !cir.vector, !cir.vector // expected-error {{'cir.vec.ternary' op : the number of elements in '!cir.vector x 2>' and '!cir.vector x 4>' don't match}} + cir.return +} + +// ----- + +cir.func @vec_ternary_not_int(%p : !cir.float) { + %0 = cir.vec.create(%p, %p : !cir.float, !cir.float) : !cir.vector + %1 = cir.vec.ternary(%0, %0, %0) : !cir.vector, !cir.vector // expected-error {{'cir.vec.ternary' op operand #0 must be !cir.vector of !cir.int, but got '!cir.vector'}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_shuffle_mismatch_args(%f : !cir.float, %n : !s32i) { + %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector + %1 = cir.vec.create(%n, %n : !s32i, !s32i) : !cir.vector // expected-note {{prior use here}} + %2 = cir.vec.shuffle(%0, %1 : !cir.vector) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector // expected-error {{use of value '%1' expects different type than prior uses: '!cir.vector' vs '!cir.vector x 2>}} + cir.return +} + +// ----- + +cir.func @vec_shuffle_non_ints(%f : !cir.float) { + %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector + %1 = cir.vec.shuffle(%0, %0 : !cir.vector) [#cir.fp<1.000000e+00> : !cir.float, #cir.fp<1.000000e+00> : !cir.float] : !cir.vector // expected-error {{'cir.vec.shuffle' op all index values must be integers}} + cir.return +} + +// ----- + +!s32i = !cir.int +cir.func @vec_shuffle_result_size(%f : !cir.float) { + %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector + %1 = cir.vec.shuffle(%0, %0 : !cir.vector) [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector // expected-error {{'cir.vec.shuffle' op : the number of elements in [#cir.int<1> : !cir.int, #cir.int<1> : !cir.int] and '!cir.vector' don't match}} + cir.return +} + +// 
----- + +!s32i = !cir.int +cir.func @vec_shuffle_result_element(%f : !cir.float) { + %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector + %1 = cir.vec.shuffle(%0, %0 : !cir.vector) [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector // expected-error {{'cir.vec.shuffle' op : element types of '!cir.vector' and '!cir.vector x 2>' don't match}} + cir.return +} + +// ----- + +cir.func coroutine @bad_task() { // expected-error {{coroutine body must use at least one cir.await op}} + cir.return +} + +// ----- + +cir.func coroutine @missing_condition() { + cir.scope { + cir.await(user, ready : { // expected-error {{ready region must end with cir.condition}} + cir.yield + }, suspend : { + cir.yield + }, resume : { + cir.yield + },) + } + cir.return +} + +// ----- + +!u8i = !cir.int +!u32i = !cir.int +module { + // Note MLIR requires "private" for global declarations, should get + // rid of this somehow in favor of clarity? + cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr + + // expected-error@+1 {{element at index 0 has type '!cir.ptr>' but return type for this element is '!cir.ptr>'}} + cir.global external @type_info_B = #cir.typeinfo<{ + #cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2]> : !cir.ptr}> + : !cir.struct}> +} + +// ----- + +module { + cir.func @l0() { + cir.return + } + + cir.func @l1() alias(@l0) { // expected-error {{function alias shall not have a body}} + cir.return + } +} + +// ----- + +module { + // expected-error@below {{expected 's' or 'u'}} + cir.func @l0(%arg0: !cir.int) -> () { + cir.return + } +} + +// // ----- + +module { + // expected-error@below {{expected integer width to be from 1 up to 64}} + cir.func @l0(%arg0: !cir.int) -> () { + cir.return + } +} + +// ----- + +module { + // expected-error@below {{integer value too large for the given type}} + cir.global external @a = #cir.int<256> : !cir.int + // expected-error@below {{integer value too large for the given type}} + cir.global external @b = #cir.int<-129> : !cir.int +} + +// ----- + +module { + // expected-error@+1 {{prototyped function must have at least one non-variadic input}} + cir.func private @variadic(...) -> !cir.int +} + +// ----- + +module { + // expected-error@+1 {{custom op 'cir.func' variadic arguments must be in the end of the argument list}} + cir.func @variadic(..., !cir.int) -> !cir.int +} + +// ----- + +module { + // expected-error@+1 {{functions only supports zero or one results}} + cir.func @variadic() -> (!cir.int, !cir.int) +} + +// ----- + +module { + cir.func private @variadic(!cir.int, !cir.int, ...) 
-> !cir.int + cir.func @call_variadic(%0: !cir.int) -> !cir.int { + // expected-error@+1 {{'cir.call' op too few operands for callee}} + %1 = cir.call @variadic(%0) : (!cir.int) -> !cir.int + cir.return %1 : !cir.int + } +} + +// ----- + +!s32i = !cir.int +cir.func @test_br() -> !s32i { + %0 = cir.const #cir.int<0>: !s32i + // expected-error@below {{branch has 1 operands for successor #0, but target block has 0}} + cir.br ^bb1(%0 : !s32i) + ^bb1: + cir.return %0 : !s32i +} + +// ----- + +module { + cir.func private @test() -> !cir.void + cir.func @invalid_call() { + // expected-error@+1 {{'cir.call' op callee returns void but call has results}} + %1 = cir.call @test() : () -> (!cir.int) + cir.return + } +} + +// ----- + +module { + cir.func private @test() -> !cir.int + cir.func @invalid_call() { + // expected-error@+1 {{'cir.call' op result type mismatch: expected '!cir.int', but provided '!cir.int'}} + %1 = cir.call @test() : () -> (!cir.int) + cir.return + } +} + +// ----- + +module { + cir.func @invalid_return_type(%0 : !cir.int) -> !cir.int { + // expected-error@+1 {{'cir.return' op returns '!cir.int' but enclosing function returns '!cir.int'}} + cir.return %0 : !cir.int + } +} + +// ----- + +// expected-error@+1 {{invalid language keyword 'dummy'}} +module attributes {cir.lang = #cir.lang} { } + +// ----- + +module { + // Should not copy types with no data layout (unkonwn byte size). + cir.func @invalid_copy(%arg0 : !cir.ptr, %arg1 : !cir.ptr) { + // expected-error@+1 {{missing data layout for pointee type}} + cir.copy %arg0 to %arg1 : !cir.ptr + cir.return + } +} + +// ----- + +module { + // Should not copy to same address. + cir.func @invalid_copy(%arg0 : !cir.ptr>) { + // expected-error@+1 {{source and destination are the same}} + cir.copy %arg0 to %arg0 : !cir.ptr> + cir.return + } +} + +// ----- + +!s8i = !cir.int +module { + // Should not memcpy with invalid length type. + cir.func @invalid_memcpy_len(%arg0 : !cir.ptr, %arg1 : !s8i) { + // expected-error@+1 {{memcpy length must be an unsigned integer}} + cir.libc.memcpy %arg1 bytes from %arg0 to %arg0 : !s8i, !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- + +!s8i = !cir.int +!u32i = !cir.int +module { + // Should not memcpy non-void pointers. 
+ cir.func @invalid_memcpy_len(%arg0 : !cir.ptr, %arg1 : !u32i) { + // expected-error@+1 {{memcpy src and dst must be void pointers}} + cir.libc.memcpy %arg1 bytes from %arg0 to %arg0 : !u32i, !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- +!s8i = !cir.int +!ty_Init = !cir.struct +module { + cir.global "private" internal @_ZL8__ioinit = ctor : !ty_Init { + } + // expected-error@+1 {{custom op 'cir.global' ctor region must have exactly one block}} +} + +// ----- +!s8i = !cir.int +#true = #cir.bool : !cir.bool +!ty_Init = !cir.struct +module { + cir.func private @_ZN4InitC1Eb(!cir.ptr) + cir.global "private" internal @_ZL8__ioinit = ctor : !ty_Init { + %0 = cir.get_global @_ZL8__ioinit : !cir.ptr + cir.call @_ZN4InitC1Eb(%0) : (!cir.ptr) -> () + } dtor {} + // expected-error@+1 {{custom op 'cir.global' dtor region must have exactly one block}} +} + +// ----- +!s32i = !cir.int +!u8i = !cir.int +module { + cir.global "private" constant internal @".str" = #cir.const_array<"Division by zero condition!\00" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global "private" constant external @_ZTIPKc : !cir.ptr + cir.func @_Z8divisionii() { + %11 = cir.alloc.exception 8 -> !cir.ptr> + %12 = cir.get_global @".str" : !cir.ptr> + %13 = cir.cast(array_to_ptrdecay, %12 : !cir.ptr>), !cir.ptr + cir.store %13, %11 : !cir.ptr, !cir.ptr> + cir.throw %11 : !cir.ptr> // expected-error {{'type_info' symbol attribute missing}} + } +} + +// ----- + +!u16i = !cir.int +!u32i = !cir.int +!struct = !cir.struct +module { + cir.func @memeber_index_out_of_bounds(%arg0 : !cir.ptr) { + // expected-error@+1 {{member index out of bounds}} + %0 = cir.get_member %arg0[2] {name = "test"} : !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- + +!u16i = !cir.int +!u32i = !cir.int +!struct = !cir.struct +module { + cir.func @memeber_type_mismatch(%arg0 : !cir.ptr) { + // expected-error@+1 {{member type mismatch}} + %0 = cir.get_member %arg0[0] {name = "test"} : !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- + +!u16i = !cir.int +// expected-error@+1 {{anonymous structs must be complete}} +!struct = !cir.struct + +// ----- + +!u16i = !cir.int +// expected-error@+1 {{identified structs cannot have an empty name}} +!struct = !cir.struct + +// ----- + +// expected-error@+1 {{invalid self-reference within record}} +!struct = !cir.struct}> + +// ----- + +// expected-error@+1 {{record already defined}} +!struct = !cir.struct}> + +// ----- +!s32i = !cir.int +module { + cir.func @tmp(%arg0: !cir.float) { + // expected-error@+1 {{operand #0 must be primitive int}} + %0 = cir.alloca !s32i, !cir.ptr, %arg0 : !cir.float, ["tmp"] + cir.return + } +} + +// ----- + +!u8i = !cir.int +module { + cir.func @stack_save_type_mismatch() { + // expected-error@+1 {{must be CIR pointer type}} + %1 = cir.stack_save : !u8i + cir.return + } +} +// ----- + +!u8i = !cir.int +module { + cir.func @stack_restore_type_mismatch(%arg0 : !u8i) { + // expected-error@+1 {{must be CIR pointer type}} + cir.stack_restore %arg0 : !u8i + cir.return + } +} + +// ----- + +!u16i = !cir.int + +// expected-error@+1 {{invalid kind of type specified}} +#invalid_type = #cir.data_member<0> : !u16i + +// ----- + +!u16i = !cir.int +!u32i = !cir.int +!struct1 = !cir.struct + +// expected-error@+1 {{member type of a #cir.data_member attribute must match the attribute type}} +#invalid_member_ty = #cir.data_member<0> : !cir.data_member + +// ----- + +!u16i = !cir.int +!u32i = !cir.int +!struct1 = !cir.struct + +module { + cir.func @invalid_base_type(%arg0 : 
!cir.data_member) { + %0 = cir.alloca !u32i, !cir.ptr, ["tmp"] {alignment = 4 : i64} + // expected-error@+1 {{'cir.get_runtime_member' op operand #0 must be !cir.struct*}} + %1 = cir.get_runtime_member %0[%arg0 : !cir.data_member] : !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- + +!u16i = !cir.int +!u32i = !cir.int +!struct1 = !cir.struct +!struct2 = !cir.struct + +module { + cir.func @invalid_base_type(%arg0 : !cir.data_member) { + %0 = cir.alloca !struct2, !cir.ptr, ["tmp"] {alignment = 4 : i64} + // expected-error@+1 {{record type does not match the member pointer type}} + %1 = cir.get_runtime_member %0[%arg0 : !cir.data_member] : !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- + +!u16i = !cir.int +!u32i = !cir.int +!struct1 = !cir.struct + +module { + cir.func @invalid_base_type(%arg0 : !cir.data_member) { + %0 = cir.alloca !struct1, !cir.ptr, ["tmp"] {alignment = 4 : i64} + // expected-error@+1 {{result type does not match the member pointer type}} + %1 = cir.get_runtime_member %0[%arg0 : !cir.data_member] : !cir.ptr -> !cir.ptr + cir.return + } +} + +// ----- + +!u16i = !cir.int +!incomplete_struct = !cir.struct + +// expected-error@+1 {{incomplete 'cir.struct' cannot be used to build a non-null data member pointer}} +#incomplete_cls_member = #cir.data_member<0> : !cir.data_member + + +// ----- + +!s32i = !cir.int +!u32i = !cir.int + +cir.func @clrsb_invalid_input_ty(%arg0 : !u32i) -> () { + // expected-error@+1 {{'cir.bit.clrsb' op operand #0 must be 32-bit signed integer or 64-bit signed integer, but got '!cir.int'}} + %0 = cir.bit.clrsb(%arg0 : !u32i) : !s32i + cir.return +} + +// ----- + +!s32i = !cir.int +!u32i = !cir.int + +cir.func @clrsb_invalid_result_ty(%arg0 : !s32i) -> () { + // expected-error@+1 {{'cir.bit.clrsb' op result #0 must be 32-bit signed integer, but got '!cir.int'}} + %0 = cir.bit.clrsb(%arg0 : !s32i) : !u32i + cir.return +} + +// ----- + +!s32i = !cir.int + +cir.func @clz_invalid_input_ty(%arg0 : !s32i) -> () { + // expected-error@+1 {{'cir.bit.clz' op operand #0 must be 16-bit unsigned integer or 32-bit unsigned integer or 64-bit unsigned integer, but got '!cir.int'}} + %0 = cir.bit.clz(%arg0 : !s32i) : !s32i + cir.return +} + +// ----- + +!u32i = !cir.int + +cir.func @clz_invalid_result_ty(%arg0 : !u32i) -> () { + // expected-error@+1 {{'cir.bit.clz' op result #0 must be 32-bit signed integer, but got '!cir.int}} + %0 = cir.bit.clz(%arg0 : !u32i) : !u32i + cir.return +} + +// ----- + +!s32i = !cir.int + +cir.func @ctz_invalid_input_ty(%arg0 : !s32i) -> () { + // expected-error@+1 {{'cir.bit.ctz' op operand #0 must be 16-bit unsigned integer or 32-bit unsigned integer or 64-bit unsigned integer, but got '!cir.int'}} + %0 = cir.bit.ctz(%arg0 : !s32i) : !s32i + cir.return +} + +// ----- + +!u32i = !cir.int + +cir.func @ctz_invalid_result_ty(%arg0 : !u32i) -> () { + // expected-error@+1 {{'cir.bit.ctz' op result #0 must be 32-bit signed integer, but got '!cir.int'}} + %0 = cir.bit.ctz(%arg0 : !u32i) : !u32i + cir.return +} + +// ----- + +!s32i = !cir.int +!u32i = !cir.int + +cir.func @ffs_invalid_input_ty(%arg0 : !u32i) -> () { + // expected-error@+1 {{'cir.bit.ffs' op operand #0 must be 32-bit signed integer or 64-bit signed integer, but got '!cir.int'}} + %0 = cir.bit.ffs(%arg0 : !u32i) : !s32i + cir.return +} + +// ----- + +!s32i = !cir.int +!u32i = !cir.int + +cir.func @ffs_invalid_result_ty(%arg0 : !s32i) -> () { + // expected-error@+1 {{'cir.bit.ffs' op result #0 must be 32-bit signed integer, but got '!cir.int'}} + %0 = cir.bit.ffs(%arg0 : 
!s32i) : !u32i + cir.return +} + +// ----- + +!s32i = !cir.int + +cir.func @parity_invalid_input_ty(%arg0 : !s32i) -> () { + // expected-error@+1 {{'cir.bit.parity' op operand #0 must be 32-bit unsigned integer or 64-bit unsigned integer, but got '!cir.int'}} + %0 = cir.bit.parity(%arg0 : !s32i) : !s32i + cir.return +} + +// ----- + +!u32i = !cir.int + +cir.func @parity_invalid_result_ty(%arg0 : !u32i) -> () { + // expected-error@+1 {{'cir.bit.parity' op result #0 must be 32-bit signed integer, but got '!cir.int}} + %0 = cir.bit.parity(%arg0 : !u32i) : !u32i + cir.return +} + +// ----- + +!s32i = !cir.int + +cir.func @popcount_invalid_input_ty(%arg0 : !s32i) -> () { + // expected-error@+1 {{'cir.bit.popcount' op operand #0 must be 16-bit unsigned integer or 32-bit unsigned integer or 64-bit unsigned integer, but got '!cir.int'}} + %0 = cir.bit.popcount(%arg0 : !s32i) : !s32i + cir.return +} + +// ----- + +!u32i = !cir.int + +cir.func @popcount_invalid_result_ty(%arg0 : !u32i) -> () { + // expected-error@+1 {{'cir.bit.popcount' op result #0 must be 32-bit signed integer, but got '!cir.int'}} + %0 = cir.bit.popcount(%arg0 : !u32i) : !u32i + cir.return +} + +// ----- + +cir.func @bad_fetch(%x: !cir.ptr, %y: !cir.float) -> () { + // expected-error@+1 {{only operates on integer values}} + %12 = cir.atomic.fetch(xor, %x : !cir.ptr, %y : !cir.float, seq_cst) : !cir.float + cir.return +} + +// ----- + +cir.func @bad_operands_for_nowrap(%x: !cir.float, %y: !cir.float) { + // expected-error@+1 {{only operations on integer values may have nsw/nuw flags}} + %0 = cir.binop(add, %x, %y) nsw : !cir.float +} + +// ----- + +!u32i = !cir.int + +cir.func @bad_binop_for_nowrap(%x: !u32i, %y: !u32i) { + // expected-error@+1 {{The nsw/nuw flags are applicable to opcodes: 'add', 'sub' and 'mul'}} + %0 = cir.binop(div, %x, %y) nsw : !u32i +} + +// ----- + +!s32i = !cir.int + +module { + cir.global "private" external @batata : !s32i + cir.func @f35() { + // expected-error@+1 {{access to global not marked thread local}} + %0 = cir.get_global thread_local @batata : !cir.ptr + cir.return + } +} + +// ----- + +// expected-error@+1 {{invalid underlying type for long double}} +cir.func @bad_long_double(%arg0 : !cir.long_double) -> () { + cir.return +} + +// ----- + +!s64i = !cir.int +!s8i = !cir.int +!u32i = !cir.int +!u8i = !cir.int +!void = !cir.void + +!Base = !cir.struct ()>>>}> +!Derived = !cir.struct ()>>>}>}> + +module { + cir.global "private" constant external @_ZTI4Base : !cir.ptr + cir.global "private" constant external @_ZTI7Derived : !cir.ptr + cir.func private @__dynamic_cast(!cir.ptr, !cir.ptr, !cir.ptr, !s64i) -> !cir.ptr + cir.func private @__cxa_bad_cast() + cir.func @test(%arg0 : !cir.ptr) { + // expected-error@+1 {{srcRtti must be an RTTI pointer}} + %0 = cir.dyn_cast(ptr, %arg0 : !cir.ptr, #cir.dyn_cast_info<#cir.global_view<@_ZTI4Base> : !cir.ptr, #cir.global_view<@_ZTI7Derived> : !cir.ptr, @__dynamic_cast, @__cxa_bad_cast, #cir.int<0> : !s64i>) -> !cir.ptr + } +} + +// ----- + +!s64i = !cir.int +!s8i = !cir.int +!u32i = !cir.int +!u8i = !cir.int +!void = !cir.void + +!Base = !cir.struct ()>>>}> +!Derived = !cir.struct ()>>>}>}> + +module { + cir.global "private" constant external @_ZTI4Base : !cir.ptr + cir.global "private" constant external @_ZTI7Derived : !cir.ptr + cir.func private @__dynamic_cast(!cir.ptr, !cir.ptr, !cir.ptr, !s64i) -> !cir.ptr + cir.func private @__cxa_bad_cast() + cir.func @test(%arg0 : !cir.ptr) { + // expected-error@+1 {{destRtti must be an RTTI pointer}} + %0 = 
cir.dyn_cast(ptr, %arg0 : !cir.ptr, #cir.dyn_cast_info<#cir.global_view<@_ZTI4Base> : !cir.ptr, #cir.global_view<@_ZTI7Derived> : !cir.ptr, @__dynamic_cast, @__cxa_bad_cast, #cir.int<0> : !s64i>) -> !cir.ptr + } +} + + +// ----- + +// expected-error@+1 {{goto/label mismatch}} +cir.func @bad_goto() -> () { + cir.goto "somewhere" +^bb1: + cir.label "label" + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @address_space1(%p : !cir.ptr) { // expected-error {{expected keyword for addrspace kind}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @address_space2(%p : !cir.ptr)>) { // expected-error {{expected integer value}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @address_space3(%p : !cir.ptr) { // expected-error {{expected '<'}} + cir.return +} + +// ----- + +!u64i = !cir.int +cir.func @address_space4(%p : !cir.ptr) { // expected-error {{invalid addrspace kind keyword: foobar}} + cir.return +} + +// ----- + +// expected-error@+1 {{metadata attribute without any field present is invalid}} +#fn_attr = #cir.cl.kernel_metadata<> + +// ----- + +// expected-error@+1 {{work_group_size_hint must have exactly 3 integer elements}} +#fn_attr = #cir.cl.kernel_metadata< + work_group_size_hint = [2 : i32] +> + +// ----- + +// expected-error@+1 {{reqd_work_group_size must have exactly 3 integer elements}} +#fn_attr = #cir.cl.kernel_metadata< + reqd_work_group_size = [3.0 : f32, 1.7 : f32] +> + +// ----- + +// expected-error@+1 {{vec_type_hint_signedness should be present if and only if vec_type_hint is set}} +#fn_attr = #cir.cl.kernel_metadata< + vec_type_hint_signedness = 1 +> + +// ----- + +!s32i = !cir.int + +// expected-error@+1 {{vec_type_hint_signedness should be present if and only if vec_type_hint is set}} +#fn_attr = #cir.cl.kernel_metadata< + vec_type_hint = !s32i +> + +// ----- + +!s32i = !cir.int + +// expected-error@+1 {{vec_type_hint_signedness must match the signedness of the vec_type_hint type}} +#fn_attr = #cir.cl.kernel_metadata< + vec_type_hint = !s32i, + vec_type_hint_signedness = 0 +> + +// ----- + +// expected-error@+1 {{addr_space must be integer arrays}} +#fn_attr = #cir.cl.kernel_arg_metadata< + addr_space = ["none"], + access_qual = ["none"], + type = ["uint*"], + base_type = ["uint*"], + type_qual = [""] +> + +// ----- + +// expected-error@+1 {{access_qual, type, base_type, type_qual must be string arrays}} +#fn_attr = #cir.cl.kernel_arg_metadata< + addr_space = [0 : i32], + access_qual = [42 : i32], + type = ["uint*"], + base_type = ["uint*"], + type_qual = [""] +> + +// ----- + +// expected-error@+1 {{name must be a string array}} +#fn_attr = #cir.cl.kernel_arg_metadata< + addr_space = [0 : i32], + access_qual = ["none"], + type = ["uint*"], + base_type = ["uint*"], + type_qual = [""], + name = [33 : i32] +> + +// ----- + +// expected-error@+1 {{all arrays must have the same number of elements}} +#fn_attr = #cir.cl.kernel_arg_metadata< + addr_space = [0 : i32], + access_qual = ["none"], + type = ["uint*", "myunsignedint*"], + base_type = ["uint*", "uint*"], + type_qual = [""], + name = ["foo"] +> + +// ----- + +module { + // expected-error@+1 {{unknown calling convention}} + cir.func @foo() cc(foobar) { + cir.return + } +} + +// ----- + +!s32i = !cir.int + +module { + cir.global external addrspace(offload_global) @gv = #cir.int<0> : !s32i + + cir.func @test_get_global() { + // expected-error@+1 {{'cir.get_global' op result type address space does not match the address space of the global @gv}} + %addr = cir.get_global @gv : !cir.ptr + cir.return 
+ } +} + +// ----- + +!s32i = !cir.int + +module { + cir.func @array_to_ptrdecay_addrspace() { + %0 = cir.alloca !cir.array, !cir.ptr, addrspace(offload_private)>, ["x", init] + // expected-error@+1 {{requires same address space for source and result}} + %1 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr, addrspace(offload_private)>), !cir.ptr + cir.return + } +} + +// ----- + +!s32i = !cir.int + +module { + cir.func @subroutine() cc(spir_function) { + cir.return + } + + cir.func @call_conv_match() { + // expected-error@+1 {{'cir.call' op calling convention mismatch: expected spir_function, but provided spir_kernel}} + cir.call @subroutine(): () -> !cir.void cc(spir_kernel) + cir.return + } +} + +// ----- + +!s32i = !cir.int + +module { + cir.func @test_bitcast_addrspace() { + %0 = cir.alloca !s32i, !cir.ptr, ["tmp"] {alignment = 4 : i64} + // expected-error@+1 {{'cir.cast' op result type address space does not match the address space of the operand}} + %1 = cir.cast(bitcast, %0 : !cir.ptr), !cir.ptr + } + +} + diff --git a/clang/test/CIR/IR/invalid_xfail.cir b/clang/test/CIR/IR/invalid_xfail.cir new file mode 100644 index 000000000000..c29dbf075b6b --- /dev/null +++ b/clang/test/CIR/IR/invalid_xfail.cir @@ -0,0 +1,42 @@ +// Test attempts to build bogus CIR +// RUN: cir-opt %s -verify-diagnostics -split-input-file +// XFAIL: * + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool +cir.func @b0() { + cir.scope { + cir.while { // expected-error {{expected condition region to terminate with 'cir.condition'}} + cir.yield + } do { + cir.br ^bb1 + ^bb1: + cir.return + } + } + cir.return +} + +// ----- + +cir.func @invalid_cond_region_terminator(%arg0 : !cir.bool) -> !cir.void { + cir.do { // expected-error {{op expected condition region to terminate with 'cir.condition'}} + cir.yield + } while { + cir.yield + } + cir.return +} + +// ----- + +cir.func @invalidConditionTerminator (%arg0 : !cir.bool) -> !cir.void { + cir.for : cond { // expected-error {{op expected condition region to terminate with 'cir.condition'}} + cir.yield + } body { + cir.yield + } step { + cir.yield + } + cir.return +} diff --git a/clang/test/CIR/IR/libc-fabs.cir b/clang/test/CIR/IR/libc-fabs.cir new file mode 100644 index 000000000000..691849e0c3a5 --- /dev/null +++ b/clang/test/CIR/IR/libc-fabs.cir @@ -0,0 +1,9 @@ +// RUN: cir-opt %s + +!u32i = !cir.int +module { + cir.func @foo(%arg0: !cir.double) -> !cir.double { + %0 = cir.fabs %arg0 : !cir.double + cir.return %0 : !cir.double + } +} diff --git a/clang/test/CIR/IR/libc-memchr.cir b/clang/test/CIR/IR/libc-memchr.cir new file mode 100644 index 000000000000..014414322819 --- /dev/null +++ b/clang/test/CIR/IR/libc-memchr.cir @@ -0,0 +1,11 @@ +// RUN: cir-opt %s + +!voidptr = !cir.ptr +!s32i = !cir.int +!u64i = !cir.int +module { + cir.func @f(%src : !voidptr, %pattern : !s32i, %len : !u64i) -> !voidptr { + %ptr = cir.libc.memchr(%src, %pattern, %len) + cir.return %ptr : !voidptr + } +} diff --git a/clang/test/CIR/IR/libc-memcpy.cir b/clang/test/CIR/IR/libc-memcpy.cir new file mode 100644 index 000000000000..737f56d533e3 --- /dev/null +++ b/clang/test/CIR/IR/libc-memcpy.cir @@ -0,0 +1,9 @@ +// RUN: cir-opt %s + +!u32i = !cir.int +module { + cir.func @shouldParseLibcMemcpyOp(%arg0 : !cir.ptr, %arg1 : !u32i) { + cir.libc.memcpy %arg1 bytes from %arg0 to %arg0 : !u32i, !cir.ptr -> !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/IR/llvm-intrinsic.cir b/clang/test/CIR/IR/llvm-intrinsic.cir new file mode 100644 index 000000000000..687db64ee09b --- /dev/null +++ 
b/clang/test/CIR/IR/llvm-intrinsic.cir @@ -0,0 +1,12 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int +!s64i = !cir.int +cir.func @foo() { + %a = cir.alloca !s32i, !cir.ptr, ["a"] {alignment = 4 : i64} + %i = cir.llvm.intrinsic "llvm.aarch64.ldxr" %a : (!cir.ptr) -> !s64i + cir.return +} + +// CHECK: %1 = cir.llvm.intrinsic "llvm.aarch64.ldxr" %0 : (!cir.ptr) -> !s64i diff --git a/clang/test/CIR/IR/module.cir b/clang/test/CIR/IR/module.cir new file mode 100644 index 000000000000..7ce2c0ba21cb --- /dev/null +++ b/clang/test/CIR/IR/module.cir @@ -0,0 +1,12 @@ +// RUN: cir-opt %s -split-input-file -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Should parse and print C source language attribute. +module attributes {cir.lang = #cir.lang} { } +// CHECK: module attributes {cir.lang = #cir.lang} + +// ----- + +// Should parse and print C++ source language attribute. +module attributes {cir.lang = #cir.lang} { } +// CHECK: module attributes {cir.lang = #cir.lang} diff --git a/clang/test/CIR/IR/ptr_stride.cir b/clang/test/CIR/IR/ptr_stride.cir new file mode 100644 index 000000000000..6791f830fd48 --- /dev/null +++ b/clang/test/CIR/IR/ptr_stride.cir @@ -0,0 +1,22 @@ +// RUN: cir-opt %s | cir-opt | FileCheck %s +!s32i = !cir.int + +module { + cir.func @arraysubscript(%arg0: !s32i) { + %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] + %1 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + %2 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr + %3 = cir.const #cir.int<0> : !s32i + %4 = cir.ptr_stride(%2 : !cir.ptr, %3 : !s32i), !cir.ptr + cir.return + } +} + +// CHECK: cir.func @arraysubscript(%arg0: !s32i) { +// CHECK-NEXT: %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] +// CHECK-NEXT: %1 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool +// CHECK-NEXT: %2 = cir.cast(array_to_ptrdecay, %0 : !cir.ptr>), !cir.ptr +// CHECK-NEXT: %3 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: %4 = cir.ptr_stride(%2 : !cir.ptr, %3 : !s32i), !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } diff --git a/clang/test/CIR/IR/scope.cir b/clang/test/CIR/IR/scope.cir new file mode 100644 index 000000000000..f756355be0a0 --- /dev/null +++ b/clang/test/CIR/IR/scope.cir @@ -0,0 +1,27 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s +!u32i = !cir.int + +module { + // Should properly print/parse scope with implicit empty yield. + cir.func @implicit_yield() { + cir.scope { + } + // CHECK: cir.scope { + // CHECK: } + cir.return + } + + // Should properly print/parse scope with explicit yield. + cir.func @explicit_yield() { + %0 = cir.scope { + %1 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + cir.yield %1 : !cir.ptr + } : !cir.ptr + // CHECK: %0 = cir.scope { + // [...] 
+ // CHECK: cir.yield %1 : !cir.ptr + // CHECK: } : !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/IR/stack-save-restore.cir b/clang/test/CIR/IR/stack-save-restore.cir new file mode 100644 index 000000000000..f6027258786d --- /dev/null +++ b/clang/test/CIR/IR/stack-save-restore.cir @@ -0,0 +1,23 @@ +// Test the CIR operations can parse and print correctly (roundtrip) + +// RUN: cir-opt %s | cir-opt | FileCheck %s + +!u8i = !cir.int + +module { + cir.func @stack_save_restore() { + %0 = cir.stack_save : !cir.ptr + cir.stack_restore %0 : !cir.ptr + cir.return + } +} + +//CHECK: module { + +//CHECK-NEXT: cir.func @stack_save_restore() { +//CHECK-NEXT: %0 = cir.stack_save : !cir.ptr +//CHECK-NEXT: cir.stack_restore %0 : !cir.ptr +//CHECK-NEXT: cir.return +//CHECK-NEXT: } + +//CHECK-NEXT: } diff --git a/clang/test/CIR/IR/struct.cir b/clang/test/CIR/IR/struct.cir new file mode 100644 index 000000000000..a793e38b1a92 --- /dev/null +++ b/clang/test/CIR/IR/struct.cir @@ -0,0 +1,41 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +!u8i = !cir.int +!u16i = !cir.int +!s32i = !cir.int +!u32i = !cir.int + +!ty_2222 = !cir.struct x 5>}> +!ty_22221 = !cir.struct, !cir.ptr, !cir.ptr}> +!ty_A = !cir.struct +!ty_i = !cir.struct +!ty_S = !cir.struct +!ty_S1_ = !cir.struct + +// Test recursive struct parsing/printing. +!ty_Node = !cir.struct>} #cir.record.decl.ast> +// CHECK-DAG: !cir.struct>} #cir.record.decl.ast> + +module { + // Dummy function to use types and force them to be printed. + cir.func @useTypes(%arg0: !ty_Node) { + cir.return + } + + cir.func @structs() { + %0 = cir.alloca !cir.ptr>, !cir.ptr>>, ["s", init] + %1 = cir.alloca !cir.ptr>, !cir.ptr>>, ["i", init] + cir.return + } + +// CHECK: cir.func @structs() { +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] +// CHECK: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["i", init] + + cir.func @shouldSuccessfullyParseConstStructAttrs() { + %0 = cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !ty_S1_ + // CHECK: cir.const #cir.const_struct<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !ty_S1_ + cir.return + } +} diff --git a/clang/test/CIR/IR/switch.cir b/clang/test/CIR/IR/switch.cir new file mode 100644 index 000000000000..b5c0c9cafb6c --- /dev/null +++ b/clang/test/CIR/IR/switch.cir @@ -0,0 +1,36 @@ +// RUN: cir-opt %s | FileCheck %s +!s32i = !cir.int + +cir.func @s0() { + %1 = cir.const #cir.int<2> : !s32i + cir.switch (%1 : !s32i) [ + case (default) { + cir.return + }, + case (equal, 3) { + cir.yield + }, + case (anyof, [6, 7, 8] : !s32i) { + cir.break + }, + case (equal, 5 : !s32i) { + cir.yield + } + ] + cir.return +} + +// CHECK: cir.switch (%0 : !s32i) [ +// CHECK-NEXT: case (default) { +// CHECK-NEXT: cir.return +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 3) { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: }, +// CHECK-NEXT: case (anyof, [6, 7, 8] : !s32i) { +// CHECK-NEXT: cir.break +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 5) { +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: ] diff --git a/clang/test/CIR/IR/ternary.cir b/clang/test/CIR/IR/ternary.cir new file mode 100644 index 000000000000..3827dc77726d --- /dev/null +++ b/clang/test/CIR/IR/ternary.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s | cir-opt | FileCheck %s +!u32i = !cir.int + +module { + cir.func @blue(%arg0: !cir.bool) -> !u32i { + %0 = cir.ternary(%arg0, true { + %a = cir.const #cir.int<0> : !u32i + cir.yield %a : !u32i + }, false { + %b = cir.const #cir.int<1> : !u32i + cir.yield %b : !u32i + }) : 
(!cir.bool) -> !u32i + cir.return %0 : !u32i + } +} + +// CHECK: module { + +// CHECK: cir.func @blue(%arg0: !cir.bool) -> !u32i { +// CHECK: %0 = cir.ternary(%arg0, true { +// CHECK: %1 = cir.const #cir.int<0> : !u32i +// CHECK: cir.yield %1 : !u32i +// CHECK: }, false { +// CHECK: %1 = cir.const #cir.int<1> : !u32i +// CHECK: cir.yield %1 : !u32i +// CHECK: }) : (!cir.bool) -> !u32i +// CHECK: cir.return %0 : !u32i +// CHECK: } + +// CHECK: } diff --git a/clang/test/CIR/IR/try.cir b/clang/test/CIR/IR/try.cir new file mode 100644 index 000000000000..9104cebd19db --- /dev/null +++ b/clang/test/CIR/IR/try.cir @@ -0,0 +1,22 @@ +// Test attempts to build bogus CIR +// RUN: cir-opt %s + +!s32i = !cir.int + +module { + cir.func @div(%x : !s32i, %y : !s32i) -> !s32i { + %3 = cir.const #cir.int<0> : !s32i + cir.return %3 : !s32i + } + + cir.func @foo(%x : !s32i, %y : !s32i) -> () { + cir.scope { + cir.scope { + %d = cir.call exception @div(%x, %y) : (!s32i, !s32i) -> !s32i + cir.yield + } + cir.yield + } + cir.return + } +} \ No newline at end of file diff --git a/clang/test/CIR/IR/types.cir b/clang/test/CIR/IR/types.cir new file mode 100644 index 000000000000..b69439924a41 --- /dev/null +++ b/clang/test/CIR/IR/types.cir @@ -0,0 +1,13 @@ +// RUN: cir-opt %s | cir-opt | FileCheck %s + +!u32i = !cir.int + +module { + cir.func @arrays() { + %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] + cir.return + } +} + +// CHECK: cir.func @arrays() { +// CHECK-NEXT: %0 = cir.alloca !cir.array, !cir.ptr>, ["x", init] diff --git a/clang/test/CIR/IR/unreachable.cir b/clang/test/CIR/IR/unreachable.cir new file mode 100644 index 000000000000..d057f47ee2b3 --- /dev/null +++ b/clang/test/CIR/IR/unreachable.cir @@ -0,0 +1,9 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +cir.func @test() { + cir.unreachable +} + +// CHECK: cir.func @test +// CHECK-NEXT: cir.unreachable diff --git a/clang/test/CIR/IR/vtableAttr.cir b/clang/test/CIR/IR/vtableAttr.cir new file mode 100644 index 000000000000..f3792517eea4 --- /dev/null +++ b/clang/test/CIR/IR/vtableAttr.cir @@ -0,0 +1,8 @@ +// RUN: cir-opt %s | FileCheck %s + +!u8i = !cir.int +module { + // Should parse VTable attribute. 
+ cir.global external @testVTable = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr]> : !cir.array x 1>}> : !cir.struct x 1>}> + // CHECK: cir.global external @testVTable = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr]> : !cir.array x 1>}> : !ty_anon_struct +} diff --git a/clang/test/CIR/IR/while.cir b/clang/test/CIR/IR/while.cir new file mode 100644 index 000000000000..85897af76800 --- /dev/null +++ b/clang/test/CIR/IR/while.cir @@ -0,0 +1,18 @@ +// RUN: cir-opt %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +cir.func @testPrintingParsing(%arg0 : !cir.bool) { + cir.while { + cir.condition(%arg0) + } do { + cir.yield + } + cir.return +} + +// CHECK: @testPrintingParsing +// CHECK: cir.while { +// CHECK: cir.condition(%arg0) +// CHECK: } do { +// CHECK: cir.yield +// CHECK: } diff --git a/clang/test/CIR/Inputs/skip-this-header.h b/clang/test/CIR/Inputs/skip-this-header.h new file mode 100644 index 000000000000..bf94a9cfeb94 --- /dev/null +++ b/clang/test/CIR/Inputs/skip-this-header.h @@ -0,0 +1,12 @@ +#pragma clang system_header + +class String { + char *storage{nullptr}; + long size; + long capacity; + +public: + String() : size{0} {} + String(int size) : size{size} {} + String(const char *s) {} +}; \ No newline at end of file diff --git a/clang/test/CIR/Inputs/std-cxx.h b/clang/test/CIR/Inputs/std-cxx.h new file mode 100644 index 000000000000..1697e311bcb3 --- /dev/null +++ b/clang/test/CIR/Inputs/std-cxx.h @@ -0,0 +1,1321 @@ +// This header provides reduced versions of common standard library containers +// and whatnots. It's a copy from +// clang/test/Analysis/Inputs/system-header-simulator-cxx.h with some additions +// for ClangIR use cases found along the way. + +// Like the compiler, the static analyzer treats some functions differently if +// they come from a system header -- for example, it is assumed that system +// functions do not arbitrarily free() their parameters, and that some bugs +// found in system headers cannot be fixed by the user and should be +// suppressed. 
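+
+// A minimal sketch of how a ClangIR codegen test might pull these stubs in.
+// The RUN line, the -fclangir/-emit-cir flags and the relative include path
+// are assumptions for illustration; only members actually declared below
+// (std::vector's default constructor, push_back and operator[]) are used:
+//
+//   // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu \
+//   // RUN:   -fclangir -emit-cir %s -o %t.cir
+//   #include "../Inputs/std-cxx.h"
+//
+//   int sum_first_two(int a, int b) {
+//     std::vector<int> v;   // reduced std::vector declared in this header
+//     v.push_back(a);
+//     v.push_back(b);
+//     return v[0] + v[1];   // compile-only: the stubs carry no definitions
+//   }
+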
+#pragma clang system_header + +typedef unsigned char uint8_t; + +typedef __typeof__(sizeof(int)) size_t; +typedef __typeof__((char*)0-(char*)0) ptrdiff_t; +void *memmove(void *s1, const void *s2, size_t n); + +namespace std { + typedef size_t size_type; +#if __cplusplus >= 201103L + using nullptr_t = decltype(nullptr); +#endif +} + +namespace std { + struct input_iterator_tag { }; + struct output_iterator_tag { }; + struct forward_iterator_tag : public input_iterator_tag { }; + struct bidirectional_iterator_tag : public forward_iterator_tag { }; + struct random_access_iterator_tag : public bidirectional_iterator_tag { }; + + template struct iterator_traits { + typedef typename Iterator::difference_type difference_type; + typedef typename Iterator::value_type value_type; + typedef typename Iterator::pointer pointer; + typedef typename Iterator::reference reference; + typedef typename Iterator::iterator_category iterator_category; + }; +} + +template struct __vector_iterator { + typedef __vector_iterator iterator; + typedef __vector_iterator const_iterator; + + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef Ptr pointer; + typedef Ref reference; + typedef std::random_access_iterator_tag iterator_category; + + __vector_iterator(const Ptr p = 0) : ptr(p) {} + __vector_iterator(const iterator &rhs): ptr(rhs.base()) {} + __vector_iterator& operator++() { ++ ptr; return *this; } + __vector_iterator operator++(int) { + auto tmp = *this; + ++ ptr; + return tmp; + } + __vector_iterator operator--() { -- ptr; return *this; } + __vector_iterator operator--(int) { + auto tmp = *this; -- ptr; + return tmp; + } + __vector_iterator operator+(difference_type n) { + return ptr + n; + } + friend __vector_iterator operator+( + difference_type n, + const __vector_iterator &iter) { + return n + iter.ptr; + } + __vector_iterator operator-(difference_type n) { + return ptr - n; + } + __vector_iterator operator+=(difference_type n) { + return ptr += n; + } + __vector_iterator operator-=(difference_type n) { + return ptr -= n; + } + + template + difference_type operator-(const __vector_iterator &rhs); + + Ref operator*() const { return *ptr; } + Ptr operator->() const { return ptr; } + + Ref operator[](difference_type n) { + return *(ptr+n); + } + + bool operator==(const iterator &rhs) const { return ptr == rhs.ptr; } + bool operator==(const const_iterator &rhs) const { return ptr == rhs.ptr; } + + bool operator!=(const iterator &rhs) const { return ptr != rhs.ptr; } + bool operator!=(const const_iterator &rhs) const { return ptr != rhs.ptr; } + + const Ptr& base() const { return ptr; } + +private: + Ptr ptr; +}; + +template struct __deque_iterator { + typedef __deque_iterator iterator; + typedef __deque_iterator const_iterator; + + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef Ptr pointer; + typedef Ref reference; + typedef std::random_access_iterator_tag iterator_category; + + __deque_iterator(const Ptr p = 0) : ptr(p) {} + __deque_iterator(const iterator &rhs): ptr(rhs.base()) {} + __deque_iterator& operator++() { ++ ptr; return *this; } + __deque_iterator operator++(int) { + auto tmp = *this; + ++ ptr; + return tmp; + } + __deque_iterator operator--() { -- ptr; return *this; } + __deque_iterator operator--(int) { + auto tmp = *this; -- ptr; + return tmp; + } + __deque_iterator operator+(difference_type n) { + return ptr + n; + } + friend __deque_iterator operator+( + difference_type n, + const __deque_iterator &iter) { + return n + iter.ptr; + } + __deque_iterator 
operator-(difference_type n) { + return ptr - n; + } + __deque_iterator operator+=(difference_type n) { + return ptr += n; + } + __deque_iterator operator-=(difference_type n) { + return ptr -= n; + } + + Ref operator*() const { return *ptr; } + Ptr operator->() const { return ptr; } + + Ref operator[](difference_type n) { + return *(ptr+n); + } + + bool operator==(const iterator &rhs) const { return ptr == rhs.ptr; } + bool operator==(const const_iterator &rhs) const { return ptr == rhs.ptr; } + + bool operator!=(const iterator &rhs) const { return ptr != rhs.ptr; } + bool operator!=(const const_iterator &rhs) const { return ptr != rhs.ptr; } + + const Ptr& base() const { return ptr; } + +private: + Ptr ptr; +}; + +template struct __list_iterator { + typedef __list_iterator iterator; + typedef __list_iterator const_iterator; + + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef Ptr pointer; + typedef Ref reference; + typedef std::bidirectional_iterator_tag iterator_category; + + __list_iterator(T* it = 0) : item(it) {} + __list_iterator(const iterator &rhs): item(rhs.item) {} + __list_iterator& operator++() { item = item->next; return *this; } + __list_iterator operator++(int) { + auto tmp = *this; + item = item->next; + return tmp; + } + __list_iterator operator--() { item = item->prev; return *this; } + __list_iterator operator--(int) { + auto tmp = *this; + item = item->prev; + return tmp; + } + + Ref operator*() const { return item->data; } + Ptr operator->() const { return &item->data; } + + bool operator==(const iterator &rhs) const { return item == rhs->item; } + bool operator==(const const_iterator &rhs) const { return item == rhs->item; } + + bool operator!=(const iterator &rhs) const { return item != rhs->item; } + bool operator!=(const const_iterator &rhs) const { return item != rhs->item; } + + const T* &base() const { return item; } + + template + friend struct __list_iterator; + +private: + T* item; +}; + +template struct __fwdl_iterator { + typedef __fwdl_iterator iterator; + typedef __fwdl_iterator const_iterator; + + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef Ptr pointer; + typedef Ref reference; + typedef std::forward_iterator_tag iterator_category; + + __fwdl_iterator(T* it = 0) : item(it) {} + __fwdl_iterator(const iterator &rhs): item(rhs.item) {} + __fwdl_iterator& operator++() { item = item->next; return *this; } + __fwdl_iterator operator++(int) { + auto tmp = *this; + item = item->next; + return tmp; + } + Ref operator*() const { return item->data; } + Ptr operator->() const { return &item->data; } + + bool operator==(const iterator &rhs) const { return item == rhs->item; } + bool operator==(const const_iterator &rhs) const { return item == rhs->item; } + + bool operator!=(const iterator &rhs) const { return item != rhs->item; } + bool operator!=(const const_iterator &rhs) const { return item != rhs->item; } + + const T* &base() const { return item; } + + template + friend struct __fwdl_iterator; + +private: + T* item; +}; + +namespace std { + template + struct pair { + T1 first; + T2 second; + + pair() : first(), second() {} + pair(const T1 &a, const T2 &b) : first(a), second(b) {} + + template + pair(const pair &other) : first(other.first), + second(other.second) {} + }; + + typedef __typeof__(sizeof(int)) size_t; + + template class initializer_list; + + template< class T > struct remove_reference {typedef T type;}; + template< class T > struct remove_reference {typedef T type;}; + template< class T > struct 
remove_reference {typedef T type;}; + + template + typename remove_reference::type&& move(T&& a) { + typedef typename remove_reference::type&& RvalRef; + return static_cast(a); + } + + template + void swap(T &a, T &b) { + T c(std::move(a)); + a = std::move(b); + b = std::move(c); + } + + template + class vector { + T *_start; + T *_finish; + T *_end_of_storage; + + public: + typedef T value_type; + typedef size_t size_type; + typedef __vector_iterator iterator; + typedef __vector_iterator const_iterator; + + vector() : _start(0), _finish(0), _end_of_storage(0) {} + template + vector(InputIterator first, InputIterator last); + vector(const vector &other); + vector(vector &&other); + explicit vector(size_type count); + ~vector(); + + size_t size() const { + return size_t(_finish - _start); + } + void resize(size_type __sz); + + vector& operator=(const vector &other); + vector& operator=(vector &&other); + vector& operator=(std::initializer_list ilist); + + void assign(size_type count, const T &value); + template + void assign(InputIterator first, InputIterator last); + void assign(std::initializer_list ilist); + + void clear(); + + void push_back(const T &value); + void push_back(T &&value); + template + void emplace_back(Args&&... args); + void pop_back(); + + iterator insert(const_iterator position, const value_type &val); + iterator insert(const_iterator position, size_type n, + const value_type &val); + template + iterator insert(const_iterator position, InputIterator first, + InputIterator last); + iterator insert(const_iterator position, value_type &&val); + iterator insert(const_iterator position, initializer_list il); + + template + iterator emplace(const_iterator position, Args&&... args); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + T &operator[](size_t n) { + return _start[n]; + } + + const T &operator[](size_t n) const { + return _start[n]; + } + + iterator begin() { return iterator(_start); } + const_iterator begin() const { return const_iterator(_start); } + const_iterator cbegin() const { return const_iterator(_start); } + iterator end() { return iterator(_finish); } + const_iterator end() const { return const_iterator(_finish); } + const_iterator cend() const { return const_iterator(_finish); } + T& front() { return *begin(); } + const T& front() const { return *begin(); } + T& back() { return *(end() - 1); } + const T& back() const { return *(end() - 1); } + }; + + template + class list { + struct __item { + T data; + __item *prev, *next; + } *_start, *_finish; + + public: + typedef T value_type; + typedef size_t size_type; + typedef __list_iterator<__item, T *, T &> iterator; + typedef __list_iterator<__item, const T *, const T &> const_iterator; + + list() : _start(0), _finish(0) {} + template + list(InputIterator first, InputIterator last); + list(const list &other); + list(list &&other); + ~list(); + + list& operator=(const list &other); + list& operator=(list &&other); + list& operator=(std::initializer_list ilist); + + void assign(size_type count, const T &value); + template + void assign(InputIterator first, InputIterator last); + void assign(std::initializer_list ilist); + + void clear(); + + void push_back(const T &value); + void push_back(T &&value); + template + void emplace_back(Args&&... args); + void pop_back(); + + void push_front(const T &value); + void push_front(T &&value); + template + void emplace_front(Args&&... 
args); + void pop_front(); + + iterator insert(const_iterator position, const value_type &val); + iterator insert(const_iterator position, size_type n, + const value_type &val); + template + iterator insert(const_iterator position, InputIterator first, + InputIterator last); + iterator insert(const_iterator position, value_type &&val); + iterator insert(const_iterator position, initializer_list il); + + template + iterator emplace(const_iterator position, Args&&... args); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + iterator begin() { return iterator(_start); } + const_iterator begin() const { return const_iterator(_start); } + const_iterator cbegin() const { return const_iterator(_start); } + iterator end() { return iterator(_finish); } + const_iterator end() const { return const_iterator(_finish); } + const_iterator cend() const { return const_iterator(_finish); } + + T& front() { return *begin(); } + const T& front() const { return *begin(); } + T& back() { return *--end(); } + const T& back() const { return *--end(); } + }; + + template + class deque { + T *_start; + T *_finish; + T *_end_of_storage; + + public: + typedef T value_type; + typedef size_t size_type; + typedef __deque_iterator iterator; + typedef __deque_iterator const_iterator; + + deque() : _start(0), _finish(0), _end_of_storage(0) {} + template + deque(InputIterator first, InputIterator last); + deque(const deque &other); + deque(deque &&other); + ~deque(); + + size_t size() const { + return size_t(_finish - _start); + } + + deque& operator=(const deque &other); + deque& operator=(deque &&other); + deque& operator=(std::initializer_list ilist); + + void assign(size_type count, const T &value); + template + void assign(InputIterator first, InputIterator last); + void assign(std::initializer_list ilist); + + void clear(); + + void push_back(const T &value); + void push_back(T &&value); + template + void emplace_back(Args&&... args); + void pop_back(); + + void push_front(const T &value); + void push_front(T &&value); + template + void emplace_front(Args&&... args); + void pop_front(); + + iterator insert(const_iterator position, const value_type &val); + iterator insert(const_iterator position, size_type n, + const value_type &val); + template + iterator insert(const_iterator position, InputIterator first, + InputIterator last); + iterator insert(const_iterator position, value_type &&val); + iterator insert(const_iterator position, initializer_list il); + + template + iterator emplace(const_iterator position, Args&&... 
args); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + T &operator[](size_t n) { + return _start[n]; + } + + const T &operator[](size_t n) const { + return _start[n]; + } + + iterator begin() { return iterator(_start); } + const_iterator begin() const { return const_iterator(_start); } + const_iterator cbegin() const { return const_iterator(_start); } + iterator end() { return iterator(_finish); } + const_iterator end() const { return const_iterator(_finish); } + const_iterator cend() const { return const_iterator(_finish); } + T& front() { return *begin(); } + const T& front() const { return *begin(); } + T& back() { return *(end() - 1); } + const T& back() const { return *(end() - 1); } + }; + + template + class forward_list { + struct __item { + T data; + __item *next; + } *_start; + + public: + typedef T value_type; + typedef size_t size_type; + typedef __fwdl_iterator<__item, T *, T &> iterator; + typedef __fwdl_iterator<__item, const T *, const T &> const_iterator; + + forward_list() : _start(0) {} + template + forward_list(InputIterator first, InputIterator last); + forward_list(const forward_list &other); + forward_list(forward_list &&other); + ~forward_list(); + + forward_list& operator=(const forward_list &other); + forward_list& operator=(forward_list &&other); + forward_list& operator=(std::initializer_list ilist); + + void assign(size_type count, const T &value); + template + void assign(InputIterator first, InputIterator last); + void assign(std::initializer_list ilist); + + void clear(); + + void push_front(const T &value); + void push_front(T &&value); + template + void emplace_front(Args&&... args); + void pop_front(); + + iterator insert_after(const_iterator position, const value_type &val); + iterator insert_after(const_iterator position, value_type &&val); + iterator insert_after(const_iterator position, size_type n, + const value_type &val); + template + iterator insert_after(const_iterator position, InputIterator first, + InputIterator last); + iterator insert_after(const_iterator position, + initializer_list il); + + template + iterator emplace_after(const_iterator position, Args&&... 
args); + + iterator erase_after(const_iterator position); + iterator erase_after(const_iterator first, const_iterator last); + + iterator begin() { return iterator(_start); } + const_iterator begin() const { return const_iterator(_start); } + const_iterator cbegin() const { return const_iterator(_start); } + iterator end() { return iterator(); } + const_iterator end() const { return const_iterator(); } + const_iterator cend() const { return const_iterator(); } + + T& front() { return *begin(); } + const T& front() const { return *begin(); } + }; + + template + class basic_string { + class Allocator {}; + + public: + basic_string() : basic_string(Allocator()) {} + explicit basic_string(const Allocator &alloc); + basic_string(size_type count, CharT ch, + const Allocator &alloc = Allocator()); + basic_string(const basic_string &other, + size_type pos, + const Allocator &alloc = Allocator()); + basic_string(const basic_string &other, + size_type pos, size_type count, + const Allocator &alloc = Allocator()); + basic_string(const CharT *s, size_type count, + const Allocator &alloc = Allocator()); + basic_string(const CharT *s, + const Allocator &alloc = Allocator()); + template + basic_string(InputIt first, InputIt last, + const Allocator &alloc = Allocator()); + basic_string(const basic_string &other); + basic_string(const basic_string &other, + const Allocator &alloc); + basic_string(basic_string &&other); + basic_string(basic_string &&other, + const Allocator &alloc); + basic_string(std::initializer_list ilist, + const Allocator &alloc = Allocator()); + template + basic_string(const T &t, size_type pos, size_type n, + const Allocator &alloc = Allocator()); + // basic_string(std::nullptr_t) = delete; + + ~basic_string(); + void clear(); + + basic_string &operator=(const basic_string &str); + basic_string &operator+=(const basic_string &str); + + const CharT *c_str() const; + const CharT *data() const; + CharT *data(); + + const char *begin() const; + const char *end() const; + + basic_string &append(size_type count, CharT ch); + basic_string &assign(size_type count, CharT ch); + basic_string &erase(size_type index, size_type count); + basic_string &insert(size_type index, size_type count, CharT ch); + basic_string &replace(size_type pos, size_type count, const basic_string &str); + void pop_back(); + void push_back(CharT ch); + void reserve(size_type new_cap); + void resize(size_type count); + void shrink_to_fit(); + void swap(basic_string &other); + }; + + typedef basic_string string; + typedef basic_string wstring; +#if __cplusplus >= 201103L + typedef basic_string u16string; + typedef basic_string u32string; +#endif + + class exception { + public: + exception() throw(); + virtual ~exception() throw(); + virtual const char *what() const throw() { + return 0; + } + }; + + class bad_alloc : public exception { + public: + bad_alloc() throw(); + bad_alloc(const bad_alloc&) throw(); + bad_alloc& operator=(const bad_alloc&) throw(); + virtual const char* what() const throw() { + return 0; + } + }; + + struct nothrow_t {}; + extern const nothrow_t nothrow; + + enum class align_val_t : size_t {}; + + // libc++'s implementation + template + class initializer_list + { + const _E* __begin_; + size_t __size_; + + initializer_list(const _E* __b, size_t __s) + : __begin_(__b), + __size_(__s) + {} + + public: + typedef _E value_type; + typedef const _E& reference; + typedef const _E& const_reference; + typedef size_t size_type; + + typedef const _E* iterator; + typedef const _E* const_iterator; + + 
initializer_list() : __begin_(0), __size_(0) {} + + size_t size() const {return __size_;} + const _E* begin() const {return __begin_;} + const _E* end() const {return __begin_ + __size_;} + }; + + template struct enable_if {}; + template struct enable_if {typedef _Tp type;}; + + template + struct integral_constant + { + static const _Tp value = __v; + typedef _Tp value_type; + typedef integral_constant type; + + operator value_type() const {return value;} + + value_type operator ()() const {return value;} + }; + + template + const _Tp integral_constant<_Tp, __v>::value; + + template + struct is_trivially_assignable + : integral_constant + { + }; + + typedef integral_constant true_type; + typedef integral_constant false_type; + + template struct is_const : public false_type {}; + template struct is_const<_Tp const> : public true_type {}; + + template struct is_reference : public false_type {}; + template struct is_reference<_Tp&> : public true_type {}; + + template struct is_same : public false_type {}; + template struct is_same<_Tp, _Tp> : public true_type {}; + + template ::value || is_reference<_Tp>::value > + struct __add_const {typedef _Tp type;}; + + template + struct __add_const<_Tp, false> {typedef const _Tp type;}; + + template struct add_const {typedef typename __add_const<_Tp>::type type;}; + + template struct remove_const {typedef _Tp type;}; + template struct remove_const {typedef _Tp type;}; + + template struct add_lvalue_reference {typedef _Tp& type;}; + + template struct is_trivially_copy_assignable + : public is_trivially_assignable::type, + typename add_lvalue_reference::type>::type> {}; + + template + OutputIter __copy(InputIter II, InputIter IE, OutputIter OI) { + while (II != IE) + *OI++ = *II++; + + return OI; + } + + template + inline + typename enable_if + < + is_same::type, _Up>::value && + is_trivially_copy_assignable<_Up>::value, + _Up* + >::type __copy(_Tp* __first, _Tp* __last, _Up* __result) { + size_t __n = __last - __first; + + if (__n > 0) + memmove(__result, __first, __n * sizeof(_Up)); + + return __result + __n; + } + + template + OutputIter copy(InputIter II, InputIter IE, OutputIter OI) { + return __copy(II, IE, OI); + } + + template + inline + _OutputIterator + __copy_backward(_BidirectionalIterator __first, _BidirectionalIterator __last, + _OutputIterator __result) + { + while (__first != __last) + *--__result = *--__last; + return __result; + } + + template + inline + typename enable_if + < + is_same::type, _Up>::value && + is_trivially_copy_assignable<_Up>::value, + _Up* + >::type __copy_backward(_Tp* __first, _Tp* __last, _Up* __result) { + size_t __n = __last - __first; + + if (__n > 0) + { + __result -= __n; + memmove(__result, __first, __n * sizeof(_Up)); + } + return __result; + } + + template + OutputIter copy_backward(InputIter II, InputIter IE, OutputIter OI) { + return __copy_backward(II, IE, OI); + } +} + +template +void __advance(BidirectionalIterator& it, Distance n, + std::bidirectional_iterator_tag) +#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 2 +{ + if (n >= 0) while(n-- > 0) ++it; else while (n++<0) --it; +} +#else + ; +#endif + +template +void __advance(RandomAccessIterator& it, Distance n, + std::random_access_iterator_tag) +#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 2 +{ + it += n; +} +#else + ; +#endif + +namespace std { + +template +void advance(InputIterator& it, Distance n) +#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 1 +{ + __advance(it, n, typename 
InputIterator::iterator_category()); +} +#else + ; +#endif + +template +BidirectionalIterator +prev(BidirectionalIterator it, + typename iterator_traits::difference_type n = + 1) +#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 0 +{ + advance(it, -n); + return it; +} +#else + ; +#endif + +template +ForwardIterator +next(ForwardIterator it, + typename iterator_traits::difference_type n = + 1) +#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 0 +{ + advance(it, n); + return it; +} +#else + ; +#endif + + template + InputIt find(InputIt first, InputIt last, const T& value); + + template + ForwardIt find(ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, + const T& value); + + template + InputIt find_if (InputIt first, InputIt last, UnaryPredicate p); + + template + ForwardIt find_if (ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, + UnaryPredicate p); + + template + InputIt find_if_not (InputIt first, InputIt last, UnaryPredicate q); + + template + ForwardIt find_if_not (ExecutionPolicy&& policy, ForwardIt first, + ForwardIt last, UnaryPredicate q); + + template + InputIt find_first_of(InputIt first, InputIt last, + ForwardIt s_first, ForwardIt s_last); + + template + ForwardIt1 find_first_of (ExecutionPolicy&& policy, + ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last); + + template + InputIt find_first_of (InputIt first, InputIt last, + ForwardIt s_first, ForwardIt s_last, + BinaryPredicate p ); + + template + ForwardIt1 find_first_of (ExecutionPolicy&& policy, + ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last, + BinaryPredicate p ); + + template + InputIt find_end(InputIt first, InputIt last, + ForwardIt s_first, ForwardIt s_last); + + template + ForwardIt1 find_end (ExecutionPolicy&& policy, + ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last); + + template + InputIt find_end (InputIt first, InputIt last, + ForwardIt s_first, ForwardIt s_last, + BinaryPredicate p ); + + template + ForwardIt1 find_end (ExecutionPolicy&& policy, + ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last, + BinaryPredicate p ); + + template + ForwardIt lower_bound (ForwardIt first, ForwardIt last, const T& value); + + template + ForwardIt lower_bound (ForwardIt first, ForwardIt last, const T& value, + Compare comp); + + template + ForwardIt upper_bound (ForwardIt first, ForwardIt last, const T& value); + + template + ForwardIt upper_bound (ForwardIt first, ForwardIt last, const T& value, + Compare comp); + + template + ForwardIt1 search (ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last); + + template + ForwardIt1 search (ExecutionPolicy&& policy, + ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last); + + template + ForwardIt1 search (ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last, BinaryPredicate p); + + template + ForwardIt1 search (ExecutionPolicy&& policy, + ForwardIt1 first, ForwardIt1 last, + ForwardIt2 s_first, ForwardIt2 s_last, BinaryPredicate p); + + template + ForwardIt search (ForwardIt first, ForwardIt last, const Searcher& searcher); + + template + ForwardIt search_n (ForwardIt first, ForwardIt last, Size count, + const T& value); + + template + ForwardIt search_n (ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, + Size count, const T& value); + + template + ForwardIt search_n (ForwardIt first, ForwardIt last, Size count, + const T& value, BinaryPredicate 
p); + + template + ForwardIt search_n (ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, + Size count, const T& value, BinaryPredicate p); + + template + OutputIterator copy(InputIterator first, InputIterator last, + OutputIterator result); + +} + +#if __cplusplus >= 201103L +namespace std { +template // TODO: Implement the stub for deleter. +class unique_ptr { +public: + unique_ptr() noexcept {} + unique_ptr(T *) noexcept {} + unique_ptr(const unique_ptr &) noexcept = delete; + unique_ptr(unique_ptr &&) noexcept; + + T *get() const noexcept; + T *release() noexcept; + void reset(T *p = nullptr) noexcept; + void swap(unique_ptr &p) noexcept; + + typename std::add_lvalue_reference::type operator*() const; + T *operator->() const noexcept; + operator bool() const noexcept; + unique_ptr &operator=(unique_ptr &&p) noexcept; + unique_ptr &operator=(nullptr_t) noexcept; +}; + +// TODO :: Once the deleter parameter is added update with additional template parameter. +template +void swap(unique_ptr &x, unique_ptr &y) noexcept { + x.swap(y); +} + +template +bool operator==(const unique_ptr &x, const unique_ptr &y); + +template +bool operator!=(const unique_ptr &x, const unique_ptr &y); + +template +bool operator<(const unique_ptr &x, const unique_ptr &y); + +template +bool operator>(const unique_ptr &x, const unique_ptr &y); + +template +bool operator<=(const unique_ptr &x, const unique_ptr &y); + +template +bool operator>=(const unique_ptr &x, const unique_ptr &y); + +template +bool operator==(const unique_ptr &x, nullptr_t y); + +template +bool operator!=(const unique_ptr &x, nullptr_t y); + +template +bool operator<(const unique_ptr &x, nullptr_t y); + +template +bool operator>(const unique_ptr &x, nullptr_t y); + +template +bool operator<=(const unique_ptr &x, nullptr_t y); + +template +bool operator>=(const unique_ptr &x, nullptr_t y); + +template +bool operator==(nullptr_t x, const unique_ptr &y); + +template +bool operator!=(nullptr_t x, const unique_ptr &y); + +template +bool operator>(nullptr_t x, const unique_ptr &y); + +template +bool operator<(nullptr_t x, const unique_ptr &y); + +template +bool operator>=(nullptr_t x, const unique_ptr &y); + +template +bool operator<=(nullptr_t x, const unique_ptr &y); + +template +unique_ptr make_unique(Args &&...args); + +#if __cplusplus >= 202002L + +template +unique_ptr make_unique_for_overwrite(); + +#endif + +} // namespace std +#endif + +namespace std { +template +class basic_ostream; + +using ostream = basic_ostream; + +extern std::ostream cout; + +ostream &operator<<(ostream &, const string &); + +#if __cplusplus >= 202002L +template +ostream &operator<<(ostream &, const std::unique_ptr &); +#endif +} // namespace std + +#ifdef TEST_INLINABLE_ALLOCATORS +namespace std { + void *malloc(size_t); + void free(void *); +} +void* operator new(std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); } +void* operator new[](std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); } +void operator delete(void* ptr, const std::nothrow_t&) throw() { std::free(ptr); } +void operator delete[](void* ptr, const std::nothrow_t&) throw() { std::free(ptr); } +#else +// C++20 standard draft 17.6.1, from "Header synopsis", but with throw() +// instead of noexcept: + +void *operator new(std::size_t size); +void *operator new(std::size_t size, std::align_val_t alignment); +void *operator new(std::size_t size, const std::nothrow_t &) throw(); +void *operator new(std::size_t size, std::align_val_t alignment, + const 
std::nothrow_t &) throw(); +void operator delete(void *ptr) throw(); +void operator delete(void *ptr, std::size_t size) throw(); +void operator delete(void *ptr, std::align_val_t alignment) throw(); +void operator delete(void *ptr, std::size_t size, std::align_val_t alignment) throw(); +void operator delete(void *ptr, const std::nothrow_t &)throw(); +void operator delete(void *ptr, std::align_val_t alignment, + const std::nothrow_t &)throw(); +void *operator new[](std::size_t size); +void *operator new[](std::size_t size, std::align_val_t alignment); +void *operator new[](std::size_t size, const std::nothrow_t &) throw(); +void *operator new[](std::size_t size, std::align_val_t alignment, + const std::nothrow_t &) throw(); +void operator delete[](void *ptr) throw(); +void operator delete[](void *ptr, std::size_t size) throw(); +void operator delete[](void *ptr, std::align_val_t alignment) throw(); +void operator delete[](void *ptr, std::size_t size, std::align_val_t alignment) throw(); +void operator delete[](void *ptr, const std::nothrow_t &) throw(); +void operator delete[](void *ptr, std::align_val_t alignment, + const std::nothrow_t &) throw(); +#endif + +void* operator new (std::size_t size, void* ptr) throw() { return ptr; }; +void* operator new[] (std::size_t size, void* ptr) throw() { return ptr; }; +void operator delete (void* ptr, void*) throw() {}; +void operator delete[] (void* ptr, void*) throw() {}; + +namespace __cxxabiv1 { +extern "C" { +extern char *__cxa_demangle(const char *mangled_name, + char *output_buffer, + size_t *length, + int *status); +}} +namespace abi = __cxxabiv1; + +namespace std { + template + bool is_sorted(ForwardIt first, ForwardIt last); + + template + void nth_element(RandomIt first, RandomIt nth, RandomIt last); + + template + void partial_sort(RandomIt first, RandomIt middle, RandomIt last); + + template + void sort (RandomIt first, RandomIt last); + + template + void stable_sort(RandomIt first, RandomIt last); + + template + BidirIt partition(BidirIt first, BidirIt last, UnaryPredicate p); + + template + BidirIt stable_partition(BidirIt first, BidirIt last, UnaryPredicate p); +} + +namespace std { + +template< class T = void > +struct less; + +template< class T > +struct allocator; + +template< class Key > +struct hash; + +template< + class Key, + class Compare = std::less, + class Alloc = std::allocator +> class set { + public: + set(initializer_list __list) {} + + class iterator { + public: + iterator(Key *key): ptr(key) {} + iterator& operator++() { ++ptr; return *this; } + bool operator!=(const iterator &other) const { return ptr != other.ptr; } + const Key &operator*() const { return *ptr; } + private: + Key *ptr; + }; + + public: + Key *val; + iterator begin() const { return iterator(val); } + iterator end() const { return iterator(val + 1); } +}; + +template< + class Key, + class Hash = std::hash, + class Compare = std::less, + class Alloc = std::allocator +> class unordered_set { + public: + unordered_set(initializer_list __list) {} + + class iterator { + public: + iterator(Key *key): ptr(key) {} + iterator& operator++() { ++ptr; return *this; } + bool operator!=(const iterator &other) const { return ptr != other.ptr; } + const Key &operator*() const { return *ptr; } + private: + Key *ptr; + }; + + public: + Key *val; + iterator begin() const { return iterator(val); } + iterator end() const { return iterator(val + 1); } +}; + +namespace execution { +class sequenced_policy {}; +} + +template struct equal_to {}; + +template > +class 
default_searcher { +public: + default_searcher (ForwardIt pat_first, + ForwardIt pat_last, + BinaryPredicate pred = BinaryPredicate()); + template + std::pair + operator()( ForwardIt2 first, ForwardIt2 last ) const; +}; + +template class packaged_task; +template class packaged_task { + // TODO: Add some actual implementation. +}; + +#if __has_feature(cxx_decltype) +typedef decltype(nullptr) nullptr_t; + +template +class shared_ptr +{ +public: + constexpr shared_ptr(nullptr_t); + explicit shared_ptr(_Tp* __p); + + shared_ptr(shared_ptr&& __r) { } + + ~shared_ptr(); + + // shared_ptr& operator=(shared_ptr&& __r); + shared_ptr<_Tp>& operator=(const shared_ptr& __r) noexcept + { + return *this; + } + + template + shared_ptr<_Tp>& operator=(const shared_ptr<_Yp>& __r) noexcept + { + return *this; + } + + shared_ptr<_Tp>& operator=(shared_ptr&& __r) noexcept + { + return *this; + } + + template + shared_ptr<_Tp>& operator=(shared_ptr<_Yp>&& __r) + { + return *this; + } +}; + +template +inline +constexpr +shared_ptr<_Tp>::shared_ptr(nullptr_t) { +} + +#endif // __has_feature(cxx_decltype) + +template + shared_ptr make_shared(Args &&...args) { + return shared_ptr(new T(static_cast(args)...)); + } + +template struct array { + T arr[N]; + typedef T value_type; + typedef value_type* iterator; + constexpr iterator begin() { return iterator(arr); } + constexpr iterator end() { return iterator(arr + N); } +}; + +} // namespace std diff --git a/clang/test/CIR/Lowering/OpenMP/barrier.cir b/clang/test/CIR/Lowering/OpenMP/barrier.cir new file mode 100644 index 000000000000..52fee8fff6c1 --- /dev/null +++ b/clang/test/CIR/Lowering/OpenMP/barrier.cir @@ -0,0 +1,15 @@ + +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s + + +module { + cir.func @omp_barrier_1() { + omp.barrier + cir.return + } +} + +// CHECK: define void @omp_barrier_1() +// CHECK: call i32 @__kmpc_global_thread_num(ptr {{.*}}) +// CHECK: call void @__kmpc_barrier(ptr {{.*}}, i32 {{.*}}) +// CHECK: ret void diff --git a/clang/test/CIR/Lowering/OpenMP/parallel.cir b/clang/test/CIR/Lowering/OpenMP/parallel.cir new file mode 100644 index 000000000000..da98868eddb1 --- /dev/null +++ b/clang/test/CIR/Lowering/OpenMP/parallel.cir @@ -0,0 +1,35 @@ +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s + +!s32i = !cir.int +module { + cir.func @omp_parallel() { + %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + omp.parallel { + cir.scope { + %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %3 = cir.const #cir.int<1> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.const #cir.int<1> : !s32i + %6 = cir.binop(add, %4, %5) : !s32i + cir.store %6, %0 : !s32i, !cir.ptr + } + omp.terminator + } + cir.return + } +} +// CHECK-LABEL: omp_parallel +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call({{.*}}, ptr @omp_parallel..omp_par, +// CHECK: ret void +// CHECK-NEXT: } +// CHECK: define{{.*}} void @omp_parallel..omp_par(ptr +// CHECK: %[[YVar:.*]] = load ptr, ptr %{{.*}}, align 8 +// CHECK: %[[XVar:.*]] = alloca i32, i64 1, align 4 +// CHECK: store i32 1, ptr %[[XVar]], align 4 +// CHECK: %[[XVal:.*]] = load i32, ptr %[[XVar]], align 4 +// CHECK: %[[BinOp:.*]] = add i32 %[[XVal]], 1 +// CHECK: store i32 %[[BinOp]], ptr %[[YVar]], align 4 +// CHECK: ret diff --git a/clang/test/CIR/Lowering/OpenMP/taskwait.cir b/clang/test/CIR/Lowering/OpenMP/taskwait.cir new file mode 100644 index 000000000000..336bbda4f1bf --- /dev/null +++ b/clang/test/CIR/Lowering/OpenMP/taskwait.cir @@ -0,0 +1,14 @@ +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s + + +module { + cir.func @omp_taskwait_1() { + omp.taskwait + cir.return + } +} + +// CHECK: define void @omp_taskwait_1() +// CHECK: call i32 @__kmpc_global_thread_num(ptr {{.*}}) +// CHECK: call i32 @__kmpc_omp_taskwait(ptr {{.*}}, i32 {{.*}}) +// CHECK: ret void \ No newline at end of file diff --git a/clang/test/CIR/Lowering/OpenMP/taskyield.cir b/clang/test/CIR/Lowering/OpenMP/taskyield.cir new file mode 100644 index 000000000000..5104e9c31be1 --- /dev/null +++ b/clang/test/CIR/Lowering/OpenMP/taskyield.cir @@ -0,0 +1,14 @@ +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s + + +module { + cir.func @omp_taskyield_1() { + omp.taskyield + cir.return + } +} + +// CHECK: define void @omp_taskyield_1() +// CHECK: call i32 @__kmpc_global_thread_num(ptr {{.*}}) +// CHECK: call i32 @__kmpc_omp_taskyield(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}) +// CHECK: ret void \ No newline at end of file diff --git a/clang/test/CIR/Lowering/ThroughMLIR/array.cir b/clang/test/CIR/Lowering/ThroughMLIR/array.cir new file mode 100644 index 000000000000..dc1eb97c80b3 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/array.cir @@ -0,0 +1,17 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} + cir.return + } +} + +// CHECK: module { +// CHECK: func @foo() { +// CHECK: = memref.alloca() {alignment = 16 : i64} : memref> +// CHECK: return +// CHECK: } +// CHECK: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/binop.cpp b/clang/test/CIR/Lowering/ThroughMLIR/binop.cpp new file mode 100644 index 000000000000..7f3c8b98cfd1 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/binop.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +void testSignedIntBinOps(int a, int b) { + int x = a * b; + x = x / b; + x = x % b; + x = x + b; + x = x - b; + x = x >> b; + x = x << b; + x = x & b; + x = x ^ b; + x = x | b; +} + +// CHECK: func.func @_Z19testSignedIntBinOpsii +// CHECK: %[[VAR2:.*]] = arith.muli %[[VAR0:.*]], %[[VAR1:.*]] : i32 +// CHECK: %[[VAR5:.*]] = arith.divsi %[[VAR3:.*]], %[[VAR4:.*]] : i32 +// CHECK: %[[VAR8:.*]] = arith.remsi %[[VAR6:.*]], %[[VAR7:.*]] : i32 +// CHECK: %[[VAR11:.*]] = arith.addi %[[VAR9:.*]], %[[VAR10:.*]] : i32 +// CHECK: %[[VAR14:.*]] = arith.subi %[[VAR12:.*]], %[[VAR13:.*]] : i32 +// CHECK: %[[VAR18:.*]] = arith.shrsi %[[VAR15:.*]], %[[VAR16:.*]] : i32 +// CHECK: %[[VAR22:.*]] = arith.shli %[[VAR19:.*]], %[[VAR20:.*]] : i32 +// CHECK: %[[VAR25:.*]] = arith.andi %[[VAR23:.*]], %[[VAR24:.*]] : i32 +// CHECK: %[[VAR28:.*]] = arith.xori 
%[[VAR26:.*]], %[[VAR27:.*]] : i32 +// CHECK: %[[VAR31:.*]] = arith.ori %[[VAR29:.*]], %[[VAR30:.*]] : i32 +// CHECK: } + +void testUnSignedIntBinOps(unsigned a, unsigned b) { + unsigned x = a * b; + x = x / b; + x = x % b; + x = x + b; + x = x - b; + x = x >> b; + x = x << b; + x = x & b; + x = x ^ b; + x = x | b; +} + +// CHECK: func.func @_Z21testUnSignedIntBinOpsjj +// CHECK: %[[VAR2:.*]] = arith.muli %[[VAR0:.*]], %[[VAR1:.*]] : i32 +// CHECK: %[[VAR5:.*]] = arith.divui %[[VAR3:.*]], %[[VAR4:.*]] : i32 +// CHECK: %[[VAR8:.*]] = arith.remui %[[VAR6:.*]], %[[VAR7:.*]] : i32 +// CHECK: %[[VAR11:.*]] = arith.addi %[[VAR9:.*]], %[[VAR10:.*]] : i32 +// CHECK: %[[VAR14:.*]] = arith.subi %[[VAR12:.*]], %[[VAR13:.*]] : i32 +// CHECK: %[[VAR18:.*]] = arith.shrui %[[VAR15:.*]], %[[VAR16:.*]] : i32 +// CHECK: %[[VAR22:.*]] = arith.shli %[[VAR19:.*]], %[[VAR20:.*]] : i32 +// CHECK: %[[VAR25:.*]] = arith.andi %[[VAR23:.*]], %[[VAR24:.*]] : i32 +// CHECK: %[[VAR28:.*]] = arith.xori %[[VAR26:.*]], %[[VAR27:.*]] : i32 +// CHECK: %[[VAR31:.*]] = arith.ori %[[VAR29:.*]], %[[VAR30:.*]] : i32 +// CHECK: } + +void testFloatingPointBinOps(float a, float b, double c, double d) { + float e = a * b; + e = a / b; + e = a + b; + e = a - b; + + double f = a * b; + f = c * d; + f = c / d; + f = c + d; + f = c - d; +} + +// CHECK: func.func @_Z23testFloatingPointBinOpsffdd +// CHECK: %[[VAR2:.*]] = arith.mulf %[[VAR0:.*]], %[[VAR1:.*]] : f32 +// CHECK: %[[VAR5:.*]] = arith.divf %[[VAR3:.*]], %[[VAR4:.*]] : f32 +// CHECK: %[[VAR8:.*]] = arith.addf %[[VAR6:.*]], %[[VAR7:.*]] : f32 +// CHECK: %[[VAR11:.*]] = arith.subf %[[VAR9:.*]], %[[VAR10:.*]] : f32 +// CHECK: %[[VAR14:.*]] = arith.mulf %[[VAR12:.*]], %[[VAR13:.*]] : f64 +// CHECK: %[[VAR18:.*]] = arith.divf %[[VAR16:.*]], %[[VAR17:.*]] : f64 +// CHECK: %[[VAR22:.*]] = arith.addf %[[VAR20:.*]], %[[VAR21:.*]] : f64 +// CHECK: %[[VAR26:.*]] = arith.subf %[[VAR24:.*]], %[[VAR25:.*]] : f64 diff --git a/clang/test/CIR/Lowering/ThroughMLIR/bit.c b/clang/test/CIR/Lowering/ThroughMLIR/bit.c new file mode 100644 index 000000000000..7d21f991215a --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/bit.c @@ -0,0 +1,153 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +int clz_u16(unsigned short x) { + return __builtin_clzs(x); +} +// CHECK: func.func @clz_u16(%arg0: i16{{.*}}) -> i32 { +// CHECK: %[[CTLZ:.+]] = math.ctlz %[[INPUT:.+]] : i16 +// CHECK: %[[EXTUI:.+]] = arith.extui %[[CTLZ]] : i16 to i32 +// CHECK: } + +int clz_u32(unsigned x) { + return __builtin_clz(x); +} +// CHECK: func.func @clz_u32(%arg0: i32{{.*}}) -> i32 { +// CHECK: %[[CTLZ:.+]] = math.ctlz %[[INPUT:.+]] : i32 +// CHECK: %[[BITCAST:.+]] = arith.bitcast %[[CTLZ]] : i32 to i32 +// CHECK: } + +int clz_u64(unsigned long x) { + return __builtin_clzl(x); +} +// CHECK: func.func @clz_u64(%arg0: i64{{.*}}) -> i32 { +// CHECK: %[[CTLZ:.+]] = math.ctlz %[[INPUT:.+]] : i64 +// CHECK: %[[TRUNCI:.+]] = arith.trunci %[[CTLZ]] : i64 to i32 +// CHECK: } + +int ctz_u16(unsigned short x) { + return __builtin_ctzs(x); +} +// CHECK: func.func @ctz_u16(%arg0: i16{{.*}}) -> i32 { +// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i16 +// CHECK: %[[EXTUI:.+]] = arith.extui %[[CTTZ]] : i16 to i32 +// CHECK: } + +int ctz_u32(unsigned x) { + return __builtin_ctz(x); +} +// CHECK: func.func @ctz_u32(%arg0: i32{{.*}}) -> i32 { +// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i32 +// CHECK: %[[BITCAST:.+]] = 
arith.bitcast %[[CTTZ]] : i32 to i32 +// CHECK: } + +int ctz_u64(unsigned long x) { + return __builtin_ctzl(x); +} +// CHECK: func.func @ctz_u64(%arg0: i64{{.*}}) -> i32 { +// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i64 +// CHECK: %[[TRUNCI:.+]] = arith.trunci %[[CTTZ]] : i64 to i32 +// CHECK: } + +int popcount_u16(unsigned short x) { + return __builtin_popcount(x); +} +// CHECK: func.func @popcount_u16(%arg0: i16{{.*}}) -> i32 { +// CHECK: %[[EXTUI:.+]] = arith.extui %[[INPUT:.+]] : i16 to i32 +// CHECK: %[[CTPOP:.+]] = math.ctpop %[[EXTUI]] : i32 +// CHECK: %[[BITCAST:.+]] = arith.bitcast %[[CTPOP]] : i32 to i32 +// CHECK: } + +int popcount_u32(unsigned x) { + return __builtin_popcount(x); +} +// CHECK: func.func @popcount_u32(%arg0: i32{{.*}}) -> i32 { +// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i32 +// CHECK: %[[BITCAST:.+]] = arith.bitcast %[[CTPOP]] : i32 to i32 +// CHECK: } + +int popcount_u64(unsigned long x) { + return __builtin_popcountl(x); +} +// CHECK: func.func @popcount_u64(%arg0: i64{{.*}}) -> i32 { +// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i64 +// CHECK: %[[TRUNCI:.+]] = arith.trunci %[[CTPOP]] : i64 to i32 +// CHECK: } + +int clrsb_s32(int x) { + return __builtin_clrsb(x); +} +// CHECK: func.func @clrsb_s32(%arg0: i32{{.*}}) -> i32 { +// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +// CHECK: %[[CMP:.+]] = arith.cmpi slt, %[[INPUT:.+]], %[[C0_I32]] : i32 +// CHECK: %[[C_MINUS1_I32:.+]] = arith.constant -1 : i32 +// CHECK: %[[XORI:.+]] = arith.xori %[[INPUT]], %[[C_MINUS1_I32]] : i32 +// CHECK: %[[SELECT:.+]] = arith.select %[[CMP]], %[[XORI]], %[[INPUT]] : i32 +// CHECK: %[[CTLZ:.+]] = math.ctlz %[[SELECT]] : i32 +// CHECK: %[[BITCAST:.+]] = arith.bitcast %[[CTLZ]] : i32 to i32 +// CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +// CHECK: %[[SUBI:.+]] = arith.subi %[[BITCAST]], %[[C1_I32]] : i32 +// CHECK: } + +int clrsb_s64(long x) { + return __builtin_clrsbl(x); +} +// CHECK: func.func @clrsb_s64(%arg0: i64{{.*}}) -> i32 { +// CHECK: %[[C0_I64:.+]] = arith.constant 0 : i64 +// CHECK: %[[CMP:.+]] = arith.cmpi slt, %[[INPUT:.+]], %[[C0_I64]] : i64 +// CHECK: %[[C_MINUS1_I64:.+]] = arith.constant -1 : i64 +// CHECK: %[[XORI:.+]] = arith.xori %[[INPUT]], %[[C_MINUS1_I64]] : i64 +// CHECK: %[[SELECT:.+]] = arith.select %[[CMP]], %[[XORI]], %[[INPUT]] : i64 +// CHECK: %[[CTLZ:.+]] = math.ctlz %[[SELECT]] : i64 +// CHECK: %[[TRUNCI:.+]] = arith.trunci %[[CTLZ]] : i64 to i32 +// CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +// CHECK: %[[SUBI:.+]] = arith.subi %[[TRUNCI]], %[[C1_I32]] : i32 +// CHECK: } + +int ffs_s32(int x) { + return __builtin_ffs(x); +} +// CHECK: func.func @ffs_s32(%arg0: i32{{.*}}) -> i32 { +// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i32 +// CHECK-NEXT: %[[BITCAST:.+]] = arith.bitcast %[[CTTZ]] : i32 to i32 +// CHECK-NEXT: %[[C1_I32:.+]] = arith.constant 1 : i32 +// CHECK-NEXT: %[[ADDI:.+]] = arith.addi %[[BITCAST]], %[[C1_I32]] : i32 +// CHECK-NEXT: %[[C0_I32:.+]] = arith.constant 0 : i32 +// CHECK-NEXT: %[[CMPI:.+]] = arith.cmpi eq, %[[INPUT]], %[[C0_I32]] : i32 +// CHECK-NEXT: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +// CHECK-NEXT: %[[SELECT:.+]] = arith.select %[[CMPI]], %[[C0_I32_1]], %[[ADDI]] : i32 +// CHECK: } + +int ffs_s64(long x) { + return __builtin_ffsl(x); +} +// CHECK: func.func @ffs_s64(%arg0: i64{{.*}}) -> i32 { +// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i64 +// CHECK-NEXT: %[[TRUNCI:.+]] = arith.trunci %[[CTTZ]] : i64 to i32 +// CHECK-NEXT: %[[C1_I32:.+]] = arith.constant 1 : i32 +// 
CHECK-NEXT: %[[ADDI:.+]] = arith.addi %[[TRUNCI]], %[[C1_I32]] : i32
+// CHECK-NEXT: %[[C0_I64:.+]] = arith.constant 0 : i64
+// CHECK-NEXT: %[[CMPI:.+]] = arith.cmpi eq, %[[INPUT]], %[[C0_I64]] : i64
+// CHECK-NEXT: %[[C0_I32:.+]] = arith.constant 0 : i32
+// CHECK-NEXT: %[[SELECT:.+]] = arith.select %[[CMPI]], %[[C0_I32]], %[[ADDI]] : i32
+// CHECK: }
+
+int parity_u32(unsigned x) {
+ return __builtin_parity(x);
+}
+// CHECK: func.func @parity_u32(%arg0: i32{{.*}}) -> i32 {
+// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i32
+// CHECK-NEXT: %[[C1_I32:.+]] = arith.constant 1 : i32
+// CHECK-NEXT: %[[ANDI:.+]] = arith.andi %[[CTPOP]], %[[C1_I32]] : i32
+// CHECK-NEXT: %[[BITCAST:.+]] = arith.bitcast %[[ANDI]] : i32 to i32
+// CHECK: }
+
+int parity_u64(unsigned long x) {
+ return __builtin_parityl(x);
+}
+// CHECK: func.func @parity_u64(%arg0: i64{{.*}}) -> i32 {
+// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i64
+// CHECK-NEXT: %[[C1_I64:.+]] = arith.constant 1 : i64
+// CHECK-NEXT: %[[ANDI:.+]] = arith.andi %[[CTPOP]], %[[C1_I64]] : i64
+// CHECK-NEXT: %[[TRUNCI:.+]] = arith.trunci %[[ANDI]] : i64 to i32
+// CHECK: }
\ No newline at end of file
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/bool.cir b/clang/test/CIR/Lowering/ThroughMLIR/bool.cir
new file mode 100644
index 000000000000..408cac97ee41
--- /dev/null
+++ b/clang/test/CIR/Lowering/ThroughMLIR/bool.cir
@@ -0,0 +1,23 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+module {
+ cir.func @foo() {
+ %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a", init] {alignment = 1 : i64}
+ %1 = cir.const #true
+ cir.store %1, %0 : !cir.bool, !cir.ptr<!cir.bool>
+ cir.return
+ }
+}
+
+// MLIR: func @foo() {
+// MLIR: [[Value:%[a-z0-9]+]] = memref.alloca() {alignment = 1 : i64} : memref<i8>
+// MLIR: = arith.constant 1 : i8
+// MLIR: memref.store {{.*}}, [[Value]][] : memref<i8>
+// return
+
+// LLVM: = alloca i8, i64
+// LLVM: store i8 1, ptr %5
+// LLVM: ret
diff --git a/clang/test/CIR/Lowering/ThroughMLIR/branch.cir b/clang/test/CIR/Lowering/ThroughMLIR/branch.cir
new file mode 100644
index 000000000000..2b78484627d5
--- /dev/null
+++ b/clang/test/CIR/Lowering/ThroughMLIR/branch.cir
@@ -0,0 +1,37 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+cir.func @foo(%arg0: !cir.bool) -> !s32i {
+ cir.brcond %arg0 ^bb1, ^bb2
+ ^bb1:
+ %0 = cir.const #cir.int<1>: !s32i
+ cir.return %0 : !s32i
+ ^bb2:
+ %1 = cir.const #cir.int<0>: !s32i
+ cir.return %1 : !s32i
+}
+
+// MLIR: module {
+// MLIR-NEXT: func.func @foo(%arg0: i8) -> i32
+// MLIR-NEXT: %0 = arith.trunci %arg0 : i8 to i1
+// MLIR-NEXT: cf.cond_br %0, ^bb1, ^bb2
+// MLIR-NEXT: ^bb1: // pred: ^bb0
+// MLIR-NEXT: %c1_i32 = arith.constant 1 : i32
+// MLIR-NEXT: return %c1_i32 : i32
+// MLIR-NEXT: ^bb2: // pred: ^bb0
+// MLIR-NEXT: %c0_i32 = arith.constant 0 : i32
+// MLIR-NEXT: return %c0_i32 : i32
+// MLIR-NEXT: }
+// MLIR-NEXT: }
+
+// LLVM: define i32 @foo(i8 %0)
+// LLVM-NEXT: %2 = trunc i8 %0 to i1
+// LLVM-NEXT: br i1 %2, label %3, label %4
+// LLVM-EMPTY:
+// LLVM-NEXT: 3: ; preds = %1
+// LLVM-NEXT: ret i32 1
+// LLVM-EMPTY:
+// LLVM-NEXT: 4: ; preds = %1
+// LLVM-NEXT: ret i32 0
+// LLVM-NEXT: }
diff --git
a/clang/test/CIR/Lowering/ThroughMLIR/call.c b/clang/test/CIR/Lowering/ThroughMLIR/call.c new file mode 100644 index 000000000000..a325db5f2dd4 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/call.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +void foo(int i) {} + +int test(void) { + foo(2); + return 0; +} + +// CHECK-LABEL: func.func @test() -> i32 { +// CHECK: %[[ARG:.+]] = arith.constant 2 : i32 +// CHECK-NEXT: call @foo(%[[ARG]]) : (i32) -> () +// CHECK: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/cast.cir b/clang/test/CIR/Lowering/ThroughMLIR/cast.cir new file mode 100644 index 000000000000..18452a456880 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/cast.cir @@ -0,0 +1,147 @@ +// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +!s16i = !cir.int +!u32i = !cir.int +!u16i = !cir.int +!u8i = !cir.int +module { + // MLIR-LABEL: func.func @cast_int_to_bool(%arg0: i32) -> i8 + // LLVM-LABEL: define i8 @cast_int_to_bool(i32 %0) + cir.func @cast_int_to_bool(%i : !u32i) -> !cir.bool { + // MLIR-NEXT: %[[ZERO:.*]] = arith.constant 0 : i32 + // MLIR-NEXT: arith.cmpi ne, %arg0, %[[ZERO]] + // LLVM-NEXT: icmp ne i32 %0, 0 + + %1 = cir.cast(int_to_bool, %i : !u32i), !cir.bool + cir.return %1 : !cir.bool + } + // MLIR-LABEL: func.func @cast_integral_trunc(%arg0: i32) -> i16 + // LLVM-LABEL: define i16 @cast_integral_trunc(i32 %0) + cir.func @cast_integral_trunc(%i : !u32i) -> !u16i { + // MLIR-NEXT: arith.trunci %arg0 : i32 to i16 + // LLVM-NEXT: trunc i32 %0 to i16 + + %1 = cir.cast(integral, %i : !u32i), !u16i + cir.return %1 : !u16i + } + // MLIR-LABEL: func.func @cast_integral_extu(%arg0: i16) -> i32 + // LLVM-LABEL: define i32 @cast_integral_extu(i16 %0) + cir.func @cast_integral_extu(%i : !u16i) -> !u32i { + // MLIR-NEXT: arith.extui %arg0 : i16 to i32 + // LLVM-NEXT: zext i16 %0 to i32 + + %1 = cir.cast(integral, %i : !u16i), !u32i + cir.return %1 : !u32i + } + // MLIR-LABEL: func.func @cast_integral_exts(%arg0: i16) -> i32 + // LLVM-LABEL: define i32 @cast_integral_exts(i16 %0) + cir.func @cast_integral_exts(%i : !s16i) -> !s32i { + // MLIR-NEXT: arith.extsi %arg0 : i16 to i32 + // LLVM-NEXT: sext i16 %0 to i32 + + %1 = cir.cast(integral, %i : !s16i), !s32i + cir.return %1 : !s32i + } + // MLIR-LABEL: func.func @cast_integral_same_size(%arg0: i32) -> i32 + // LLVM-LABEL: define i32 @cast_integral_same_size(i32 %0) + cir.func @cast_integral_same_size(%i : !u32i) -> !s32i { + // MLIR-NEXT: %0 = arith.bitcast %arg0 : i32 to i32 + // LLVM-NEXT: ret i32 %0 + + %1 = cir.cast(integral, %i : !u32i), !s32i + cir.return %1 : !s32i + } + // MLIR-LABEL: func.func @cast_floating_trunc(%arg0: f64) -> f32 + // LLVM-LABEL: define float @cast_floating_trunc(double %0) + cir.func @cast_floating_trunc(%d : !cir.double) -> !cir.float { + // MLIR-NEXT: arith.truncf %arg0 : f64 to f32 + // LLVM-NEXT: fptrunc double %0 to float + + %1 = cir.cast(floating, %d : !cir.double), !cir.float + cir.return %1 : !cir.float + } + // MLIR-LABEL: func.func @cast_floating_extf(%arg0: f32) -> f64 + // LLVM-LABEL: define double @cast_floating_extf(float %0) + cir.func @cast_floating_extf(%f : !cir.float) -> !cir.double { + // MLIR-NEXT: arith.extf %arg0 : f32 to f64 + // LLVM-NEXT: fpext float %0 to double + + %1 = 
cir.cast(floating, %f : !cir.float), !cir.double + cir.return %1 : !cir.double + } + // MLIR-LABEL: func.func @cast_float_to_bool(%arg0: f32) -> i8 + // LLVM-LABEL: define i8 @cast_float_to_bool(float %0) + cir.func @cast_float_to_bool(%f : !cir.float) -> !cir.bool { + // MLIR-NEXT: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 + // MLIR-NEXT: arith.cmpf une, %arg0, %[[ZERO]] : f32 + // LLVM-NEXT: fcmp une float %0, 0.000000e+00 + + %1 = cir.cast(float_to_bool, %f : !cir.float), !cir.bool + cir.return %1 : !cir.bool + } + // MLIR-LABEL: func.func @cast_bool_to_int8(%arg0: i8) -> i8 + // LLVM-LABEL: define i8 @cast_bool_to_int8(i8 %0) + cir.func @cast_bool_to_int8(%b : !cir.bool) -> !u8i { + // MLIR-NEXT: arith.bitcast %arg0 : i8 to i8 + // LLVM-NEXT: ret i8 %0 + + %1 = cir.cast(bool_to_int, %b : !cir.bool), !u8i + cir.return %1 : !u8i + } + // MLIR-LABEL: func.func @cast_bool_to_int(%arg0: i8) -> i32 + // LLVM-LABEL: define i32 @cast_bool_to_int(i8 %0) + cir.func @cast_bool_to_int(%b : !cir.bool) -> !u32i { + // MLIR-NEXT: arith.extui %arg0 : i8 to i32 + // LLVM-NEXT: zext i8 %0 to i32 + + %1 = cir.cast(bool_to_int, %b : !cir.bool), !u32i + cir.return %1 : !u32i + } + // MLIR-LABEL: func.func @cast_bool_to_float(%arg0: i8) -> f32 + // LLVM-LABEL: define float @cast_bool_to_float(i8 %0) + cir.func @cast_bool_to_float(%b : !cir.bool) -> !cir.float { + // MLIR-NEXT: arith.uitofp %arg0 : i8 to f32 + // LLVM-NEXT: uitofp i8 %0 to float + + %1 = cir.cast(bool_to_float, %b : !cir.bool), !cir.float + cir.return %1 : !cir.float + } + // MLIR-LABEL: func.func @cast_signed_int_to_float(%arg0: i32) -> f32 + // LLVM-LABEL: define float @cast_signed_int_to_float(i32 %0) + cir.func @cast_signed_int_to_float(%i : !s32i) -> !cir.float { + // MLIR-NEXT: arith.sitofp %arg0 : i32 to f32 + // LLVM-NEXT: sitofp i32 %0 to float + + %1 = cir.cast(int_to_float, %i : !s32i), !cir.float + cir.return %1 : !cir.float + } + // MLIR-LABEL: func.func @cast_unsigned_int_to_float(%arg0: i32) -> f32 + // LLVM-LABEL: define float @cast_unsigned_int_to_float(i32 %0) + cir.func @cast_unsigned_int_to_float(%i : !u32i) -> !cir.float { + // MLIR-NEXT: arith.uitofp %arg0 : i32 to f32 + // LLVM-NEXT: uitofp i32 %0 to float + + %1 = cir.cast(int_to_float, %i : !u32i), !cir.float + cir.return %1 : !cir.float + } + // MLIR-LABEL: func.func @cast_float_to_int_signed(%arg0: f32) -> i32 + // LLVM-LABEL: define i32 @cast_float_to_int_signed(float %0) + cir.func @cast_float_to_int_signed(%f : !cir.float) -> !s32i { + // MLIR-NEXT: arith.fptosi %arg0 : f32 to i32 + // LLVM-NEXT: fptosi float %0 to i32 + + %1 = cir.cast(float_to_int, %f : !cir.float), !s32i + cir.return %1 : !s32i + } + // MLIR-LABEL: func.func @cast_float_to_int_unsigned(%arg0: f32) -> i32 + // LLVM-LABEL: define i32 @cast_float_to_int_unsigned(float %0) + cir.func @cast_float_to_int_unsigned(%f : !cir.float) -> !u32i { + // MLIR-NEXT: arith.fptoui %arg0 : f32 to i32 + // LLVM-NEXT: fptoui float %0 to i32 + + %1 = cir.cast(float_to_int, %f : !cir.float), !u32i + cir.return %1 : !u32i + } +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/ceil.cir b/clang/test/CIR/Lowering/ThroughMLIR/ceil.cir new file mode 100644 index 000000000000..dce0012a451b --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/ceil.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %0 = cir.const #cir.fp<1.31> : !cir.float + %1 = cir.const #cir.fp<3.0> : !cir.long_double + %2 = cir.const #cir.fp<2.73> : 
!cir.double + %3 = cir.const #cir.fp<4.67> : !cir.long_double + %4 = cir.ceil %0 : !cir.float + %5 = cir.ceil %1 : !cir.long_double + %6 = cir.ceil %2 : !cir.double + %7 = cir.ceil %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.310000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 3.000000e+00 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2.730000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 4.670000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.ceil %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.ceil %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.ceil %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.ceil %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/cmp.cpp b/clang/test/CIR/Lowering/ThroughMLIR/cmp.cpp new file mode 100644 index 000000000000..fcb9247bfb8f --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/cmp.cpp @@ -0,0 +1,185 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +bool testSignedIntCmpOps(int a, int b) { + // CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC2:.+]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC3:.+]] = memref.alloca() {alignment = 1 : i64} : memref + // CHECK: %[[ALLOC4:.+]] = memref.alloca() {alignment = 1 : i64} : memref + // CHECK: memref.store %arg0, %[[ALLOC1]][] : memref + // CHECK: memref.store %arg1, %[[ALLOC2]][] : memref + + bool x = a == b; + + // CHECK: %[[LOAD0:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD1:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP0:.+]] = arith.cmpi eq, %[[LOAD0]], %[[LOAD1]] : i32 + // CHECK: %[[EXT0:.+]] = arith.extui %[[CMP0]] : i1 to i8 + // CHECK: memref.store %[[EXT0]], %[[ALLOC4]][] : memref + + x = a != b; + + // CHECK: %[[LOAD2:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD3:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP1:.+]] = arith.cmpi ne, %[[LOAD2]], %[[LOAD3]] : i32 + // CHECK: %[[EXT1:.+]] = arith.extui %[[CMP1]] : i1 to i8 + // CHECK: memref.store %[[EXT1]], %[[ALLOC4]][] : memref + + x = a < b; + + // CHECK: %[[LOAD4:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD5:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP2:.+]] = arith.cmpi slt, %[[LOAD4]], %[[LOAD5]] : i32 + // CHECK: %[[EXT2:.+]] = arith.extui %[[CMP2]] : i1 to i8 + // CHECK: memref.store %[[EXT2]], %[[ALLOC4]][] : memref + + x = a <= b; + + // CHECK: %[[LOAD6:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD7:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP3:.+]] = arith.cmpi sle, %[[LOAD6]], %[[LOAD7]] : i32 + // CHECK: %[[EXT3:.+]] = arith.extui %[[CMP3]] : i1 to i8 + // CHECK: memref.store %[[EXT3]], %[[ALLOC4]][] : memref + + x = a > b; + + // CHECK: %[[LOAD8:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD9:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP4:.+]] = arith.cmpi sgt, %[[LOAD8]], %[[LOAD9]] : i32 + // CHECK: %[[EXT4:.+]] = arith.extui %[[CMP4]] : i1 to i8 + // CHECK: memref.store %[[EXT4]], %[[ALLOC4]][] : memref + + x = a >= b; + + // CHECK: %[[LOAD10:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD11:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP5:.+]] = arith.cmpi sge, %[[LOAD10]], %[[LOAD11]] : i32 + // 
CHECK: %[[EXT5:.+]] = arith.extui %[[CMP5]] : i1 to i8 + // CHECK: memref.store %[[EXT5]], %[[ALLOC4]][] : memref + + // CHECK: %[[LOAD12:.+]] = memref.load %[[ALLOC4]][] : memref + // CHECK: memref.store %[[LOAD12]], %[[ALLOC3]][] : memref + // CHECK: %[[LOAD13:.+]] = memref.load %[[ALLOC3]][] : memref + // CHECK: return %[[LOAD13]] : i8 + return x; +} + +bool testUnSignedIntBinOps(unsigned a, unsigned b) { + // CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC2:.+]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC3:.+]] = memref.alloca() {alignment = 1 : i64} : memref + // CHECK: %[[ALLOC4:.+]] = memref.alloca() {alignment = 1 : i64} : memref + // CHECK: memref.store %arg0, %[[ALLOC1]][] : memref + // CHECK: memref.store %arg1, %[[ALLOC2]][] : memref + + bool x = a == b; + + // CHECK: %[[LOAD0:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD1:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP0:.+]] = arith.cmpi eq, %[[LOAD0]], %[[LOAD1]] : i32 + // CHECK: %[[EXT0:.+]] = arith.extui %[[CMP0]] : i1 to i8 + // CHECK: memref.store %[[EXT0]], %[[ALLOC4]][] : memref + + x = a != b; + + // CHECK: %[[LOAD2:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD3:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP1:.+]] = arith.cmpi ne, %[[LOAD2]], %[[LOAD3]] : i32 + // CHECK: %[[EXT1:.+]] = arith.extui %[[CMP1]] : i1 to i8 + // CHECK: memref.store %[[EXT1]], %[[ALLOC4]][] : memref + + x = a < b; + + // CHECK: %[[LOAD4:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD5:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP2:.+]] = arith.cmpi ult, %[[LOAD4]], %[[LOAD5]] : i32 + // CHECK: %[[EXT2:.+]] = arith.extui %[[CMP2]] : i1 to i8 + // CHECK: memref.store %[[EXT2]], %[[ALLOC4]][] : memref + + x = a <= b; + + // CHECK: %[[LOAD6:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD7:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP3:.+]] = arith.cmpi ule, %[[LOAD6]], %[[LOAD7]] : i32 + // CHECK: %[[EXT3:.+]] = arith.extui %[[CMP3]] : i1 to i8 + // CHECK: memref.store %[[EXT3]], %[[ALLOC4]][] : memref + + x = a > b; + + // CHECK: %[[LOAD8:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD9:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP4:.+]] = arith.cmpi ugt, %[[LOAD8]], %[[LOAD9]] : i32 + // CHECK: %[[EXT4:.+]] = arith.extui %[[CMP4]] : i1 to i8 + // CHECK: memref.store %[[EXT4]], %[[ALLOC4]][] : memref + + x = a >= b; + + // CHECK: %[[LOAD10:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD11:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP5:.+]] = arith.cmpi uge, %[[LOAD10]], %[[LOAD11]] : i32 + // CHECK: %[[EXT5:.+]] = arith.extui %[[CMP5]] : i1 to i8 + // CHECK: memref.store %[[EXT5]], %[[ALLOC4]][] : memref + + return x; + // CHECK: return +} + +bool testFloatingPointCmpOps(float a, float b) { + // CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC2:.+]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC3:.+]] = memref.alloca() {alignment = 1 : i64} : memref + // CHECK: %[[ALLOC4:.+]] = memref.alloca() {alignment = 1 : i64} : memref + // CHECK: memref.store %arg0, %[[ALLOC1]][] : memref + // CHECK: memref.store %arg1, %[[ALLOC2]][] : memref + + bool x = a == b; + + // CHECK: %[[LOAD0:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD1:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP0:.+]] = arith.cmpf oeq, %[[LOAD0]], 
%[[LOAD1]] : f32 + // CHECK: %[[EXT0:.+]] = arith.extui %[[CMP0]] : i1 to i8 + // CHECK: memref.store %[[EXT0]], %[[ALLOC4]][] : memref + + x = a != b; + + // CHECK: %[[LOAD2:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD3:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP1:.+]] = arith.cmpf une, %[[LOAD2]], %[[LOAD3]] : f32 + // CHECK: %[[EXT1:.+]] = arith.extui %[[CMP1]] : i1 to i8 + // CHECK: memref.store %[[EXT1]], %[[ALLOC4]][] : memref + + x = a < b; + + // CHECK: %[[LOAD4:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD5:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP2:.+]] = arith.cmpf olt, %[[LOAD4]], %[[LOAD5]] : f32 + // CHECK: %[[EXT2:.+]] = arith.extui %[[CMP2]] : i1 to i8 + // CHECK: memref.store %[[EXT2]], %[[ALLOC4]][] : memref + + x = a <= b; + + // CHECK: %[[LOAD6:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD7:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP3:.+]] = arith.cmpf ole, %[[LOAD6]], %[[LOAD7]] : f32 + // CHECK: %[[EXT3:.+]] = arith.extui %[[CMP3]] : i1 to i8 + // CHECK: memref.store %[[EXT3]], %[[ALLOC4]][] : memref + + x = a > b; + + // CHECK: %[[LOAD8:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD9:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP4:.+]] = arith.cmpf ogt, %[[LOAD8]], %[[LOAD9]] : f32 + // CHECK: %[[EXT4:.+]] = arith.extui %[[CMP4]] : i1 to i8 + // CHECK: memref.store %[[EXT4]], %[[ALLOC4]][] : memref + + x = a >= b; + + // CHECK: %[[LOAD10:.+]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[LOAD11:.+]] = memref.load %[[ALLOC2]][] : memref + // CHECK: %[[CMP5:.+]] = arith.cmpf oge, %[[LOAD10]], %[[LOAD11]] : f32 + // CHECK: %[[EXT5:.+]] = arith.extui %[[CMP5]] : i1 to i8 + // CHECK: memref.store %[[EXT5]], %[[ALLOC4]][] : memref + + return x; + // CHECK: return +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/ThroughMLIR/cos.cir b/clang/test/CIR/Lowering/ThroughMLIR/cos.cir new file mode 100644 index 000000000000..93b102b7a854 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/cos.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %1 = cir.const #cir.fp<1.0> : !cir.float + %2 = cir.const #cir.fp<1.0> : !cir.double + %3 = cir.const #cir.fp<1.0> : !cir.long_double + %4 = cir.const #cir.fp<1.0> : !cir.long_double + %5 = cir.cos %1 : !cir.float + %6 = cir.cos %2 : !cir.double + %7 = cir.cos %3 : !cir.long_double + %8 = cir.cos %4 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1.000000e+00 : f64 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 1.000000e+00 : f80 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 1.000000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.cos %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.cos %[[C1]] : f64 +// CHECK-NEXT: %{{.+}} = math.cos %[[C2]] : f80 +// CHECK-NEXT: %{{.+}} = math.cos %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c b/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c new file mode 100644 index 000000000000..b6069e8a787e --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/doWhile.c @@ -0,0 +1,107 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +int 
sum() { + int s = 0; + int i = 0; + do { + s += i; + ++i; + } while (i <= 10); + return s; +} + +void nestedDoWhile() { + int a = 0; + do { + a++; + int b = 0; + while(b < 2) { + b++; + } + }while(a < 2); +} + +// CHECK: func.func @sum() -> i32 { +// CHECK: %[[ALLOC:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: %[[ALLOC0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +// CHECK: memref.store %[[C0_I32]], %[[ALLOC0]][] : memref +// CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32 +// CHECK: memref.store %[[C0_I32_2]], %[[ALLOC1]][] : memref +// CHECK: memref.alloca_scope { +// CHECK: scf.while : () -> () { +// CHECK: %[[VAR1:.+]] = memref.load %[[ALLOC1]][] : memref +// CHECK: %[[VAR2:.+]] = memref.load %[[ALLOC0]][] : memref +// CHECK: %[[ADD:.+]] = arith.addi %[[VAR2]], %[[VAR1]] : i32 +// CHECK: memref.store %[[ADD]], %[[ALLOC0]][] : memref +// CHECK: %[[VAR3:.+]] = memref.load %[[ALLOC1]][] : memref +// CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +// CHECK: %[[ADD1:.+]] = arith.addi %[[VAR3]], %[[C1_I32]] : i32 +// CHECK: memref.store %[[ADD1]], %[[ALLOC1]][] : memref +// CHECK: %[[VAR4:.+]] = memref.load %[[ALLOC1]][] : memref +// CHECK: %[[C10_I32:.+]] = arith.constant 10 : i32 +// CHECK: %[[CMP:.+]] = arith.cmpi sle, %[[VAR4]], %[[C10_I32]] : i32 +// CHECK: %[[EXT:.+]] = arith.extui %[[CMP]] : i1 to i32 +// CHECK: %[[C0_I32_3:.+]] = arith.constant 0 : i32 +// CHECK: %[[NE:.+]] = arith.cmpi ne, %[[EXT]], %[[C0_I32_3]] : i32 +// CHECK: %[[EXT1:.+]] = arith.extui %[[NE]] : i1 to i8 +// CHECK: %[[TRUNC:.+]] = arith.trunci %[[EXT1]] : i8 to i1 +// CHECK: scf.condition(%[[TRUNC]]) +// CHECK: } do { +// CHECK: scf.yield +// CHECK: } +// CHECK: } +// CHECK: %[[LOAD:.+]] = memref.load %[[ALLOC0]][] : memref +// CHECK: memref.store %[[LOAD]], %[[ALLOC]][] : memref +// CHECK: %[[RET:.+]] = memref.load %[[ALLOC]][] : memref +// CHECK: return %[[RET]] : i32 + +// CHECK: func.func @nestedDoWhile() { +// CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +// CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref +// CHECK: memref.alloca_scope { +// CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +// CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +// CHECK: %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32 +// CHECK: memref.store %[[ONE]], %[[alloca]][] : memref +// CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +// CHECK: memref.store %[[C0_I32_1]], %[[alloca_0]][] : memref +// CHECK: memref.alloca_scope { +// CHECK: scf.while : () -> () { +// CHECK: %[[EIGHT:.+]] = memref.load %[[alloca_0]][] : memref +// CHECK: %[[C2_I32_3:.+]] = arith.constant 2 : i32 +// CHECK: %[[NINE:.+]] = arith.cmpi slt, %[[EIGHT]], %[[C2_I32_3]] : i32 +// CHECK: %[[TEN:.+]] = arith.extui %9 : i1 to i32 +// CHECK: %[[C0_I32_4:.+]] = arith.constant 0 : i32 +// CHECK: %[[ELEVEN:.+]] = arith.cmpi ne, %[[TEN]], %[[C0_I32_4]] : i32 +// CHECK: %[[TWELVE:.+]] = arith.extui %[[ELEVEN]] : i1 to i8 +// CHECK: %[[THIRTEEN:.+]] = arith.trunci %[[TWELVE]] : i8 to i1 +// CHECK: scf.condition(%[[THIRTEEN]]) +// CHECK: } do { +// CHECK: %[[EIGHT]] = memref.load %[[alloca_0]][] : memref +// CHECK: %[[C1_I32_3:.+]] = arith.constant 1 : i32 +// CHECK: %[[NINE]] = arith.addi %[[EIGHT]], 
%[[C1_I32_3]] : i32 +// CHECK: memref.store %[[NINE]], %[[alloca_0]][] : memref +// CHECK: scf.yield +// CHECK: } +// CHECK: } +// CHECK: %[[TWO:.+]] = memref.load %[[alloca]][] : memref +// CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +// CHECK: %[[THREE:.+]] = arith.cmpi slt, %[[TWO]], %[[C2_I32]] : i32 +// CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i32 +// CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32 +// CHECK: %[[FIVE:.+]] = arith.cmpi ne, %[[FOUR]], %[[C0_I32_2]] : i32 +// CHECK: %[[SIX:.+]] = arith.extui %[[FIVE]] : i1 to i8 +// CHECK: %[[SEVEN:.+]] = arith.trunci %[[SIX]] : i8 to i1 +// CHECK: scf.condition(%[[SEVEN]]) +// CHECK: } do { +// CHECK: scf.yield +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } \ No newline at end of file diff --git a/clang/test/CIR/Lowering/ThroughMLIR/dot.cir b/clang/test/CIR/Lowering/ThroughMLIR/dot.cir new file mode 100644 index 000000000000..5fc5311a65a9 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/dot.cir @@ -0,0 +1,29 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +module { + cir.func @dot(%arg0: !cir.ptr) -> !s32i { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["x", init] {alignment = 8 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %2 = cir.alloca !cir.ptr, !cir.ptr>, ["y", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> + %3 = cir.load %0 : !cir.ptr>, !cir.ptr + cir.store %3, %2 : !cir.ptr, !cir.ptr> + %4 = cir.const #cir.int<0> : !s32i + %5 = cir.load %1 : !cir.ptr, !s32i + cir.return %5 : !s32i + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @dot(%arg0: memref) -> i32 { +// CHECK-NEXT: %alloca = memref.alloca() {alignment = 8 : i64} : memref> +// CHECK-NEXT: %alloca_0 = memref.alloca() {alignment = 4 : i64} : memref +// CHECK-NEXT: %alloca_1 = memref.alloca() {alignment = 8 : i64} : memref> +// CHECK-NEXT: memref.store %arg0, %alloca[] : memref> +// CHECK-NEXT: %0 = memref.load %alloca[] : memref> +// CHECK-NEXT: memref.store %0, %alloca_1[] : memref> +// CHECK-NEXT: %c0_i32 = arith.constant 0 : i32 +// CHECK-NEXT: %1 = memref.load %alloca_0[] : memref +// CHECK-NEXT: return %1 : i32 diff --git a/clang/test/CIR/Lowering/ThroughMLIR/exp.cir b/clang/test/CIR/Lowering/ThroughMLIR/exp.cir new file mode 100644 index 000000000000..13294b7532dc --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/exp.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %0 = cir.const #cir.fp<1.0> : !cir.float + %1 = cir.const #cir.fp<3.0> : !cir.long_double + %2 = cir.const #cir.fp<2.0> : !cir.double + %3 = cir.const #cir.fp<4.00> : !cir.long_double + %4 = cir.exp %0 : !cir.float + %5 = cir.exp %1 : !cir.long_double + %6 = cir.exp2 %2 : !cir.double + %7 = cir.exp2 %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 3.000000e+00 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2.000000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 4.000000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.exp %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.exp %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.exp2 %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.exp2 %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/fabs.cir 
b/clang/test/CIR/Lowering/ThroughMLIR/fabs.cir new file mode 100644 index 000000000000..9a6c33fd8ab6 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/fabs.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %0 = cir.const #cir.fp<-1.0> : !cir.float + %1 = cir.const #cir.fp<-3.0> : !cir.long_double + %2 = cir.const #cir.fp<-2.0> : !cir.double + %3 = cir.const #cir.fp<-4.00> : !cir.long_double + %4 = cir.fabs %0 : !cir.float + %5 = cir.fabs %1 : !cir.long_double + %6 = cir.fabs %2 : !cir.double + %7 = cir.fabs %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant -1.000000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant -3.000000e+00 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant -2.000000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant -4.000000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.absf %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.absf %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.absf %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.absf %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/floor.cir b/clang/test/CIR/Lowering/ThroughMLIR/floor.cir new file mode 100644 index 000000000000..e4718468966c --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/floor.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %0 = cir.const #cir.fp<1.51> : !cir.float + %1 = cir.const #cir.fp<3.0> : !cir.long_double + %2 = cir.const #cir.fp<2.73> : !cir.double + %3 = cir.const #cir.fp<4.67> : !cir.long_double + %4 = cir.floor %0 : !cir.float + %5 = cir.floor %1 : !cir.long_double + %6 = cir.floor %2 : !cir.double + %7 = cir.floor %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.510000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 3.000000e+00 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2.730000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 4.670000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.floor %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.floor %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.floor %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.floor %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/for.cpp b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp new file mode 100644 index 000000000000..3ed99718369a --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/for.cpp @@ -0,0 +1,103 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +int a[101], b[101]; + +void constantLoopBound() { + for (int i = 0; i < 100; ++i) + a[i] = 3; +} +// CHECK-LABEL: func.func @_Z17constantLoopBoundv() { +// CHECK: %[[C0:.*]] = arith.constant 0 : i32 +// CHECK: %[[C100:.*]] = arith.constant 100 : i32 +// CHECK: %[[C1:.*]] = arith.constant 1 : i32 +// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 { +// CHECK: %[[C3:.*]] = arith.constant 3 : i32 +// CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32> +// CHECK: %[[C0_i32:.*]] = arith.constant 0 : i32 +// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0_i32]] : i32 +// CHECK: %[[INDEX:.*]] = arith.index_cast 
%[[IV]] : i32 to index
+// CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+
+void constantLoopBound_LE() {
+ for (int i = 0; i <= 100; ++i)
+ a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z20constantLoopBound_LEv() {
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK: %[[C100:.*]] = arith.constant 100 : i32
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: %[[C101:.*]] = arith.addi %[[C100]], %[[C1]] : i32
+// CHECK: %[[C1_STEP:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C101]] step %[[C1_STEP]] : i32 {
+// CHECK: %[[C3:.*]] = arith.constant 3 : i32
+// CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK: %[[C0_i32:.*]] = arith.constant 0 : i32
+// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0_i32]] : i32
+// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+
+void variableLoopBound(int l, int u) {
+ for (int i = l; i < u; ++i)
+ a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z17variableLoopBoundii
+// CHECK: memref.store %arg0, %alloca[] : memref
+// CHECK: memref.store %arg1, %alloca_0[] : memref
+// CHECK: %[[LOWER:.*]] = memref.load %alloca[] : memref
+// CHECK: %[[UPPER:.*]] = memref.load %alloca_0[] : memref
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C1]] : i32 {
+// CHECK: %[[C3:.*]] = arith.constant 3 : i32
+// CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0]] : i32
+// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+
+void variableLoopBound_LE(int l, int u) {
+ for (int i = l; i <= u; i+=4)
+ a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z20variableLoopBound_LEii
+// CHECK: memref.store %arg0, %alloca[] : memref
+// CHECK: memref.store %arg1, %alloca_0[] : memref
+// CHECK: %[[LOWER:.*]] = memref.load %alloca[] : memref
+// CHECK: %[[UPPER_DEC_1:.*]] = memref.load %alloca_0[] : memref
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: %[[UPPER:.*]] = arith.addi %[[UPPER_DEC_1]], %[[C1]] : i32
+// CHECK: %[[C4:.*]] = arith.constant 4 : i32
+// CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C4]] : i32 {
+// CHECK: %[[C3:.*]] = arith.constant 3 : i32
+// CHECK: %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK: %[[IV:.*]] = arith.addi %[[I]], %[[C0]] : i32
+// CHECK: %[[INDEX:.*]] = arith.index_cast %[[IV]] : i32 to index
+// CHECK: memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+
+void incArray() {
+ for (int i = 0; i < 100; ++i)
+ a[i] += b[i];
+}
+// CHECK-LABEL: func.func @_Z8incArrayv() {
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK: %[[C100:.*]] = arith.constant 100 : i32
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
+// CHECK: %[[B:.*]] = memref.get_global @b : memref<101xi32>
+// CHECK: %[[C0_2:.*]] = arith.constant 0 : i32
+// CHECK: %[[IV2:.*]] = arith.addi %[[I]], %[[C0_2]] : i32
+// CHECK: %[[INDEX_2:.*]] = arith.index_cast %[[IV2]] : i32 to index
+// CHECK: %[[B_VALUE:.*]] = memref.load %[[B]][%[[INDEX_2]]] : memref<101xi32>
+// CHECK: %[[A:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK: %[[C0_1:.*]] = arith.constant
0 : i32 +// CHECK: %[[IV1:.*]] = arith.addi %[[I]], %[[C0_1]] : i32 +// CHECK: %[[INDEX_1:.*]] = arith.index_cast %[[IV1]] : i32 to index +// CHECK: %[[A_VALUE:.*]] = memref.load %[[A]][%[[INDEX_1]]] : memref<101xi32> +// CHECK: %[[SUM:.*]] = arith.addi %[[A_VALUE]], %[[B_VALUE]] : i32 +// CHECK: memref.store %[[SUM]], %[[A]][%[[INDEX_1]]] : memref<101xi32> +// CHECK: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/global.cir b/clang/test/CIR/Lowering/ThroughMLIR/global.cir new file mode 100644 index 000000000000..4415c6409a0b --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/global.cir @@ -0,0 +1,55 @@ +// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!u32i = !cir.int +module { + cir.global external @i = #cir.int<2> : !u32i + cir.global external @f = #cir.fp<3.000000e+00> : !cir.float + cir.global external @b = #cir.bool : !cir.bool + cir.global "private" external @a : !cir.array + cir.global external @aa = #cir.zero : !cir.array x 256> + + cir.func @get_global_int_value() -> !u32i { + %0 = cir.get_global @i : !cir.ptr + %1 = cir.load %0 : !cir.ptr, !u32i + cir.return %1 : !u32i + } + cir.func @get_global_float_value() -> !cir.float { + %0 = cir.get_global @f : !cir.ptr + %1 = cir.load %0 : !cir.ptr, !cir.float + cir.return %1 : !cir.float + } + cir.func @get_global_bool_value() -> !cir.bool { + %0 = cir.get_global @b : !cir.ptr + %1 = cir.load %0 : !cir.ptr, !cir.bool + cir.return %1 : !cir.bool + } + cir.func @get_global_array_pointer() -> !cir.ptr> { + %0 = cir.get_global @a : !cir.ptr> + cir.return %0 : !cir.ptr> + } + cir.func @get_global_multi_array_pointer() -> !cir.ptr x 256>> { + %0 = cir.get_global @aa : !cir.ptr x 256>> + cir.return %0 : !cir.ptr x 256>> + } +} + +// MLIR: memref.global "public" @i : memref = dense<2> +// MLIR: memref.global "public" @f : memref = dense<3.000000e+00> +// MLIR: memref.global "public" @b : memref = dense<1> +// MLIR: memref.global "private" @a : memref<100xi32> +// MLIR: memref.global "public" @aa : memref<256x256xi32> = dense<0> +// MLIR: memref.get_global @i : memref +// MLIR: memref.get_global @f : memref +// MLIR: memref.get_global @b : memref +// MLIR: memref.get_global @a : memref<100xi32> +// MLIR: memref.get_global @aa : memref<256x256xi32> + +// LLVM: @i = global i32 2 +// LLVM: @f = global float 3.000000e+00 +// LLVM: @b = global i8 1 +// LLVM: @a = private global [100 x i32] undef +// LLVM: @aa = global [256 x [256 x i32]] zeroinitializer +// LLVM: load i32, ptr @i +// LLVM: load float, ptr @f +// LLVM: load i8, ptr @b diff --git a/clang/test/CIR/Lowering/ThroughMLIR/global.cpp b/clang/test/CIR/Lowering/ThroughMLIR/global.cpp new file mode 100644 index 000000000000..d7627139ff6c --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/global.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +float f[32000]; +// CHECK: memref.global "public" @f : memref<32000xf32> = dense<0.000000e+00> +double d; +// CHECK: memref.global "public" @d : memref = dense<0.000000e+00> +float f_init[] = {1.0, 2.0}; +// CHECK: memref.global "public" @f_init : memref<2xf32> = dense<[1.000000e+00, 2.000000e+00]> +int i_init[2] = {0, 1}; +// CHECK: memref.global "public" @i_init : memref<2xi32> = dense<[0, 1]> +char string[] = "whatnow"; +// CHECK: memref.global "public" @string : 
memref<8xi8> = dense<[119, 104, 97, 116, 110, 111, 119, 0]> +int excess_sint[4] = {1, 2}; +// CHECK: memref.global "public" @excess_sint : memref<4xi32> = dense<[1, 2, 0, 0]> +int sint[] = {123, 456, 789}; +// CHECK: memref.global "public" @sint : memref<3xi32> = dense<[123, 456, 789]> diff --git a/clang/test/CIR/Lowering/ThroughMLIR/goto.cir b/clang/test/CIR/Lowering/ThroughMLIR/goto.cir new file mode 100644 index 000000000000..6c1d5c66fffa --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/goto.cir @@ -0,0 +1,35 @@ +// RUN: cir-opt %s -canonicalize -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -canonicalize -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!u32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !u32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + cir.br ^bb2 + ^bb1: // no predecessors + %2 = cir.load %0 : !cir.ptr, !u32i + %3 = cir.const #cir.int<1> : !u32i + %4 = cir.binop(add, %2, %3) : !u32i + cir.store %4, %0 : !u32i, !cir.ptr + cir.br ^bb2 + ^bb2: // 2 preds: ^bb0, ^bb1 + %5 = cir.load %0 : !cir.ptr, !u32i + %6 = cir.const #cir.int<2> : !u32i + %7 = cir.binop(add, %5, %6) : !u32i + cir.store %7, %0 : !u32i, !cir.ptr + cir.return + } +} + +// MLIR: module { +// MLIR-NEXT: func @foo +// MLIR: cf.br ^bb1 +// MLIR: ^bb1: +// MLIR: return + +// LLVM: br label %[[Value:[0-9]+]] +// LLVM-EMPTY: +// LLVM-NEXT: [[Value]]: ; preds = +// LLVM: ret void diff --git a/clang/test/CIR/Lowering/ThroughMLIR/if.c b/clang/test/CIR/Lowering/ThroughMLIR/if.c new file mode 100644 index 000000000000..4ff228514cd6 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/if.c @@ -0,0 +1,137 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +void foo() { + int a = 2; + int b = 0; + if (a > 0) { + b++; + } else { + b--; + } +} + +//CHECK: func.func @foo() { +//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref +//CHECK: memref.alloca_scope { +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi sgt, %[[ZERO]], %[[C0_I32_1]] : i32 +//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 +//CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32 +//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO]], %[[C0_I32_2]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 +//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: scf.if %[[FIVE]] { +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32 +//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref +//CHECK: } else { +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[SEVEN:.+]] = arith.subi %[[SIX]], %[[C1_I32]] : i32 +//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } + +void foo2() { + int a = 2; 
+ int b = 0; + if (a < 3) { + b++; + } +} + +//CHECK: func.func @foo2() { +//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref +//CHECK: memref.alloca_scope { +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C3_I32:.+]] = arith.constant 3 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32 +//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 +//CHECK: %[[C0_I32_1]] = arith.constant 0 : i32 +//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO]], %[[C0_I32_1]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 +//CHECK: %[[FIVE]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: scf.if %[[FIVE]] { +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32 +//CHECK: memref.store %[[SEVEN]], %[[alloca_0]][] : memref +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } + +void foo3() { + int a = 2; + int b = 0; + if (a < 3) { + int c = 1; + if (c > 2) { + b++; + } else { + b--; + } + } +} + + +//CHECK: func.func @foo3() { +//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: memref.store %[[C2_I32]], %[[alloca]][] : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca_0]][] : memref +//CHECK: memref.alloca_scope { +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C3_I32:.+]] = arith.constant 3 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32 +//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32 +//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32 +//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO:.+]], %[[C0_I32_1]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8 +//CHECK: %[[FIVE]] = arith.trunci %[[FOUR]] : i8 to i1 +//CHECK: scf.if %[[FIVE]] { +//CHECK: %[[alloca_2:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32 +//CHECK: memref.store %[[C1_I32]], %[[alloca_2]][] : memref +//CHECK: memref.alloca_scope { +//CHECK: %[[SIX:.+]] = memref.load %[[alloca_2]][] : memref +//CHECK: %[[C2_I32_3:.+]] = arith.constant 2 : i32 +//CHECK: %[[SEVEN:.+]] = arith.cmpi sgt, %[[SIX]], %[[C2_I32_3]] : i32 +//CHECK: %[[EIGHT:.+]] = arith.extui %[[SEVEN]] : i1 to i32 +//CHECK: %[[C0_I32_4:.+]] = arith.constant 0 : i32 +//CHECK: %[[NINE:.+]] = arith.cmpi ne, %[[EIGHT]], %[[C0_I32_4]] : i32 +//CHECK: %[[TEN:.+]] = arith.extui %[[NINE]] : i1 to i8 +//CHECK: %[[ELEVEN:.+]] = arith.trunci %[[TEN]] : i8 to i1 +//CHECK: scf.if %[[ELEVEN]] { +//CHECK: %[[TWELVE:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32_5:.+]] = arith.constant 1 : i32 +//CHECK: %[[THIRTEEN:.+]] = arith.addi %[[TWELVE]], %[[C1_I32_5]] : i32 +//CHECK: memref.store %[[THIRTEEN]], %[[alloca_0]][] : memref +//CHECK: } else { +//CHECK: %[[TWELVE:.+]] = memref.load %[[alloca_0]][] : memref +//CHECK: %[[C1_I32_5:.+]] = arith.constant 1 : i32 +//CHECK: %[[THIRTEEN:.+]] = arith.subi %[[TWELVE]], %[[C1_I32_5]] : i32 +//CHECK: memref.store 
%[[THIRTEEN]], %[[alloca_0]][] : memref +//CHECK: } +//CHECK: } +//CHECK: } +//CHECK: } +//CHECK: return +//CHECK: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/log.cir b/clang/test/CIR/Lowering/ThroughMLIR/log.cir new file mode 100644 index 000000000000..e9af7c88ca8a --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/log.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %0 = cir.const #cir.fp<1.0> : !cir.float + %1 = cir.const #cir.fp<3.0> : !cir.long_double + %2 = cir.const #cir.fp<2.0> : !cir.double + %3 = cir.const #cir.fp<4.0> : !cir.long_double + %4 = cir.log %0 : !cir.float + %5 = cir.log %1 : !cir.long_double + %6 = cir.log2 %2 : !cir.double + %7 = cir.log10 %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 3.000000e+00 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2.000000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 4.000000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.log %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.log %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.log2 %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.log10 %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/memref.cir b/clang/test/CIR/Lowering/ThroughMLIR/memref.cir new file mode 100644 index 000000000000..d51c4425d702 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/memref.cir @@ -0,0 +1,34 @@ +// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!u32i = !cir.int +module { + cir.func @foo() -> !u32i { + %0 = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !u32i + cir.return %2 : !u32i + } +} + +// MLIR: module { +// MLIR-NEXT: func @foo() -> i32 { +// MLIR-NEXT: [[alloca:%[a-z0-9]+]] = memref.alloca() {alignment = 4 : i64} : memref +// MLIR-NEXT: %c1_i32 = arith.constant 1 : i32 +// MLIR-NEXT: memref.store %c1_i32, [[alloca]][] : memref +// MLIR-NEXT: [[load:%[a-z0-9]+]] = memref.load [[alloca]][] : memref +// MLIR-NEXT: return [[load]] : i32 +// MLIR-NEXT: } +// MLIR-NEXT: } + +// LLVM: define i32 @foo() +// LLVM-NEXT: %1 = alloca i32, i64 +// LLVM-NEXT: %2 = insertvalue { ptr, ptr, i64 } undef, ptr %1, 0 +// LLVM-NEXT: %3 = insertvalue { ptr, ptr, i64 } %2, ptr %1, 1 +// LLVM-NEXT: %4 = insertvalue { ptr, ptr, i64 } %3, i64 0, 2 +// LLVM-NEXT: %5 = extractvalue { ptr, ptr, i64 } %4, 1 +// LLVM-NEXT: store i32 1, ptr %5, align 4 +// LLVM-NEXT: %6 = extractvalue { ptr, ptr, i64 } %4, 1 +// LLVM-NEXT: %7 = load i32, ptr %6, align 4 +// LLVM-NEXT: ret i32 %7 diff --git a/clang/test/CIR/Lowering/ThroughMLIR/ptrstride.cir b/clang/test/CIR/Lowering/ThroughMLIR/ptrstride.cir new file mode 100644 index 000000000000..19782c9d0ba7 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/ptrstride.cir @@ -0,0 +1,78 @@ +// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +module { + cir.global "private" external @a : !cir.array + cir.global "private" external @aa : !cir.array x 100> 
+ + // int get_1d_array_value() { return a[1]; } + // MLIR-LABEL: func.func @get_1d_array_value() -> i32 + // LLVM-LABEL: define i32 @get_1d_array_value() + cir.func @get_1d_array_value() -> !s32i { + // MLIR-NEXT: %[[BASE:.*]] = memref.get_global @a : memref<100xi32> + // MLIR-NEXT: %[[ONE:.*]] = arith.constant 1 : i32 + // MLIR-NEXT: %[[INDEX:.*]] = arith.index_cast %[[ONE]] : i32 to index + // MLIR-NEXT: %[[VALUE:.*]] = memref.load %[[BASE]][%[[INDEX]]] : memref<100xi32> + + // LLVM-NEXT: load i32, ptr getelementptr (i32, ptr @a, i64 1) + + %1 = cir.get_global @a : !cir.ptr> + %2 = cir.const #cir.int<1> : !s32i + %3 = cir.cast(array_to_ptrdecay, %1 : !cir.ptr>), !cir.ptr + %4 = cir.ptr_stride(%3 : !cir.ptr, %2 : !s32i), !cir.ptr + %5 = cir.load %4 : !cir.ptr, !s32i + cir.return %5 : !s32i + } + + // int get_2d_array_value() { return aa[1][2]; } + // MLIR-LABEL: func.func @get_2d_array_value() -> i32 + // LLVM-LABEL: define i32 @get_2d_array_value() + cir.func @get_2d_array_value() -> !s32i { + // MLIR-NEXT: %[[BASE:.*]] = memref.get_global @aa : memref<100x100xi32> + // MLIR-NEXT: %[[ONE:.*]] = arith.constant 1 : i32 + // MLIR-NEXT: %[[INDEX1:.*]] = arith.index_cast %[[ONE]] : i32 to index + // MLIR-NEXT: %[[TWO:.*]] = arith.constant 2 : i32 + // MLIR-NEXT: %[[INDEX2:.*]] = arith.index_cast %[[TWO]] : i32 to index + // MLIR-NEXT: %[[VALUE:.*]] = memref.load %[[BASE]][%[[INDEX1]], %[[INDEX2]]] : memref<100x100xi32> + + // LLVM-NEXT: load i32, ptr getelementptr (i32, ptr @aa, i64 102) + + %1 = cir.get_global @aa : !cir.ptr x 100>> + %2 = cir.const #cir.int<1> : !s32i + %3 = cir.cast(array_to_ptrdecay, %1 : !cir.ptr x 100>>), !cir.ptr> + %4 = cir.ptr_stride(%3 : !cir.ptr>, %2 : !s32i), !cir.ptr> + %5 = cir.const #cir.int<2> : !s32i + %6 = cir.cast(array_to_ptrdecay, %4 : !cir.ptr>), !cir.ptr + %7 = cir.ptr_stride(%6 : !cir.ptr, %5 : !s32i), !cir.ptr + %8 = cir.load %7 : !cir.ptr, !s32i + cir.return %8 : !s32i + } + + // void inc_1d_array_value() { a[1] += 2; } + // MLIR-LABEL: func.func @inc_1d_array_value() + // LLVM-LABEL: define void @inc_1d_array_value() + cir.func @inc_1d_array_value() { + // MLIR-NEXT: %[[TWO:.*]] = arith.constant 2 : i32 + // MLIR-NEXT: %[[BASE:.*]] = memref.get_global @a : memref<100xi32> + // MLIR-NEXT: %[[ONE:.*]] = arith.constant 1 : i32 + // MLIR-NEXT: %[[INDEX:.*]] = arith.index_cast %[[ONE]] : i32 to index + // MLIR-NEXT: %[[VALUE:.*]] = memref.load %[[BASE]][%[[INDEX]]] : memref<100xi32> + // MLIR-NEXT: %[[VALUE_INC:.*]] = arith.addi %[[VALUE]], %[[TWO]] : i32 + // MLIR-NEXT: memref.store %[[VALUE_INC]], %[[BASE]][%[[INDEX]]] : memref<100xi32> + + // LLVM-NEXT: %[[VALUE:.*]] = load i32, ptr getelementptr (i32, ptr @a, i64 1) + // LLVM-NEXT: %[[VALUE_INC:.*]] = add i32 %[[VALUE]], 2 + // LLVM-NEXT: store i32 %[[VALUE_INC]], ptr getelementptr (i32, ptr @a, i64 1) + + %0 = cir.const #cir.int<2> : !s32i + %1 = cir.get_global @a : !cir.ptr> + %2 = cir.const #cir.int<1> : !s32i + %3 = cir.cast(array_to_ptrdecay, %1 : !cir.ptr>), !cir.ptr + %4 = cir.ptr_stride(%3 : !cir.ptr, %2 : !s32i), !cir.ptr + %5 = cir.load %4 : !cir.ptr, !s32i + %6 = cir.binop(add, %5, %0) : !s32i + cir.store %6, %4 : !s32i, !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/round.cir b/clang/test/CIR/Lowering/ThroughMLIR/round.cir new file mode 100644 index 000000000000..117a93bcba9b --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/round.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + 
cir.func @foo() { + %0 = cir.const #cir.fp<1.31> : !cir.float + %1 = cir.const #cir.fp<3.0> : !cir.long_double + %2 = cir.const #cir.fp<2.73> : !cir.double + %3 = cir.const #cir.fp<4.67> : !cir.long_double + %4 = cir.round %0 : !cir.float + %5 = cir.round %1 : !cir.long_double + %6 = cir.round %2 : !cir.double + %7 = cir.round %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.310000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 3.000000e+00 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2.730000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 4.670000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.round %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.round %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.round %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.round %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/scope.cir b/clang/test/CIR/Lowering/ThroughMLIR/scope.cir new file mode 100644 index 000000000000..cf5effc02ce6 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/scope.cir @@ -0,0 +1,48 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s -input-file=%t.mlir -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o %t.mlir +// RUN: FileCheck %s -input-file=%t.mlir -check-prefix=LLVM + +!u32i = !cir.int +module { + cir.func @foo() { + cir.scope { + %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<4> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + } + cir.return + } + +// MLIR: func.func @foo() +// MLIR-NEXT: memref.alloca_scope +// MLIR-NEXT: %alloca = memref.alloca() {alignment = 4 : i64} : memref +// MLIR-NEXT: %c4_i32 = arith.constant 4 : i32 +// MLIR-NEXT: memref.store %c4_i32, %alloca[] : memref +// MLIR-NEXT: } +// MLIR-NEXT: return + +// LLVM: llvm.func @foo() { +// LLVM: %0 = llvm.intr.stacksave : !llvm.ptr +// LLVM: llvm.br ^bb1 +// LLVM: ^bb1: +// [...] +// LLVM: llvm.intr.stackrestore %0 : !llvm.ptr +// LLVM: llvm.br ^bb2 +// LLVM: ^bb2: +// LLVM: llvm.return +// LLVM: } + + // Should drop empty scopes. 
+ cir.func @empty_scope() { + cir.scope { + } + cir.return + } + // MLIR: func.func @empty_scope() + // MLIR-NEXT: return + // MLIR-NEXT: } + + // LLVM: llvm.func @empty_scope() + // LLVM: llvm.return +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/shift.cir b/clang/test/CIR/Lowering/ThroughMLIR/shift.cir new file mode 100644 index 000000000000..aecbc3f45940 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/shift.cir @@ -0,0 +1,31 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!u16i = !cir.int +module { + cir.func @testShiftWithDifferentValueAndAmountTypes(%arg0: !s16i, %arg1: !s32i, %arg2: !s64i, %arg3: !u16i) { + %1 = cir.shift(left, %arg1: !s32i, %arg2 : !s64i) -> !s32i + %2 = cir.shift(left, %arg1 : !s32i, %arg0 : !s16i) -> !s32i + %3 = cir.shift(left, %arg1 : !s32i, %arg3 : !u16i) -> !s32i + %4 = cir.shift(left, %arg1 : !s32i, %arg1 : !s32i) -> !s32i + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @testShiftWithDifferentValueAndAmountTypes(%arg0: i16, %arg1: i32, %arg2: i64, %arg3: i16) { +// CHECK-NEXT: %[[TRUNC:.+]] = arith.trunci %arg2 : i64 to i32 +// CHECK-NEXT: %[[SHIFT_TRUNC:.+]] = arith.shli %arg1, %[[TRUNC]] : i32 +// CHECK-NEXT: %[[EXTS:.+]] = arith.extsi %arg0 : i16 to i32 +// CHECK-NEXT: %[[SHIFT_EXTS:.+]] = arith.shli %arg1, %[[EXTS]] : i32 +// CHECK-NEXT: %[[EXTU:.+]] = arith.extui %arg3 : i16 to i32 +// CHECK-NEXT: %[[SHIFT_EXTU:.+]] = arith.shli %arg1, %[[EXTU]] : i32 +// CHECK-NEXT: %[[BITCAST:.+]] = arith.bitcast %arg1 : i32 to i32 +// CHECK-NEXT: %[[SHIFT_BITCAST:.+]] = arith.shli %arg1, %[[BITCAST]] : i32 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } + diff --git a/clang/test/CIR/Lowering/ThroughMLIR/sin.cir b/clang/test/CIR/Lowering/ThroughMLIR/sin.cir new file mode 100644 index 000000000000..c433b52e105c --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/sin.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %1 = cir.const #cir.fp<1.0> : !cir.float + %2 = cir.const #cir.fp<1.0> : !cir.double + %3 = cir.const #cir.fp<1.0> : !cir.long_double + %4 = cir.const #cir.fp<1.0> : !cir.long_double + %5 = cir.sin %1 : !cir.float + %6 = cir.sin %2 : !cir.double + %7 = cir.sin %3 : !cir.long_double + %8 = cir.sin %4 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 1.000000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1.000000e+00 : f64 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 1.000000e+00 : f80 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 1.000000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.sin %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.sin %[[C1]] : f64 +// CHECK-NEXT: %{{.+}} = math.sin %[[C2]] : f80 +// CHECK-NEXT: %{{.+}} = math.sin %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/sqrt.cir b/clang/test/CIR/Lowering/ThroughMLIR/sqrt.cir new file mode 100644 index 000000000000..a9b8c1a7efa6 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/sqrt.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o %t.mlir +// RUN: FileCheck %s --input-file %t.mlir + +module { + cir.func @foo() { + %0 = cir.const #cir.fp<9.0> : !cir.float + %1 = cir.const #cir.fp<100.0> : !cir.long_double + %2 = cir.const #cir.fp<1.0> : !cir.double + %3 = cir.const #cir.fp<2.56> : 
!cir.long_double + %4 = cir.sqrt %0 : !cir.float + %5 = cir.sqrt %1 : !cir.long_double + %6 = cir.sqrt %2 : !cir.double + %7 = cir.sqrt %3 : !cir.long_double + cir.return + } +} + +// CHECK: module { +// CHECK-NEXT: func.func @foo() { +// CHECK-NEXT: %[[C0:.+]] = arith.constant 9.000000e+00 : f32 +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1.000000e+02 : f80 +// CHECK-NEXT: %[[C2:.+]] = arith.constant 1.000000e+00 : f64 +// CHECK-NEXT: %[[C3:.+]] = arith.constant 2.560000e+00 : f64 +// CHECK-NEXT: %{{.+}} = math.sqrt %[[C0]] : f32 +// CHECK-NEXT: %{{.+}} = math.sqrt %[[C1]] : f80 +// CHECK-NEXT: %{{.+}} = math.sqrt %[[C2]] : f64 +// CHECK-NEXT: %{{.+}} = math.sqrt %[[C3]] : f64 +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/ThroughMLIR/tenary.cir b/clang/test/CIR/Lowering/ThroughMLIR/tenary.cir new file mode 100644 index 000000000000..ce6f466aebc9 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/tenary.cir @@ -0,0 +1,44 @@ +// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir --canonicalize | FileCheck %s --check-prefix=MLIR-CANONICALIZE +// RUN: cir-opt %s -cir-to-mlir --canonicalize -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int + +module { +cir.func @_Z1xi(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<0> : !s32i + %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool + %5 = cir.ternary(%4, true { + %7 = cir.const #cir.int<3> : !s32i + cir.yield %7 : !s32i + }, false { + %7 = cir.const #cir.int<5> : !s32i + cir.yield %7 : !s32i + }) : (!cir.bool) -> !s32i + cir.store %5, %1 : !s32i, !cir.ptr + %6 = cir.load %1 : !cir.ptr, !s32i + cir.return %6 : !s32i + } +} + +// MLIR: %1 = arith.cmpi sgt, %0, %c0_i32 : i32 +// MLIR-NEXT: %2 = arith.extui %1 : i1 to i8 +// MLIR-NEXT: %3 = arith.trunci %2 : i8 to i1 +// MLIR-NEXT: %4 = scf.if %3 -> (i32) { +// MLIR-NEXT: %c3_i32 = arith.constant 3 : i32 +// MLIR-NEXT: scf.yield %c3_i32 : i32 +// MLIR-NEXT: } else { +// MLIR-NEXT: %c5_i32 = arith.constant 5 : i32 +// MLIR-NEXT: scf.yield %c5_i32 : i32 +// MLIR-NEXT: } +// MLIR-NEXT: memref.store %4, %alloca_0[] : memref + +// MLIR-CANONICALIZE: %[[CMP:.*]] = arith.cmpi sgt +// MLIR-CANONICALIZE: arith.select %[[CMP]] + +// LLVM: %[[CMP:.*]] = icmp sgt +// LLVM: select i1 %[[CMP]] diff --git a/clang/test/CIR/Lowering/ThroughMLIR/unary-inc-dec.cir b/clang/test/CIR/Lowering/ThroughMLIR/unary-inc-dec.cir new file mode 100644 index 000000000000..1db339fe34fc --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/unary-inc-dec.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %2 = cir.const #cir.int<2> : !s32i + cir.store %2, %0 : !s32i, !cir.ptr + cir.store %2, %1 : !s32i, !cir.ptr + + %3 = cir.load %0 : !cir.ptr, !s32i + %4 = cir.unary(inc, %3) : !s32i, !s32i + cir.store %4, %0 : !s32i, !cir.ptr + + %5 = cir.load %1 : !cir.ptr, !s32i + %6 = cir.unary(dec, %5) : !s32i, !s32i + cir.store %6, %1 : 
!s32i, !cir.ptr + cir.return + } +} + +// MLIR: = arith.constant 1 +// MLIR: = arith.addi +// MLIR: = arith.constant 1 +// MLIR: = arith.subi + +// LLVM: = add i32 %[[#]], 1 +// LLVM: = sub i32 %[[#]], 1 diff --git a/clang/test/CIR/Lowering/ThroughMLIR/unary-plus-minus.cir b/clang/test/CIR/Lowering/ThroughMLIR/unary-plus-minus.cir new file mode 100644 index 000000000000..ecb7e7ef6734 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/unary-plus-minus.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %2 = cir.const #cir.int<2> : !s32i + cir.store %2, %0 : !s32i, !cir.ptr + cir.store %2, %1 : !s32i, !cir.ptr + + %3 = cir.load %0 : !cir.ptr, !s32i + %4 = cir.unary(plus, %3) : !s32i, !s32i + cir.store %4, %0 : !s32i, !cir.ptr + + %5 = cir.load %1 : !cir.ptr, !s32i + %6 = cir.unary(minus, %5) : !s32i, !s32i + cir.store %6, %1 : !s32i, !cir.ptr + cir.return + } +} + +// MLIR: %[[#INPUT_PLUS:]] = memref.load +// MLIR: memref.store %[[#INPUT_PLUS]] +// MLIR: %[[#INPUT_MINUS:]] = memref.load +// MLIR: %[[ZERO:[a-z0-9_]+]] = arith.constant 0 +// MLIR: arith.subi %[[ZERO]], %[[#INPUT_MINUS]] + +// LLVM: = sub i32 0, %[[#]] diff --git a/clang/test/CIR/Lowering/ThroughMLIR/vectype.cpp b/clang/test/CIR/Lowering/ThroughMLIR/vectype.cpp new file mode 100644 index 000000000000..81c9fe063260 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/vectype.cpp @@ -0,0 +1,178 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +typedef int vi4 __attribute__((vector_size(16))); + +void vector_int_test(int x) { + + // CHECK: %[[ALLOC1:.*]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC2:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC3:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC4:.*]] = memref.alloca() {alignment = 4 : i64} : memref + // CHECK: %[[ALLOC5:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC6:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC7:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC8:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC9:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC10:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC11:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC12:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC13:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC14:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC15:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC16:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC17:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + // CHECK: %[[ALLOC18:.*]] = memref.alloca() {alignment = 16 : i64} : memref> + + // CHECK: memref.store %arg0, %[[ALLOC1]][] : memref + + vi4 a = { 1, 2, 3, 4 }; + + // CHECK: %[[CST:.*]] = arith.constant dense<[1, 2, 3, 4]> : 
vector<4xi32> + // CHECK: memref.store %[[CST]], %[[ALLOC2]][] : memref> + + vi4 b = {x, 5, 6, x + 1}; + + // CHECK: %[[VAL1:.*]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[C5:.*]] = arith.constant 5 : i32 + // CHECK: %[[C6:.*]] = arith.constant 6 : i32 + // CHECK: %[[VAL2:.*]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[C1_I32_2:.*]] = arith.constant 1 : i32 + // CHECK: %[[SUM:.*]] = arith.addi %[[VAL2]], %[[C1_I32_2]] : i32 + // CHECK: %[[CST2:.*]] = arith.constant dense<0> : vector<4xi32> + // CHECK: %[[C0_I64_2:.*]] = arith.constant 0 : i64 + // CHECK: %[[VEC4:.*]] = vector.insertelement %[[VAL1]], %[[CST2]][%[[C0_I64_2]] : i64] : vector<4xi32> + // CHECK: %[[C1_I64_2:.*]] = arith.constant 1 : i64 + // CHECK: %[[VEC5:.*]] = vector.insertelement %[[C5]], %[[VEC4]][%[[C1_I64_2]] : i64] : vector<4xi32> + // CHECK: %[[C2_I64_2:.*]] = arith.constant 2 : i64 + // CHECK: %[[VEC6:.*]] = vector.insertelement %[[C6]], %[[VEC5]][%[[C2_I64_2]] : i64] : vector<4xi32> + // CHECK: %[[C3_I64_2:.*]] = arith.constant 3 : i64 + // CHECK: %[[VEC7:.*]] = vector.insertelement %[[SUM]], %[[VEC6]][%[[C3_I64_2]] : i64] : vector<4xi32> + // CHECK: memref.store %[[VEC7]], %[[ALLOC3]][] : memref> + + a[x] = x; + + // CHECK: %[[VAL3:.*]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[VAL4:.*]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[VEC8:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VEC9:.*]] = vector.insertelement %[[VAL3]], %[[VEC8]][%[[VAL4]] : i32] : vector<4xi32> + // CHECK: memref.store %[[VEC9]], %[[ALLOC2]][] : memref> + + int c = a[x]; + + // CHECK: %[[VEC10:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL5:.*]] = memref.load %[[ALLOC1]][] : memref + // CHECK: %[[EXTRACT:.*]] = vector.extractelement %[[VEC10]][%[[VAL5]] : i32] : vector<4xi32> + // CHECK: memref.store %[[EXTRACT]], %[[ALLOC4]][] : memref + + vi4 d = a + b; + + // CHECK: %[[ALLOC0_1:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_1:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC2_1:.*]] = arith.addi %[[ALLOC0_1]], %[[ALLOC1_1]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC2_1]], %[[ALLOC5]][] : memref> + + vi4 e = a - b; + + // CHECK: %[[ALLOC0_2:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_2:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC3_2:.*]] = arith.subi %[[ALLOC0_2]], %[[ALLOC1_2]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC3_2]], %[[ALLOC6]][] : memref> + + vi4 f = a * b; + + // CHECK: %[[ALLOC0_3:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_3:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC4_1:.*]] = arith.muli %[[ALLOC0_3]], %[[ALLOC1_3]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC4_1]], %[[ALLOC7]][] : memref> + + vi4 g = a / b; + + // CHECK: %[[ALLOC0_4:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_4:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC5_1:.*]] = arith.divsi %[[ALLOC0_4]], %[[ALLOC1_4]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC5_1]], %[[ALLOC8]][] : memref> + + vi4 h = a % b; + + // CHECK: %[[ALLOC0_5:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_5:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC6_1:.*]] = arith.remsi %[[ALLOC0_5]], %[[ALLOC1_5]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC6_1]], %[[ALLOC9]][] : memref> + + vi4 i = a & b; + + // CHECK: %[[ALLOC0_6:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_6:.*]] = memref.load %[[ALLOC3]][] 
: memref> + // CHECK: %[[ALLOC7_1:.*]] = arith.andi %[[ALLOC0_6]], %[[ALLOC1_6]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC7_1]], %[[ALLOC10]][] : memref> + + vi4 j = a | b; + + // CHECK: %[[ALLOC0_7:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_7:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC8_1:.*]] = arith.ori %[[ALLOC0_7]], %[[ALLOC1_7]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC8_1]], %[[ALLOC11]][] : memref> + + vi4 k = a ^ b; + + // CHECK: %[[ALLOC0_8:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[ALLOC1_8:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[ALLOC9_1:.*]] = arith.xori %[[ALLOC0_8]], %[[ALLOC1_8]] : vector<4xi32> + // CHECK: memref.store %[[ALLOC9_1]], %[[ALLOC12]][] : memref> + + // TODO(cir) : Fix the lowering of unary operators + // vi4 l = +a; + // vi4 m = -a; + // vi4 n = ~a; + + vi4 o = a == b; + + // CHECK: %[[VAL11:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL12:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[CMP_EQ:.*]] = arith.cmpi eq, %[[VAL11]], %[[VAL12]] : vector<4xi32> + // CHECK: %[[EXT_EQ:.*]] = arith.extsi %[[CMP_EQ]] : vector<4xi1> to vector<4xi32> + // CHECK: memref.store %[[EXT_EQ]], %[[ALLOC13]][] : memref> + + vi4 p = a != b; + + // CHECK: %[[VAL13:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL14:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[CMP_NE:.*]] = arith.cmpi ne, %[[VAL13]], %[[VAL14]] : vector<4xi32> + // CHECK: %[[EXT_NE:.*]] = arith.extsi %[[CMP_NE]] : vector<4xi1> to vector<4xi32> + // CHECK: memref.store %[[EXT_NE]], %[[ALLOC14]][] : memref> + + vi4 q = a < b; + + // CHECK: %[[VAL15:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL16:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[CMP_SLT:.*]] = arith.cmpi slt, %[[VAL15]], %[[VAL16]] : vector<4xi32> + // CHECK: %[[EXT_SLT:.*]] = arith.extsi %[[CMP_SLT]] : vector<4xi1> to vector<4xi32> + // CHECK: memref.store %[[EXT_SLT]], %[[ALLOC15]][] : memref> + + vi4 r = a > b; + + // CHECK: %[[VAL17:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL18:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[CMP_SGT:.*]] = arith.cmpi sgt, %[[VAL17]], %[[VAL18]] : vector<4xi32> + // CHECK: %[[EXT_SGT:.*]] = arith.extsi %[[CMP_SGT]] : vector<4xi1> to vector<4xi32> + // CHECK: memref.store %[[EXT_SGT]], %[[ALLOC16]][] : memref> + + vi4 s = a <= b; + + // CHECK: %[[VAL19:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL20:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[CMP_SLE:.*]] = arith.cmpi sle, %[[VAL19]], %[[VAL20]] : vector<4xi32> + // CHECK: %[[EXT_SLE:.*]] = arith.extsi %[[CMP_SLE]] : vector<4xi1> to vector<4xi32> + // CHECK: memref.store %[[EXT_SLE]], %[[ALLOC17]][] : memref> + + vi4 t = a >= b; + + // CHECK: %[[VAL21:.*]] = memref.load %[[ALLOC2]][] : memref> + // CHECK: %[[VAL22:.*]] = memref.load %[[ALLOC3]][] : memref> + // CHECK: %[[CMP_SGE:.*]] = arith.cmpi sge, %[[VAL21]], %[[VAL22]] : vector<4xi32> + // CHECK: %[[EXT_SGE:.*]] = arith.extsi %[[CMP_SGE]] : vector<4xi1> to vector<4xi32> + // CHECK: memref.store %[[EXT_SGE]], %[[ALLOC18]][] : memref> + + // CHECK: return +} diff --git a/clang/test/CIR/Lowering/ThroughMLIR/vtable.cir b/clang/test/CIR/Lowering/ThroughMLIR/vtable.cir new file mode 100644 index 000000000000..66eb06629793 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/vtable.cir @@ -0,0 +1,73 @@ +// RUN: cir-opt %s --cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR + +!s32i = !cir.int 
+!s64i = !cir.int +!s8i = !cir.int +!u32i = !cir.int +!u64i = !cir.int +!u8i = !cir.int +!void = !cir.void + +!ty_anon_struct = !cir.struct>, !cir.ptr>}> +!ty_anon_struct1 = !cir.struct>, !cir.ptr>, !cir.int, !cir.int, !cir.ptr>, !cir.int, !cir.ptr>, !cir.int}> +!ty_anon_struct2 = !cir.struct> x 4>}> +!ty_anon_struct3 = !cir.struct> x 3>}> +!ty_anon_struct4 = !cir.struct> x 4>, !cir.array> x 3>}> +!ty_Father = !cir.struct ()>>>} #cir.record.decl.ast> +!ty_Mother = !cir.struct ()>>>} #cir.record.decl.ast> +!ty_Child = !cir.struct ()>>>} #cir.record.decl.ast>, !cir.struct ()>>>} #cir.record.decl.ast>} #cir.record.decl.ast> + +module { + cir.func linkonce_odr @_ZN6Mother6simpleEv(%arg0: !cir.ptr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> + %1 = cir.load %0 : !cir.ptr>, !cir.ptr + cir.return + } + cir.func private @_ZN5ChildC2Ev(%arg0: !cir.ptr) { cir.return } + cir.global linkonce_odr @_ZTV6Mother = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI6Mother> : !cir.ptr, #cir.global_view<@_ZN6Mother9MotherFooEv> : !cir.ptr, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr]> : !cir.array x 4>}> : !ty_anon_struct2 {alignment = 8 : i64} + cir.global "private" external @_ZTVN10__cxxabiv117__class_type_infoE : !cir.ptr> + cir.global linkonce_odr @_ZTS6Mother = #cir.const_array<"6Mother" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global constant external @_ZTI6Mother = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS6Mother> : !cir.ptr}> : !ty_anon_struct {alignment = 8 : i64} + cir.func linkonce_odr @_ZN6Mother9MotherFooEv(%arg0: !cir.ptr ) { cir.return } + cir.func linkonce_odr @_ZN6Mother10MotherFoo2Ev(%arg0: !cir.ptr ) { cir.return } + cir.global linkonce_odr @_ZTV6Father = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI6Father> : !cir.ptr, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr]> : !cir.array x 3>}> : !ty_anon_struct3 {alignment = 8 : i64} + cir.func linkonce_odr @_ZN6FatherC2Ev(%arg0: !cir.ptr ) { cir.return } + cir.global linkonce_odr @_ZTV5Child = #cir.vtable<{#cir.const_array<[#cir.ptr : !cir.ptr, #cir.global_view<@_ZTI5Child> : !cir.ptr, #cir.global_view<@_ZN5Child9MotherFooEv> : !cir.ptr, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr]> : !cir.array x 4>, #cir.const_array<[#cir.ptr<-8 : i64> : !cir.ptr, #cir.global_view<@_ZTI5Child> : !cir.ptr, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr]> : !cir.array x 3>}> : !ty_anon_struct4 {alignment = 8 : i64} + cir.global "private" external @_ZTVN10__cxxabiv121__vmi_class_type_infoE : !cir.ptr> + cir.global linkonce_odr @_ZTS5Child = #cir.const_array<"5Child" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global linkonce_odr @_ZTS6Father = #cir.const_array<"6Father" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global constant external @_ZTI6Father = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS6Father> : !cir.ptr}> : !ty_anon_struct {alignment = 8 : i64} + cir.global constant external @_ZTI5Child = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr, #cir.global_view<@_ZTS5Child> : !cir.ptr, #cir.int<0> : !u32i, #cir.int<2> : !u32i, #cir.global_view<@_ZTI6Mother> : !cir.ptr, #cir.int<2> : !s64i, #cir.global_view<@_ZTI6Father> : !cir.ptr, #cir.int<2050> : !s64i}> : 
!ty_anon_struct1 {alignment = 8 : i64} + cir.func linkonce_odr @_ZN5Child9MotherFooEv(%arg0: !cir.ptr ) { cir.return } + cir.func linkonce_odr @_ZN6Father9FatherFooEv(%arg0: !cir.ptr ) { cir.return } +} + +// MLIR: llvm.mlir.global linkonce_odr @_ZTV5Child() {addr_space = 0 : i32, alignment = 8 : i64} : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> { +// MLIR: %{{[0-9]+}} = llvm.mlir.undef : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> +// MLIR: %{{[0-9]+}} = llvm.mlir.undef : !llvm.array<4 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.zero : !llvm.ptr +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[0] : !llvm.array<4 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.addressof @_ZTI5Child : !llvm.ptr + +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[1] : !llvm.array<4 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.addressof @_ZN5Child9MotherFooEv : !llvm.ptr + +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[2] : !llvm.array<4 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.addressof @_ZN6Mother10MotherFoo2Ev : !llvm.ptr + +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[3] : !llvm.array<4 x ptr> +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[0] : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> +// MLIR: %{{[0-9]+}} = llvm.mlir.undef : !llvm.array<3 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.constant(-8 : i64) : i64 +// MLIR: %{{[0-9]+}} = llvm.inttoptr %{{[0-9]+}} : i64 to !llvm.ptr +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[0] : !llvm.array<3 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.addressof @_ZTI5Child : !llvm.ptr + +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[1] : !llvm.array<3 x ptr> +// MLIR: %{{[0-9]+}} = llvm.mlir.addressof @_ZN6Father9FatherFooEv : !llvm.ptr + +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[2] : !llvm.array<3 x ptr> +// MLIR: %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[1] : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> +// MLIR: llvm.return %{{[0-9]+}} : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> +// MLIR: } \ No newline at end of file diff --git a/clang/test/CIR/Lowering/ThroughMLIR/while.c b/clang/test/CIR/Lowering/ThroughMLIR/while.c new file mode 100644 index 000000000000..40ad92de95e4 --- /dev/null +++ b/clang/test/CIR/Lowering/ThroughMLIR/while.c @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +void singleWhile() { + int a = 0; + while(a < 2) { + a++; + } +} + +void nestedWhile() { + int a = 0; + while(a < 2) { + int b = 0; + while(b < 2) { + b++; + } + a++; + } +} + +//CHECK: func.func @singleWhile() { +//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref +//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 +//CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref +//CHECK: memref.alloca_scope { +//CHECK: scf.while : () -> () { +//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref +//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32 +//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO:.+]], %[[C2_I32]] : i32 +//CHECK: %[[TWO:.+]] = arith.extui %[[ONE:.+]] : i1 to i32 +//CHECK: %[[C0_I32_0:.+]] = arith.constant 0 : i32 +//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO:.+]], %[[C0_I32_0]] : i32 +//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE:.+]] : i1 to i8 +//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR:.+]] : i8 to i1 +//CHECK: scf.condition(%[[FIVE]]) +//CHECK: } do { +//CHECK: 
%[[ZERO:.+]] = memref.load %[[alloca]][] : memref
+//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK: %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32
+//CHECK: memref.store %[[ONE]], %[[alloca]][] : memref
+//CHECK: scf.yield
+//CHECK: }
+//CHECK: }
+//CHECK: return
+//CHECK: }
+
+//CHECK: func.func @nestedWhile() {
+//CHECK: %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref
+//CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32
+//CHECK: memref.store %[[C0_I32]], %[[alloca]][] : memref
+//CHECK: memref.alloca_scope {
+//CHECK: %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref
+//CHECK: scf.while : () -> () {
+//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref
+//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK: %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C2_I32]] : i32
+//CHECK: %[[TWO:.+]] = arith.extui %[[ONE]] : i1 to i32
+//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32
+//CHECK: %[[THREE:.+]] = arith.cmpi ne, %[[TWO]], %[[C0_I32_1]] : i32
+//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i8
+//CHECK: %[[FIVE:.+]] = arith.trunci %[[FOUR]] : i8 to i1
+//CHECK: scf.condition(%[[FIVE]])
+//CHECK: } do {
+//CHECK: %[[C0_I32_1:.+]] = arith.constant 0 : i32
+//CHECK: memref.store %[[C0_I32_1]], %[[alloca_0]][] : memref
+//CHECK: memref.alloca_scope {
+//CHECK: scf.while : () -> () {
+//CHECK: %[[TWO:.+]] = memref.load %[[alloca_0]][] : memref
+//CHECK: %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK: %[[THREE:.+]] = arith.cmpi slt, %[[TWO]], %[[C2_I32]] : i32
+//CHECK: %[[FOUR:.+]] = arith.extui %[[THREE]] : i1 to i32
+//CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32
+//CHECK: %[[FIVE:.+]] = arith.cmpi ne, %[[FOUR]], %[[C0_I32_2]] : i32
+//CHECK: %[[SIX:.+]] = arith.extui %[[FIVE]] : i1 to i8
+//CHECK: %[[SEVEN:.+]] = arith.trunci %[[SIX]] : i8 to i1
+//CHECK: scf.condition(%[[SEVEN]])
+//CHECK: } do {
+//CHECK: %[[TWO:.+]] = memref.load %[[alloca_0]][] : memref
+//CHECK: %[[C1_I32_2:.+]] = arith.constant 1 : i32
+//CHECK: %[[THREE:.+]] = arith.addi %[[TWO]], %[[C1_I32_2]] : i32
+//CHECK: memref.store %[[THREE]], %[[alloca_0]][] : memref
+//CHECK: scf.yield
+//CHECK: }
+//CHECK: }
+//CHECK: %[[ZERO:.+]] = memref.load %[[alloca]][] : memref
+//CHECK: %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK: %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32
+//CHECK: memref.store %[[ONE]], %[[alloca]][] : memref
+//CHECK: scf.yield
+//CHECK: }
+//CHECK: }
+//CHECK: return
+//CHECK: }
+//CHECK: }
\ No newline at end of file
diff --git a/clang/test/CIR/Lowering/address-space.cir b/clang/test/CIR/Lowering/address-space.cir
new file mode 100644
index 000000000000..ee857bd32119
--- /dev/null
+++ b/clang/test/CIR/Lowering/address-space.cir
@@ -0,0 +1,54 @@
+// RUN: cir-translate %s -cir-to-llvmir -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+!s32i = !cir.int
+
+module attributes {
+ cir.triple = "spirv64-unknown-unknown",
+ llvm.data_layout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"
+} {
+ cir.global external addrspace(offload_global) @addrspace1 = #cir.int<1> : !s32i
+ // LLVM: @addrspace1 = addrspace(1) global i32
+
+ cir.global "private" internal addrspace(offload_local) @addrspace2 : !s32i
+ // LLVM: @addrspace2 = internal addrspace(3) global i32 undef
+
+ cir.global external addrspace(target<7>) @addrspace3 = #cir.int<3> : !s32i
+ // LLVM: @addrspace3 = addrspace(7) global i32
+
+ // LLVM: define void @foo(ptr %0)
+ cir.func @foo(%arg0: !cir.ptr) {
+ // LLVM-NEXT: alloca ptr,
+ %0 = cir.alloca
!cir.ptr, !cir.ptr>, ["arg", init] {alignment = 8 : i64} + cir.return + } + + // LLVM: define void @bar(ptr addrspace(1) %0) + cir.func @bar(%arg0: !cir.ptr)>) { + // LLVM-NEXT: alloca ptr addrspace(1) + %0 = cir.alloca !cir.ptr)>, !cir.ptr)>>, ["arg", init] {alignment = 8 : i64} + cir.return + } + + // LLVM: define void @baz(ptr %0) + cir.func @baz(%arg0: !cir.ptr)>) { + // LLVM-NEXT: alloca ptr, + %0 = cir.alloca !cir.ptr)>, !cir.ptr)>>, ["arg", init] {alignment = 8 : i64} + cir.return + } + + // LLVM: define void @test_lower_offload_as() + cir.func @test_lower_offload_as() { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["arg0", init] {alignment = 8 : i64} + // LLVM-NEXT: alloca ptr, + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["arg1", init] {alignment = 8 : i64} + // LLVM-NEXT: alloca ptr addrspace(1), + %2 = cir.alloca !cir.ptr, !cir.ptr>, ["arg2", init] {alignment = 8 : i64} + // LLVM-NEXT: alloca ptr addrspace(2), + %3 = cir.alloca !cir.ptr, !cir.ptr>, ["arg3", init] {alignment = 8 : i64} + // LLVM-NEXT: alloca ptr addrspace(3), + %4 = cir.alloca !cir.ptr, !cir.ptr>, ["arg4", init] {alignment = 8 : i64} + // LLVM-NEXT: alloca ptr addrspace(4), + cir.return + } +} diff --git a/clang/test/CIR/Lowering/alloca.cir b/clang/test/CIR/Lowering/alloca.cir new file mode 100644 index 000000000000..62b8c1c60111 --- /dev/null +++ b/clang/test/CIR/Lowering/alloca.cir @@ -0,0 +1,17 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR + +!s32i = !cir.int + +module { + cir.func @foo(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, %arg0 : !s32i, ["tmp"] {alignment = 16 : i64} + cir.return + } +} + +// MLIR: module { +// MLIR-NEXT: llvm.func @foo(%arg0: i32) attributes {cir.extra_attrs = #fn_attr, global_visibility = #cir} { +// MLIR-NEXT: %0 = llvm.alloca %arg0 x i32 {alignment = 16 : i64} : (i32) -> !llvm.ptr +// MLIR-NEXT: llvm.return +// MLIR-NEXT: } +// MLIR-NEXT: } diff --git a/clang/test/CIR/Lowering/array-init.c b/clang/test/CIR/Lowering/array-init.c new file mode 100644 index 000000000000..0b9a19b5c9ba --- /dev/null +++ b/clang/test/CIR/Lowering/array-init.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +// LLVM: charInit1.ar = internal global [4 x [4 x i8]] {{.*}}4 x i8] c"aa\00\00", [4 x i8] c"aa\00\00", [4 x i8] c"aa\00\00", [4 x i8] c"aa\00\00"], align 16 +char charInit1() { + static char ar[][4] = {"aa", "aa", "aa", "aa"}; + return ar[0][0]; +} + +// LLVM: define dso_local void @zeroInit +// LLVM: [[RES:%.*]] = alloca [3 x i32], i64 1 +// LLVM: store [3 x i32] zeroinitializer, ptr [[RES]] +void zeroInit() { + int a[3] = {0, 0, 0}; +} + +// LLVM: %1 = alloca [4 x [1 x i8]], i64 1, align 1 +// LLVM: store [4 x [1 x i8]] {{.*}}1 x i8] c"a", [1 x i8] c"b", [1 x i8] c"c", [1 x i8] c"d"], ptr %1, align 1 +void charInit2() { + char arr[4][1] = {"a", "b", "c", "d"}; +} + +// LLVM: %1 = alloca [4 x [2 x i8]], i64 1, align 1 +// LLVM: store [4 x [2 x i8]] {{.*}}2 x i8] c"ab", [2 x i8] c"cd", [2 x i8] c"ef", [2 x i8] c"gh"], ptr %1, align 1 +void charInit3() { + char arr[4][2] = {"ab", "cd", "ef", "gh"}; +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/array.cir b/clang/test/CIR/Lowering/array.cir new file mode 100644 index 000000000000..554a4a1fc18a --- /dev/null +++ b/clang/test/CIR/Lowering/array.cir @@ -0,0 +1,35 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir -o - | FileCheck %s 
-check-prefix=LLVM + +!s32i = !cir.int +!ty_S = !cir.struct + +module { + cir.func @foo() { + %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} + cir.return + } + +// MLIR: module { +// MLIR-NEXT: func @foo() +// MLIR-NEXT: %0 = llvm.mlir.constant(1 : index) : i64 +// MLIR-NEXT: %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 16 : i64} : (i64) -> !llvm.ptr +// MLIR-NEXT: llvm.return +// MLIR-NEXT: } +// MLIR-NEXT: } + +// LLVM: %1 = alloca [10 x i32], i64 1, align 16 +// LLVM-NEXT: ret void + + cir.global external @arr = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S, #cir.zero : !ty_S]> : !cir.array + // CHECK: llvm.mlir.global external @arr() {addr_space = 0 : i32} : !llvm.array<2 x struct<"struct.S", (i32)>> { + // CHECK: %0 = llvm.mlir.undef : !llvm.array<2 x struct<"struct.S", (i32)>> + // CHECK: %1 = llvm.mlir.undef : !llvm.struct<"struct.S", (i32)> + // CHECK: %2 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"struct.S", (i32)> + // CHECK: %4 = llvm.insertvalue %3, %0[0] : !llvm.array<2 x struct<"struct.S", (i32)>> + // CHECK: %5 = cir.llvmir.zeroinit : !llvm.struct<"struct.S", (i32)> + // CHECK: %6 = llvm.insertvalue %5, %4[1] : !llvm.array<2 x struct<"struct.S", (i32)>> + // CHECK: llvm.return %6 : !llvm.array<2 x struct<"struct.S", (i32)>> + // CHECK: } +} diff --git a/clang/test/CIR/Lowering/asm.cir b/clang/test/CIR/Lowering/asm.cir new file mode 100644 index 000000000000..3ba57ac17b2c --- /dev/null +++ b/clang/test/CIR/Lowering/asm.cir @@ -0,0 +1,55 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s + +!s32i = !cir.int + +module { + + cir.func @simple(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + + cir.asm(x86_att, + out = [], + in = [], + in_out = [], + {"" "~{dirflag},~{fpsr},~{flags}"}) -> !s32i + // CHECK: llvm.inline_asm asm_dialect = att operand_attrs = [] "", "~{dirflag},~{fpsr},~{flags}" : () -> i32 + + cir.asm(x86_att, + out = [], + in = [], + in_out = [], + {"xyz" "~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "xyz", "~{dirflag},~{fpsr},~{flags}" : () -> i32 + + cir.asm(x86_att, + out = [%0 : !cir.ptr (maybe_memory)], + in = [], + in_out = [%0 : !cir.ptr (maybe_memory)], + {"" "=*m,*m,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [{elementtype = i32}, {elementtype = i32}] "", "=*m,*m,~{dirflag},~{fpsr},~{flags}" %1, %1 : (!llvm.ptr, !llvm.ptr) -> i32 + + cir.asm(x86_att, + out = [], + in = [%0 : !cir.ptr (maybe_memory)], + in_out = [], + {"" "*m,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [{elementtype = i32}] "", "*m,~{dirflag},~{fpsr},~{flags}" %1 : (!llvm.ptr) -> i32 + + cir.asm(x86_att, + out = [%0 : !cir.ptr (maybe_memory)], + in = [], + in_out = [], + {"" "=*m,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [{elementtype = i32}] "", "=*m,~{dirflag},~{fpsr},~{flags}" %1 : (!llvm.ptr) -> i32 + + cir.asm(x86_att, + out = [], + in = [], + in_out = [], + {"" "=&r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "", "=&r,=&r,1,~{dirflag},~{fpsr},~{flags}" : () 
-> i32 + cir.return + } + +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/atomic-runtime.cpp b/clang/test/CIR/Lowering/atomic-runtime.cpp new file mode 100644 index 000000000000..411a08dc5af2 --- /dev/null +++ b/clang/test/CIR/Lowering/atomic-runtime.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s + +// Test __atomic_* built-ins that have a memory order parameter with a runtime +// value. This requires generating a switch statement, so the amount of +// generated code is surprisingly large. +// +// This is just a quick smoke test. Only atomic_load_n is tested. + +int runtime_load(int *ptr, int order) { + return __atomic_load_n(ptr, order); +} + +// CHECK: %[[T7:[0-9]+]] = load ptr, ptr %[[T3:[0-9]+]], align 8 +// CHECK: %[[T8:[0-9]+]] = load i32, ptr %[[T4:[0-9]+]], align 4 +// CHECK: switch i32 %[[T8]], label %[[L9:[0-9]+]] [ +// CHECK: i32 1, label %[[L11:[0-9]+]] +// CHECK: i32 2, label %[[L11]] +// CHECK: i32 5, label %[[L13:[0-9]+]] +// CHECK: ] +// CHECK: [[L9]]: +// CHECK: %[[T10:[0-9]+]] = load atomic i32, ptr %[[T7]] monotonic, align 4 +// CHECK: store i32 %[[T10]], ptr %[[T6:[0-9]+]], align 4 +// CHECK: br label %[[L15:[0-9]+]] +// CHECK: [[L11]]: +// CHECK: %[[T12:[0-9]+]] = load atomic i32, ptr %[[T7]] acquire, align 4 +// CHECK: store i32 %[[T12]], ptr %[[T6]], align 4 +// CHECK: br label %[[L15]] +// CHECK: [[L13]]: +// CHECK: %[[T14:[0-9]+]] = load atomic i32, ptr %[[T7]] seq_cst, align 4 +// CHECK: store i32 %[[T14]], ptr %[[T6]], align 4 +// CHECK: br label %[[L15]] +// CHECK: [[L15]]: +// CHECK: %[[T16:[0-9]+]] = load i32, ptr %[[T6]], align 4 +// CHECK: store i32 %[[T16]], ptr %[[T5:[0-9]+]], align 4 +// CHECK: %[[T17:[0-9]+]] = load i32, ptr %[[T5]], align 4 +// CHECK: ret i32 %[[T17]] diff --git a/clang/test/CIR/Lowering/binop-fp.cir b/clang/test/CIR/Lowering/binop-fp.cir new file mode 100644 index 000000000000..dfda6e91cb51 --- /dev/null +++ b/clang/test/CIR/Lowering/binop-fp.cir @@ -0,0 +1,68 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +module { + cir.func @foo() { + %0 = cir.alloca !cir.float, !cir.ptr, ["c"] {alignment = 4 : i64} + %1 = cir.alloca !cir.float, !cir.ptr, ["d"] {alignment = 4 : i64} + %2 = cir.alloca !cir.float, !cir.ptr, ["y", init] {alignment = 4 : i64} + %3 = cir.alloca !cir.double, !cir.ptr, ["e"] {alignment = 8 : i64} + %4 = cir.alloca !cir.double, !cir.ptr, ["f"] {alignment = 8 : i64} + %5 = cir.alloca !cir.double, !cir.ptr, ["g", init] {alignment = 8 : i64} + %6 = cir.load %0 : !cir.ptr, !cir.float + %7 = cir.load %1 : !cir.ptr, !cir.float + %8 = cir.binop(mul, %6, %7) : !cir.float + cir.store %8, %2 : !cir.float, !cir.ptr + %9 = cir.load %2 : !cir.ptr, !cir.float + %10 = cir.load %1 : !cir.ptr, !cir.float + %11 = cir.binop(div, %9, %10) : !cir.float + cir.store %11, %2 : !cir.float, !cir.ptr + %12 = cir.load %2 : !cir.ptr, !cir.float + %13 = cir.load %1 : !cir.ptr, !cir.float + %14 = cir.binop(add, %12, %13) : !cir.float + cir.store %14, %2 : !cir.float, !cir.ptr + %15 = cir.load %2 : !cir.ptr, !cir.float + %16 = cir.load %1 : !cir.ptr, !cir.float + %17 = cir.binop(sub, %15, %16) : !cir.float + cir.store %17, %2 : !cir.float, !cir.ptr + %18 = cir.load %3 : !cir.ptr, !cir.double + %19 = cir.load %4 : !cir.ptr, !cir.double + %20 = cir.binop(add, %18, %19) : !cir.double + cir.store %20, %5 : !cir.double, 
!cir.ptr + %21 = cir.load %3 : !cir.ptr, !cir.double + %22 = cir.load %4 : !cir.ptr, !cir.double + %23 = cir.binop(sub, %21, %22) : !cir.double + cir.store %23, %5 : !cir.double, !cir.ptr + %24 = cir.load %3 : !cir.ptr, !cir.double + %25 = cir.load %4 : !cir.ptr, !cir.double + %26 = cir.binop(mul, %24, %25) : !cir.double + cir.store %26, %5 : !cir.double, !cir.ptr + %27 = cir.load %3 : !cir.ptr, !cir.double + %28 = cir.load %4 : !cir.ptr, !cir.double + %29 = cir.binop(div, %27, %28) : !cir.double + cir.store %29, %5 : !cir.double, !cir.ptr + cir.return + } +} + +// MLIR: = llvm.alloca {{.*}} f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR: = llvm.alloca {{.*}} f64 {alignment = 8 : i64} : (i64) -> !llvm.ptr +// MLIR: = llvm.fmul {{.*}} : f32 +// MLIR: = llvm.fdiv +// MLIR: = llvm.fadd +// MLIR: = llvm.fsub +// MLIR: = llvm.fadd {{.*}} : f64 +// MLIR: = llvm.fsub +// MLIR: = llvm.fmul +// MLIR: = llvm.fdiv + +// LLVM: = alloca float, i64 +// LLVM: = alloca double, i64 +// LLVM: = fmul float +// LLVM: = fdiv float +// LLVM: = fadd float +// LLVM: = fsub float +// LLVM: = fadd double +// LLVM: = fsub double +// LLVM: = fmul double +// LLVM: = fdiv double diff --git a/clang/test/CIR/Lowering/binop-overflow.cir b/clang/test/CIR/Lowering/binop-overflow.cir new file mode 100644 index 000000000000..5cdd9d82ae7b --- /dev/null +++ b/clang/test/CIR/Lowering/binop-overflow.cir @@ -0,0 +1,67 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir -o - | FileCheck %s -check-prefix=LLVM + +!u32i = !cir.int +!s32i = !cir.int + +module { + cir.func @test_add_u32_u32_u32(%lhs: !u32i, %rhs: !u32i, %res: !cir.ptr) -> !cir.bool { + %result, %overflow = cir.binop.overflow(add, %lhs, %rhs) : !u32i, (!u32i, !cir.bool) + cir.store %result, %res : !u32i, !cir.ptr + cir.return %overflow : !cir.bool + } + + // MLIR: llvm.func @test_add_u32_u32_u32(%[[LHS:.+]]: i32, %[[RHS:.+]]: i32, %[[RES_PTR:.+]]: !llvm.ptr) -> i8 + // MLIR-NEXT: %[[#INTRIN_RET:]] = llvm.call_intrinsic "llvm.uadd.with.overflow.i32"(%[[LHS]], %[[RHS]]) : (i32, i32) -> !llvm.struct<(i32, i1)> + // MLIR-NEXT: %[[#RES:]] = llvm.extractvalue %[[#INTRIN_RET]][0] : !llvm.struct<(i32, i1)> + // MLIR-NEXT: %[[#OVFL:]] = llvm.extractvalue %[[#INTRIN_RET]][1] : !llvm.struct<(i32, i1)> + // MLIR-NEXT: %[[#OVFL_EXT:]] = llvm.zext %[[#OVFL]] : i1 to i8 + // MLIR-NEXT: llvm.store %[[#RES]], %[[RES_PTR]] {{.*}} : i32, !llvm.ptr + // MLIR-NEXT: llvm.return %[[#OVFL_EXT]] : i8 + // MLIR-NEXT: } + + // LLVM: define i8 @test_add_u32_u32_u32(i32 %[[#LHS:]], i32 %[[#RHS:]], ptr %[[#RES_PTR:]]) + // LLVM-NEXT: %[[#INTRIN_RET:]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %[[#LHS]], i32 %[[#RHS]]) + // LLVM-NEXT: %[[#RES:]] = extractvalue { i32, i1 } %[[#INTRIN_RET]], 0 + // LLVM-NEXT: %[[#OVFL:]] = extractvalue { i32, i1 } %[[#INTRIN_RET]], 1 + // LLVM-NEXT: %[[#OVFL_EXT:]] = zext i1 %[[#OVFL]] to i8 + // LLVM-NEXT: store i32 %[[#RES]], ptr %[[#RES_PTR]], align 4 + // LLVM-NEXT: ret i8 %[[#OVFL_EXT]] + // LLVM-NEXT: } + + cir.func @test_add_u32_u32_i32(%lhs: !u32i, %rhs: !u32i, %res: !cir.ptr) -> !cir.bool { + %result, %overflow = cir.binop.overflow(add, %lhs, %rhs) : !u32i, (!s32i, !cir.bool) + cir.store %result, %res : !s32i, !cir.ptr + cir.return %overflow : !cir.bool + } + + // MLIR: llvm.func @test_add_u32_u32_i32(%[[LHS:.+]]: i32, %[[RHS:.+]]: i32, %[[RES_PTR:.+]]: !llvm.ptr) -> i8 + // MLIR-NEXT: %[[#LHS_EXT:]] = llvm.zext %[[LHS]] : i32 to i33 + // MLIR-NEXT: %[[#RHS_EXT:]] = llvm.zext 
%[[RHS]] : i32 to i33 + // MLIR-NEXT: %[[#INTRIN_RET:]] = llvm.call_intrinsic "llvm.sadd.with.overflow.i33"(%[[#LHS_EXT]], %[[#RHS_EXT]]) : (i33, i33) -> !llvm.struct<(i33, i1)> + // MLIR-NEXT: %[[#RES_EXT:]] = llvm.extractvalue %[[#INTRIN_RET]][0] : !llvm.struct<(i33, i1)> + // MLIR-NEXT: %[[#ARITH_OVFL:]] = llvm.extractvalue %[[#INTRIN_RET]][1] : !llvm.struct<(i33, i1)> + // MLIR-NEXT: %[[#RES:]] = llvm.trunc %[[#RES_EXT]] : i33 to i32 + // MLIR-NEXT: %[[#RES_EXT_2:]] = llvm.sext %[[#RES]] : i32 to i33 + // MLIR-NEXT: %[[#TRUNC_OVFL:]] = llvm.icmp "ne" %[[#RES_EXT_2]], %[[#RES_EXT]] : i33 + // MLIR-NEXT: %[[#OVFL:]] = llvm.or %[[#ARITH_OVFL]], %[[#TRUNC_OVFL]] : i1 + // MLIR-NEXT: %[[#OVFL_EXT:]] = llvm.zext %[[#OVFL]] : i1 to i8 + // MLIR-NEXT: llvm.store %[[#RES]], %[[RES_PTR]] {{.*}} : i32, !llvm.ptr + // MLIR-NEXT: llvm.return %[[#OVFL_EXT]] : i8 + // MLIR-NEXT: } + + // LLVM: define i8 @test_add_u32_u32_i32(i32 %[[#LHS:]], i32 %[[#RHS:]], ptr %[[#RES_PTR:]]) + // LLVM-NEXT: %[[#LHS_EXT:]] = zext i32 %[[#LHS]] to i33 + // LLVM-NEXT: %[[#RHS_EXT:]] = zext i32 %[[#RHS]] to i33 + // LLVM-NEXT: %[[#INTRIN_RET:]] = call { i33, i1 } @llvm.sadd.with.overflow.i33(i33 %[[#LHS_EXT]], i33 %[[#RHS_EXT]]) + // LLVM-NEXT: %[[#RES_EXT:]] = extractvalue { i33, i1 } %[[#INTRIN_RET]], 0 + // LLVM-NEXT: %[[#ARITH_OVFL:]] = extractvalue { i33, i1 } %[[#INTRIN_RET]], 1 + // LLVM-NEXT: %[[#RES:]] = trunc i33 %[[#RES_EXT]] to i32 + // LLVM-NEXT: %[[#RES_EXT_2:]] = sext i32 %[[#RES]] to i33 + // LLVM-NEXT: %[[#TRUNC_OVFL:]] = icmp ne i33 %[[#RES_EXT_2]], %[[#RES_EXT]] + // LLVM-NEXT: %[[#OVFL:]] = or i1 %[[#ARITH_OVFL]], %[[#TRUNC_OVFL]] + // LLVM-NEXT: %[[#OVFL_EXT:]] = zext i1 %[[#OVFL]] to i8 + // LLVM-NEXT: store i32 %[[#RES]], ptr %[[#RES_PTR]], align 4 + // LLVM-NEXT: ret i8 %[[#OVFL_EXT]] + // LLVM-NEXT: } +} diff --git a/clang/test/CIR/Lowering/binop-signed-int.cir b/clang/test/CIR/Lowering/binop-signed-int.cir new file mode 100644 index 000000000000..5f028a6c901b --- /dev/null +++ b/clang/test/CIR/Lowering/binop-signed-int.cir @@ -0,0 +1,64 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %3 = cir.const #cir.int<2> : !s32i cir.store %3, %0 : !s32i, !cir.ptr + %4 = cir.const #cir.int<1> : !s32i cir.store %4, %1 : !s32i, !cir.ptr + %5 = cir.load %0 : !cir.ptr, !s32i + %6 = cir.load %1 : !cir.ptr, !s32i + %7 = cir.binop(mul, %5, %6) : !s32i + // CHECK: = llvm.mul + cir.store %7, %2 : !s32i, !cir.ptr + %8 = cir.load %2 : !cir.ptr, !s32i + %9 = cir.load %1 : !cir.ptr, !s32i + %10 = cir.binop(div, %8, %9) : !s32i + // CHECK: = llvm.sdiv + cir.store %10, %2 : !s32i, !cir.ptr + %11 = cir.load %2 : !cir.ptr, !s32i + %12 = cir.load %1 : !cir.ptr, !s32i + %13 = cir.binop(rem, %11, %12) : !s32i + // CHECK: = llvm.srem + cir.store %13, %2 : !s32i, !cir.ptr + %14 = cir.load %2 : !cir.ptr, !s32i + %15 = cir.load %1 : !cir.ptr, !s32i + %16 = cir.binop(add, %14, %15) : !s32i + // CHECK: = llvm.add + cir.store %16, %2 : !s32i, !cir.ptr + %17 = cir.load %2 : !cir.ptr, !s32i + %18 = cir.load %1 : !cir.ptr, !s32i + %19 = cir.binop(sub, %17, %18) : !s32i + // CHECK: = llvm.sub + cir.store %19, %2 : !s32i, !cir.ptr + %20 = cir.load %2 : !cir.ptr, !s32i + %21 = cir.load %1 : !cir.ptr, !s32i + %22 = cir.shift(right, %20 : !s32i, 
%21 : !s32i) -> !s32i + // CHECK: = llvm.ashr + cir.store %22, %2 : !s32i, !cir.ptr + %23 = cir.load %2 : !cir.ptr, !s32i + %24 = cir.load %1 : !cir.ptr, !s32i + %25 = cir.shift(left, %23 : !s32i, %24 : !s32i) -> !s32i + // CHECK: = llvm.shl + cir.store %25, %2 : !s32i, !cir.ptr + %26 = cir.load %2 : !cir.ptr, !s32i + %27 = cir.load %1 : !cir.ptr, !s32i + %28 = cir.binop(and, %26, %27) : !s32i + // CHECK: = llvm.and + cir.store %28, %2 : !s32i, !cir.ptr + %29 = cir.load %2 : !cir.ptr, !s32i + %30 = cir.load %1 : !cir.ptr, !s32i + %31 = cir.binop(xor, %29, %30) : !s32i + // CHECK: = llvm.xor + cir.store %31, %2 : !s32i, !cir.ptr + %32 = cir.load %2 : !cir.ptr, !s32i + %33 = cir.load %1 : !cir.ptr, !s32i + %34 = cir.binop(or, %32, %33) : !s32i + // CHECK: = llvm.or + cir.store %34, %2 : !s32i, !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/Lowering/binop-unsigned-int.cir b/clang/test/CIR/Lowering/binop-unsigned-int.cir new file mode 100644 index 000000000000..9633a7f4d966 --- /dev/null +++ b/clang/test/CIR/Lowering/binop-unsigned-int.cir @@ -0,0 +1,76 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM +!u32i = !cir.int + +module { + cir.func @foo() { + %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !u32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %2 = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %3 = cir.const #cir.int<2> : !u32i cir.store %3, %0 : !u32i, !cir.ptr + %4 = cir.const #cir.int<1> : !u32i cir.store %4, %1 : !u32i, !cir.ptr + %5 = cir.load %0 : !cir.ptr, !u32i + %6 = cir.load %1 : !cir.ptr, !u32i + %7 = cir.binop(mul, %5, %6) : !u32i + cir.store %7, %2 : !u32i, !cir.ptr + %8 = cir.load %2 : !cir.ptr, !u32i + %9 = cir.load %1 : !cir.ptr, !u32i + %10 = cir.binop(div, %8, %9) : !u32i + cir.store %10, %2 : !u32i, !cir.ptr + %11 = cir.load %2 : !cir.ptr, !u32i + %12 = cir.load %1 : !cir.ptr, !u32i + %13 = cir.binop(rem, %11, %12) : !u32i + cir.store %13, %2 : !u32i, !cir.ptr + %14 = cir.load %2 : !cir.ptr, !u32i + %15 = cir.load %1 : !cir.ptr, !u32i + %16 = cir.binop(add, %14, %15) : !u32i + cir.store %16, %2 : !u32i, !cir.ptr + %17 = cir.load %2 : !cir.ptr, !u32i + %18 = cir.load %1 : !cir.ptr, !u32i + %19 = cir.binop(sub, %17, %18) : !u32i + cir.store %19, %2 : !u32i, !cir.ptr + %20 = cir.load %2 : !cir.ptr, !u32i + %21 = cir.load %1 : !cir.ptr, !u32i + %22 = cir.shift(right, %20 : !u32i, %21 : !u32i) -> !u32i + cir.store %22, %2 : !u32i, !cir.ptr + %23 = cir.load %2 : !cir.ptr, !u32i + %24 = cir.load %1 : !cir.ptr, !u32i + %25 = cir.shift(left, %23 : !u32i, %24 : !u32i) -> !u32i + cir.store %25, %2 : !u32i, !cir.ptr + %26 = cir.load %2 : !cir.ptr, !u32i + %27 = cir.load %1 : !cir.ptr, !u32i + %28 = cir.binop(and, %26, %27) : !u32i + cir.store %28, %2 : !u32i, !cir.ptr + %29 = cir.load %2 : !cir.ptr, !u32i + %30 = cir.load %1 : !cir.ptr, !u32i + %31 = cir.binop(xor, %29, %30) : !u32i + cir.store %31, %2 : !u32i, !cir.ptr + %32 = cir.load %2 : !cir.ptr, !u32i + %33 = cir.load %1 : !cir.ptr, !u32i + %34 = cir.binop(or, %32, %33) : !u32i + cir.store %34, %2 : !u32i, !cir.ptr + cir.return + } +} + +// MLIR: = llvm.mul +// MLIR: = llvm.udiv +// MLIR: = llvm.urem +// MLIR: = llvm.add +// MLIR: = llvm.sub +// MLIR: = llvm.lshr +// MLIR: = llvm.shl +// MLIR: = llvm.and +// MLIR: = llvm.xor +// MLIR: = llvm.or + +// LLVM: = mul i32 +// LLVM: = udiv i32 +// LLVM: = urem i32 +// LLVM: = add i32 +// LLVM: = sub i32 +// LLVM: = 
lshr i32 +// LLVM: = shl i32 +// LLVM: = and i32 +// LLVM: = xor i32 +// LLVM: = or i32 diff --git a/clang/test/CIR/Lowering/bit.cir b/clang/test/CIR/Lowering/bit.cir new file mode 100644 index 000000000000..425248c66821 --- /dev/null +++ b/clang/test/CIR/Lowering/bit.cir @@ -0,0 +1,206 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!u16i = !cir.int +!u32i = !cir.int +!u64i = !cir.int + +cir.func @clrsb_s32(%arg : !s32i) { + %0 = cir.bit.clrsb(%arg : !s32i) : !s32i + cir.return +} + +// CHECK: llvm.func @clrsb_s32(%arg0: i32) +// CHECK-NEXT: %0 = llvm.mlir.constant(0 : i32) : i32 +// CHECK-NEXT: %1 = llvm.icmp "slt" %arg0, %0 : i32 +// CHECK-NEXT: %2 = llvm.mlir.constant(-1 : i32) : i32 +// CHECK-NEXT: %3 = llvm.xor %arg0, %2 : i32 +// CHECK-NEXT: %4 = llvm.select %1, %3, %arg0 : i1, i32 +// CHECK-NEXT: %5 = llvm.mlir.constant(false) : i1 +// CHECK-NEXT: %6 = llvm.call_intrinsic "llvm.ctlz.i32"(%4, %5) : (i32, i1) -> i32 +// CHECK-NEXT: %7 = llvm.mlir.constant(1 : i32) : i32 +// CHECK-NEXT: %8 = llvm.sub %6, %7 : i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @clrsb_s64(%arg : !s64i) { + %0 = cir.bit.clrsb(%arg : !s64i) : !s32i + cir.return +} + +// CHECK: llvm.func @clrsb_s64(%arg0: i64) +// CHECK-NEXT: %0 = llvm.mlir.constant(0 : i64) : i64 +// CHECK-NEXT: %1 = llvm.icmp "slt" %arg0, %0 : i64 +// CHECK-NEXT: %2 = llvm.mlir.constant(-1 : i64) : i64 +// CHECK-NEXT: %3 = llvm.xor %arg0, %2 : i64 +// CHECK-NEXT: %4 = llvm.select %1, %3, %arg0 : i1, i64 +// CHECK-NEXT: %5 = llvm.mlir.constant(false) : i1 +// CHECK-NEXT: %6 = llvm.call_intrinsic "llvm.ctlz.i64"(%4, %5) : (i64, i1) -> i64 +// CHECK-NEXT: %7 = llvm.trunc %6 : i64 to i32 +// CHECK-NEXT: %8 = llvm.mlir.constant(1 : i32) : i32 +// CHECK-NEXT: %9 = llvm.sub %7, %8 : i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @clz_u16(%arg : !u16i) { + %0 = cir.bit.clz(%arg : !u16i) : !s32i + cir.return +} + +// CHECK: llvm.func @clz_u16(%arg0: i16) +// CHECK-NEXT: %0 = llvm.mlir.constant(true) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.ctlz.i16"(%arg0, %0) : (i16, i1) -> i16 +// CHECK-NEXT: %2 = llvm.zext %1 : i16 to i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @clz_u32(%arg : !u32i) { + %0 = cir.bit.clz(%arg : !u32i) : !s32i + cir.return +} + +// CHECK: llvm.func @clz_u32(%arg0: i32) +// CHECK-NEXT: %0 = llvm.mlir.constant(true) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.ctlz.i32"(%arg0, %0) : (i32, i1) -> i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @clz_u64(%arg : !u64i) { + %0 = cir.bit.clz(%arg : !u64i) : !s32i + cir.return +} + +// CHECK: llvm.func @clz_u64(%arg0: i64) +// CHECK-NEXT: %0 = llvm.mlir.constant(true) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.ctlz.i64"(%arg0, %0) : (i64, i1) -> i64 +// CHECK-NEXT: %2 = llvm.trunc %1 : i64 to i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @ctz_u16(%arg : !u16i) { + %0 = cir.bit.ctz(%arg : !u16i) : !s32i + cir.return +} + +// CHECK: llvm.func @ctz_u16(%arg0: i16) +// CHECK-NEXT: %0 = llvm.mlir.constant(true) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.cttz.i16"(%arg0, %0) : (i16, i1) -> i16 +// CHECK-NEXT: %2 = llvm.zext %1 : i16 to i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @ctz_u32(%arg : !u32i) { + %0 = cir.bit.ctz(%arg : !u32i) : !s32i + cir.return +} + +// CHECK: llvm.func @ctz_u32(%arg0: i32) +// CHECK-NEXT: %0 = llvm.mlir.constant(true) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic 
"llvm.cttz.i32"(%arg0, %0) : (i32, i1) -> i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @ctz_u64(%arg : !u64i) { + %0 = cir.bit.ctz(%arg : !u64i) : !s32i + cir.return +} + +// CHECK: llvm.func @ctz_u64(%arg0: i64) +// CHECK-NEXT: %0 = llvm.mlir.constant(true) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.cttz.i64"(%arg0, %0) : (i64, i1) -> i64 +// CHECK-NEXT: %2 = llvm.trunc %1 : i64 to i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @ffs_s32(%arg : !s32i) { + %0 = cir.bit.ffs(%arg : !s32i) : !s32i + cir.return +} + +// CHECK: llvm.func @ffs_s32(%arg0: i32) +// CHECK-NEXT: %0 = llvm.mlir.constant(false) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.cttz.i32"(%arg0, %0) : (i32, i1) -> i32 +// CHECK-NEXT: %2 = llvm.mlir.constant(1 : i32) : i32 +// CHECK-NEXT: %3 = llvm.add %1, %2 : i32 +// CHECK-NEXT: %4 = llvm.mlir.constant(0 : i32) : i32 +// CHECK-NEXT: %5 = llvm.icmp "eq" %arg0, %4 : i32 +// CHECK-NEXT: %6 = llvm.mlir.constant(0 : i32) : i32 +// CHECK-NEXT: %7 = llvm.select %5, %6, %3 : i1, i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @ffs_s64(%arg : !s64i) { + %0 = cir.bit.ffs(%arg : !s64i) : !s32i + cir.return +} + +// CHECK: llvm.func @ffs_s64(%arg0: i64) +// CHECK-NEXT: %0 = llvm.mlir.constant(false) : i1 +// CHECK-NEXT: %1 = llvm.call_intrinsic "llvm.cttz.i64"(%arg0, %0) : (i64, i1) -> i64 +// CHECK-NEXT: %2 = llvm.trunc %1 : i64 to i32 +// CHECK-NEXT: %3 = llvm.mlir.constant(1 : i32) : i32 +// CHECK-NEXT: %4 = llvm.add %2, %3 : i32 +// CHECK-NEXT: %5 = llvm.mlir.constant(0 : i64) : i64 +// CHECK-NEXT: %6 = llvm.icmp "eq" %arg0, %5 : i64 +// CHECK-NEXT: %7 = llvm.mlir.constant(0 : i32) : i32 +// CHECK-NEXT: %8 = llvm.select %6, %7, %4 : i1, i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @parity_s32(%arg : !u32i) { + %0 = cir.bit.parity(%arg : !u32i) : !s32i + cir.return +} + +// CHECK: llvm.func @parity_s32(%arg0: i32) +// CHECK-NEXT: %0 = llvm.call_intrinsic "llvm.ctpop.i32"(%arg0) : (i32) -> i32 +// CHECK-NEXT: %1 = llvm.mlir.constant(1 : i32) : i32 +// CHECK-NEXT: %2 = llvm.and %0, %1 : i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @parity_s64(%arg : !u64i) { + %0 = cir.bit.parity(%arg : !u64i) : !s32i + cir.return +} + +// CHECK: llvm.func @parity_s64(%arg0: i64) +// CHECK-NEXT: %0 = llvm.call_intrinsic "llvm.ctpop.i64"(%arg0) : (i64) -> i64 +// CHECK-NEXT: %1 = llvm.trunc %0 : i64 to i32 +// CHECK-NEXT: %2 = llvm.mlir.constant(1 : i32) : i32 +// CHECK-NEXT: %3 = llvm.and %1, %2 : i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @popcount_u16(%arg : !u16i) { + %0 = cir.bit.popcount(%arg : !u16i) : !s32i + cir.return +} + +// CHECK: llvm.func @popcount_u16(%arg0: i16) +// CHECK-NEXT: %0 = llvm.call_intrinsic "llvm.ctpop.i16"(%arg0) : (i16) -> i16 +// CHECK-NEXT: %1 = llvm.zext %0 : i16 to i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @popcount_u32(%arg : !u32i) { + %0 = cir.bit.popcount(%arg : !u32i) : !s32i + cir.return +} + +// CHECK: llvm.func @popcount_u32(%arg0: i32) +// CHECK-NEXT: %0 = llvm.call_intrinsic "llvm.ctpop.i32"(%arg0) : (i32) -> i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +cir.func @popcount_u64(%arg : !u64i) { + %0 = cir.bit.popcount(%arg : !u64i) : !s32i + cir.return +} + +// CHECK: llvm.func @popcount_u64(%arg0: i64) +// CHECK-NEXT: %0 = llvm.call_intrinsic "llvm.ctpop.i64"(%arg0) : (i64) -> i64 +// CHECK-NEXT: %1 = llvm.trunc %0 : i64 to i32 +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } diff --git a/clang/test/CIR/Lowering/bitfieils.c 
b/clang/test/CIR/Lowering/bitfieils.c new file mode 100644 index 000000000000..cac1285c4e44 --- /dev/null +++ b/clang/test/CIR/Lowering/bitfieils.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +typedef struct { + int a : 4; +} B; + +// LLVM: define dso_local void @set_signed +// LLVM: [[TMP0:%.*]] = load ptr +// LLVM: [[TMP1:%.*]] = load i8, ptr [[TMP0]] +// LLVM: [[TMP2:%.*]] = and i8 [[TMP1]], -16 +// LLVM: [[TMP3:%.*]] = or i8 [[TMP2]], 14 +// LLVM: store i8 [[TMP3]], ptr [[TMP0]] +void set_signed(B* b) { + b->a = -2; +} + +// LLVM: define dso_local i32 @get_signed +// LLVM: [[TMP0:%.*]] = alloca i32 +// LLVM: [[TMP1:%.*]] = load ptr +// LLVM: [[TMP2:%.*]] = load i8, ptr [[TMP1]] +// LLVM: [[TMP3:%.*]] = shl i8 [[TMP2]], 4 +// LLVM: [[TMP4:%.*]] = ashr i8 [[TMP3]], 4 +// LLVM: [[TMP5:%.*]] = sext i8 [[TMP4]] to i32 +// LLVM: store i32 [[TMP5]], ptr [[TMP0]] +// LLVM: [[TMP6:%.*]] = load i32, ptr [[TMP0]] +// LLVM: ret i32 [[TMP6]] +int get_signed(B* b) { + return b->a; +} diff --git a/clang/test/CIR/Lowering/bitint.cir b/clang/test/CIR/Lowering/bitint.cir new file mode 100644 index 000000000000..b1c9d031b7cc --- /dev/null +++ b/clang/test/CIR/Lowering/bitint.cir @@ -0,0 +1,30 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int + +module { + cir.func @ParamPassing(%arg0: !cir.int, %arg1: !cir.int) -> !cir.int { + %0 = cir.cast(integral, %arg0 : !cir.int), !s32i + %1 = cir.cast(integral, %arg1 : !cir.int), !s32i + %2 = cir.binop(add, %0, %1) : !s32i + %3 = cir.cast(integral, %2 : !s32i), !cir.int + cir.return %3 : !cir.int + } +} + +// MLIR: llvm.func @ParamPassing(%arg0: i15, %arg1: i31) -> i2 +// MLIR-NEXT: %0 = llvm.sext %arg0 : i15 to i32 +// MLIR-NEXT: %1 = llvm.sext %arg1 : i31 to i32 +// MLIR-NEXT: %2 = llvm.add %0, %1 : i32 +// MLIR-NEXT: %3 = llvm.trunc %2 : i32 to i2 +// MLIR-NEXT: llvm.return %3 : i2 +// MLIR-NEXT: } + +// LLVM: define i2 @ParamPassing(i15 %0, i31 %1) !dbg !3 { +// LLVM-NEXT: %3 = sext i15 %0 to i32 +// LLVM-NEXT: %4 = sext i31 %1 to i32 +// LLVM-NEXT: %5 = add i32 %3, %4 +// LLVM-NEXT: %6 = trunc i32 %5 to i2 +// LLVM-NEXT: ret i2 %6 +// LLVM-NEXT: } diff --git a/clang/test/CIR/Lowering/bool-to-int.cir b/clang/test/CIR/Lowering/bool-to-int.cir new file mode 100644 index 000000000000..1b4bb73f80f9 --- /dev/null +++ b/clang/test/CIR/Lowering/bool-to-int.cir @@ -0,0 +1,21 @@ +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s + +!s32i = !cir.int +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool + +module { + cir.func @foo(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %1 = cir.const #true + %2 = cir.cast(bool_to_int, %1 : !cir.bool), !s32i + cir.return %2 : !s32i + } + cir.func @bar(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %1 = cir.const #false + %2 = cir.cast(bool_to_int, %1 : !cir.bool), !s32i + cir.return %2 : !s32i + } +} + +// CHECK: ret i32 1 +// CHECK: ret i32 0 diff --git a/clang/test/CIR/Lowering/bool.cir b/clang/test/CIR/Lowering/bool.cir new file mode 100644 index 000000000000..9b424355aa18 --- /dev/null +++ b/clang/test/CIR/Lowering/bool.cir @@ -0,0 +1,29 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool + +module { + cir.global external @g_bl = 
#false +// MLIR: llvm.mlir.global external @g_bl(false) {addr_space = 0 : i32} : i8 +// LLVM: @g_bl = global i8 0 + + cir.func @foo() { + %1 = cir.const #true + %0 = cir.alloca !cir.bool, !cir.ptr, ["a", init] {alignment = 1 : i64} + cir.store %1, %0 : !cir.bool, !cir.ptr + cir.return + } +// MLIR: llvm.func @foo() +// MLIR-DAG: = llvm.mlir.constant(1 : i8) : i8 +// MLIR-DAG: [[Value:%[a-z0-9]+]] = llvm.mlir.constant(1 : index) : i64 +// MLIR-DAG: = llvm.alloca [[Value]] x i8 {alignment = 1 : i64} : (i64) -> !llvm.ptr +// MLIR-DAG: llvm.store %0, %2 {{.*}} : i8, !llvm.ptr +// MLIR-NEXT: llvm.return + +// LLVM: define void @foo() +// LLVM-NEXT: %1 = alloca i8, i64 1, align 1 +// LLVM-NEXT: store i8 1, ptr %1, align 1 +// LLVM-NEXT: ret void +} diff --git a/clang/test/CIR/Lowering/branch.cir b/clang/test/CIR/Lowering/branch.cir new file mode 100644 index 000000000000..bbfb61e582a0 --- /dev/null +++ b/clang/test/CIR/Lowering/branch.cir @@ -0,0 +1,37 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +cir.func @foo(%arg0: !cir.bool) -> !s32i { + cir.brcond %arg0 ^bb1, ^bb2 + ^bb1: + %0 = cir.const #cir.int<1>: !s32i + cir.return %0 : !s32i + ^bb2: + %1 = cir.const #cir.int<0>: !s32i + cir.return %1 : !s32i +} + +// MLIR: module { +// MLIR-NEXT: llvm.func @foo(%arg0: i8) -> i32 +// MLIR-NEXT: %0 = llvm.trunc %arg0 : i8 to i1 +// MLIR-NEXT: llvm.cond_br %0, ^bb1, ^bb2 +// MLIR-NEXT: ^bb1: // pred: ^bb0 +// MLIR-NEXT: %1 = llvm.mlir.constant(1 : i32) : i32 +// MLIR-NEXT: llvm.return %1 : i32 +// MLIR-NEXT: ^bb2: // pred: ^bb0 +// MLIR-NEXT: %2 = llvm.mlir.constant(0 : i32) : i32 +// MLIR-NEXT: llvm.return %2 : i32 +// MLIR-NEXT: } +// MLIR-NEXT: } + +// LLVM: define i32 @foo(i8 %0) +// LLVM-NEXT: %2 = trunc i8 %0 to i1 +// LLVM-NEXT: br i1 %2, label %3, label %4 +// LLVM-EMPTY: +// LLVM-NEXT: 3: ; preds = %1 +// LLVM-NEXT: ret i32 1 +// LLVM-EMPTY: +// LLVM-NEXT: 4: ; preds = %1 +// LLVM-NEXT: ret i32 0 +// LLVM-NEXT: } diff --git a/clang/test/CIR/Lowering/brcond.cir b/clang/test/CIR/Lowering/brcond.cir new file mode 100644 index 000000000000..9586f70cf727 --- /dev/null +++ b/clang/test/CIR/Lowering/brcond.cir @@ -0,0 +1,43 @@ +// RUN: cir-opt %s -cir-to-llvm | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +#fn_attr = #cir, nothrow = #cir.nothrow, optnone = #cir.optnone})> +module { cir.func no_proto @test() -> !cir.bool extra(#fn_attr) { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.cast(int_to_bool, %0 : !s32i), !cir.bool + cir.br ^bb1 + ^bb1: + cir.brcond %1 ^bb2, ^bb3 + ^bb2: + cir.return %1 : !cir.bool + ^bb3: + cir.br ^bb4 + ^bb4: + cir.return %1 : !cir.bool + } +} + +// MLIR: {{.*}} = llvm.mlir.constant(0 : i32) : i32 +// MLIR-NEXT: {{.*}} = llvm.mlir.constant(0 : i32) : i32 +// MLIR-NEXT: {{.*}} = llvm.icmp "ne" {{.*}}, {{.*}} : i32 +// MLIR-NEXT: {{.*}} = llvm.zext {{.*}} : i1 to i8 +// MLIR-NEXT: llvm.br ^bb1 +// MLIR-NEXT: ^bb1: +// MLIR-NEXT: llvm.cond_br {{.*}}, ^bb2, ^bb3 +// MLIR-NEXT: ^bb2: +// MLIR-NEXT: llvm.return {{.*}} : i8 +// MLIR-NEXT: ^bb3: +// MLIR-NEXT: llvm.br ^bb4 +// MLIR-NEXT: ^bb4: +// MLIR-NEXT: llvm.return {{.*}} : i8 + +// LLVM: br label {{.*}} +// LLVM: 1: +// LLVM: br i1 false, label {{.*}}, label {{.*}} +// LLVM: 2: +// LLVM: ret i8 0 +// LLVM: 3: +// LLVM: br label {{.*}} +// LLVM: 4: +// LLVM: ret i8 0 diff --git a/clang/test/CIR/Lowering/bswap.cir 
b/clang/test/CIR/Lowering/bswap.cir new file mode 100644 index 000000000000..7733b4de1dae --- /dev/null +++ b/clang/test/CIR/Lowering/bswap.cir @@ -0,0 +1,19 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!u32i = !cir.int + +cir.func @test(%arg0: !u32i) -> !u32i { + %0 = cir.bswap(%arg0 : !u32i) : !u32i + cir.return %0 : !u32i +} + +// MLIR: llvm.func @test(%arg0: i32) -> i32 +// MLIR-NEXT: %0 = llvm.intr.bswap(%arg0) : (i32) -> i32 +// MLIR-NEXT: llvm.return %0 : i32 +// MLIR-NEXT: } + +// LLVM: define i32 @test(i32 %0) +// LLVM-NEXT: %2 = call i32 @llvm.bswap.i32(i32 %0) +// LLVM-NEXT: ret i32 %2 +// LLVM-NEXT: } diff --git a/clang/test/CIR/Lowering/builtin-binary-fp2fp.c b/clang/test/CIR/Lowering/builtin-binary-fp2fp.c new file mode 100644 index 000000000000..2877aa5cef30 --- /dev/null +++ b/clang/test/CIR/Lowering/builtin-binary-fp2fp.c @@ -0,0 +1,194 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fmath-errno -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -ffast-math -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM-FASTMATH + +// copysign + +float my_copysignf(float x, float y) { + return __builtin_copysignf(x, y); +} + +// LLVM: define dso_local float @my_copysignf +// LLVM: %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local float @my_copysignf +// LLVM-FASTMATH: %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}}) +// LLVM-FASTMATH: } + +double my_copysign(double x, double y) { + return __builtin_copysign(x, y); +} + +// LLVM: define dso_local double @my_copysign +// LLVM: %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local double @my_copysign +// LLVM-FASTMATH: %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}}) +// LLVM-FASTMATH: } + +long double my_copysignl(long double x, long double y) { + return __builtin_copysignl(x, y); +} + +// LLVM: define dso_local x86_fp80 @my_copysignl +// LLVM: %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local x86_fp80 @my_copysignl +// LLVM-FASTMATH: %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM-FASTMATH: } + +// fmax + +float my_fmaxf(float x, float y) { + return __builtin_fmaxf(x, y); +} + +// LLVM: define dso_local float @my_fmaxf +// LLVM: %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local float @my_fmaxf +// LLVM-FASTMATH: %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}}) +// LLVM-FASTMATH: } + +double my_fmax(double x, double y) { + return __builtin_fmax(x, y); +} + +// LLVM: define dso_local double @my_fmax +// LLVM: %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local double @my_fmax +// LLVM-FASTMATH: %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}}) +// LLVM-FASTMATH: } + +long double my_fmaxl(long double x, long double y) { + return __builtin_fmaxl(x, y); +} + +// LLVM: define dso_local x86_fp80 @my_fmaxl +// LLVM: %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: 
define dso_local x86_fp80 @my_fmaxl +// LLVM-FASTMATH: %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM-FASTMATH: } + +// fmin + +float my_fminf(float x, float y) { + return __builtin_fminf(x, y); +} + +// LLVM: define dso_local float @my_fminf +// LLVM: %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local float @my_fminf +// LLVM-FASTMATH: %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}}) +// LLVM-FASTMATH: } + +double my_fmin(double x, double y) { + return __builtin_fmin(x, y); +} + +// LLVM: define dso_local double @my_fmin +// LLVM: %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local double @my_fmin +// LLVM-FASTMATH: %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}}) +// LLVM-FASTMATH: } + +long double my_fminl(long double x, long double y) { + return __builtin_fminl(x, y); +} + +// LLVM: define dso_local x86_fp80 @my_fminl +// LLVM: %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local x86_fp80 @my_fminl +// LLVM-FASTMATH: %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM-FASTMATH: } + +// fmod + +float my_fmodf(float x, float y) { + return __builtin_fmodf(x, y); +} + +// LLVM: define dso_local float @my_fmodf +// LLVM: %{{.+}} = call float @fmodf(float %{{.+}}, float %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local float @my_fmodf +// LLVM-FASTMATH: %{{.+}} = frem float %{{.+}}, %{{.+}} +// LLVM-FASTMATH: } + +double my_fmod(double x, double y) { + return __builtin_fmod(x, y); +} + +// LLVM: define dso_local double @my_fmod +// LLVM: %{{.+}} = call double @fmod(double %{{.+}}, double %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local double @my_fmod +// LLVM-FASTMATH: %{{.+}} = frem double %{{.+}}, %{{.+}} +// LLVM-FASTMATH: } + +long double my_fmodl(long double x, long double y) { + return __builtin_fmodl(x, y); +} + +// LLVM: define dso_local x86_fp80 @my_fmodl +// LLVM: %{{.+}} = call x86_fp80 @fmodl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local x86_fp80 @my_fmodl +// LLVM-FASTMATH: %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}} +// LLVM-FASTMATH: } + +// pow + +float my_powf(float x, float y) { + return __builtin_powf(x, y); +} + +// LLVM: define dso_local float @my_powf +// LLVM: %{{.+}} = call float @powf(float %{{.+}}, float %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local float @my_powf +// LLVM-FASTMATH: %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}}) +// LLVM-FASTMATH: } + +double my_pow(double x, double y) { + return __builtin_pow(x, y); +} + +// LLVM: define dso_local double @my_pow +// LLVM: %{{.+}} = call double @pow(double %{{.+}}, double %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local double @my_pow +// LLVM-FASTMATH: %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}}) +// LLVM-FASTMATH: } + +long double my_powl(long double x, long double y) { + return __builtin_powl(x, y); +} + +// LLVM: define dso_local x86_fp80 @my_powl +// LLVM: %{{.+}} = call x86_fp80 @powl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM: } + +// LLVM-FASTMATH: define dso_local x86_fp80 @my_powl +// LLVM-FASTMATH: %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) +// LLVM-FASTMATH: } diff --git a/clang/test/CIR/Lowering/builtin-floating-point.cir 
b/clang/test/CIR/Lowering/builtin-floating-point.cir new file mode 100644 index 000000000000..82b733233da3 --- /dev/null +++ b/clang/test/CIR/Lowering/builtin-floating-point.cir @@ -0,0 +1,50 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s + +module { + cir.func @test(%arg0 : !cir.float) { + %1 = cir.cos %arg0 : !cir.float + // CHECK: llvm.intr.cos(%arg0) : (f32) -> f32 + + %2 = cir.ceil %arg0 : !cir.float + // CHECK: llvm.intr.ceil(%arg0) : (f32) -> f32 + + %3 = cir.exp %arg0 : !cir.float + // CHECK: llvm.intr.exp(%arg0) : (f32) -> f32 + + %4 = cir.exp2 %arg0 : !cir.float + // CHECK: llvm.intr.exp2(%arg0) : (f32) -> f32 + + %5 = cir.fabs %arg0 : !cir.float + // CHECK: llvm.intr.fabs(%arg0) : (f32) -> f32 + + %6 = cir.floor %arg0 : !cir.float + // CHECK: llvm.intr.floor(%arg0) : (f32) -> f32 + + %7 = cir.log %arg0 : !cir.float + // CHECK: llvm.intr.log(%arg0) : (f32) -> f32 + + %8 = cir.log10 %arg0 : !cir.float + // CHECK: llvm.intr.log10(%arg0) : (f32) -> f32 + + %9 = cir.log2 %arg0 : !cir.float + // CHECK: llvm.intr.log2(%arg0) : (f32) -> f32 + + %10 = cir.nearbyint %arg0 : !cir.float + // CHECK: llvm.intr.nearbyint(%arg0) : (f32) -> f32 + + %11 = cir.rint %arg0 : !cir.float + // CHECK: llvm.intr.rint(%arg0) : (f32) -> f32 + + %12 = cir.round %arg0 : !cir.float + // CHECK: llvm.intr.round(%arg0) : (f32) -> f32 + + %13 = cir.sin %arg0 : !cir.float + // CHECK: llvm.intr.sin(%arg0) : (f32) -> f32 + + %14 = cir.sqrt %arg0 : !cir.float + // CHECK: llvm.intr.sqrt(%arg0) : (f32) -> f32 + + cir.return + } +} diff --git a/clang/test/CIR/Lowering/call-op-call-conv.cir b/clang/test/CIR/Lowering/call-op-call-conv.cir new file mode 100644 index 000000000000..837cc4b82ab9 --- /dev/null +++ b/clang/test/CIR/Lowering/call-op-call-conv.cir @@ -0,0 +1,22 @@ +// RUN: cir-translate -cir-to-llvmir %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +!s32i = !cir.int +!fnptr = !cir.ptr> + +module { + cir.func private @my_add(%a: !s32i, %b: !s32i) -> !s32i cc(spir_function) + + cir.func @ind(%fnptr: !fnptr, %a : !s32i) { + %1 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_kernel) + // LLVM: %{{[0-9]+}} = call spir_kernel i32 %{{[0-9]+}}(i32 %{{[0-9]+}}) + + %2 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_function) + // LLVM: %{{[0-9]+}} = call spir_func i32 %{{[0-9]+}}(i32 %{{[0-9]+}}) + + %3 = cir.call @my_add(%1, %2) : (!s32i, !s32i) -> !s32i cc(spir_function) + // LLVM: %{{[0-9]+}} = call spir_func i32 @my_add(i32 %{{[0-9]+}}, i32 %{{[0-9]+}}) + + cir.return + } +} diff --git a/clang/test/CIR/Lowering/call.cir b/clang/test/CIR/Lowering/call.cir new file mode 100644 index 000000000000..2c40bb88e523 --- /dev/null +++ b/clang/test/CIR/Lowering/call.cir @@ -0,0 +1,39 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +module { + cir.func @a() { + cir.return + } + cir.func @d() { + cir.call @a() : () -> () + cir.return + } + +// MLIR: llvm.func @a() +// MLIR-NEXT: llvm.return +// MLIR-NEXT: } +// MLIR-NEXT: llvm.func @d() +// MLIR-NEXT: llvm.call @a() : () -> () +// MLIR-NEXT: llvm.return +// MLIR-NEXT: } + +// LLVM: define void @a() +// LLVM-NEXT: ret void +// LLVM-NEXT: } +// LLVM: define void @d() +// LLVM-NEXT: call void @a() +// LLVM-NEXT: ret void +// LLVM-NEXT: } + + // check operands and results type lowering + cir.func @callee(!cir.ptr) -> !cir.ptr attributes {sym_visibility = "private"} + // MLIR: llvm.func 
@callee(!llvm.ptr) -> !llvm.ptr + cir.func @caller(%arg0: !cir.ptr) -> !cir.ptr { + // MLIR: llvm.func @caller(%arg0: !llvm.ptr) -> !llvm.ptr + %0 = cir.call @callee(%arg0) : (!cir.ptr) -> !cir.ptr + // MLIR: %{{[0-9]+}} = llvm.call @callee(%arg0) : (!llvm.ptr) -> !llvm.ptr + cir.return %0 : !cir.ptr + } + +} // end module diff --git a/clang/test/CIR/Lowering/cast.cir b/clang/test/CIR/Lowering/cast.cir new file mode 100644 index 000000000000..e100e0c2f07e --- /dev/null +++ b/clang/test/CIR/Lowering/cast.cir @@ -0,0 +1,99 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir + +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!s8i = !cir.int +!u32i = !cir.int +!u8i = !cir.int +!u64i = !cir.int + +module { + cir.func @cStyleCasts(%arg0: !u32i, %arg1: !s32i, %arg2: !cir.float, %arg3: !cir.double) -> !s32i { + // CHECK: llvm.func @cStyleCasts + %0 = cir.alloca !u32i, !cir.ptr, ["x1", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["x2", init] {alignment = 4 : i64} + %20 = cir.alloca !s16i, !cir.ptr, ["x4", init] {alignment = 2 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %3 = cir.alloca !s8i, !cir.ptr, ["a", init] {alignment = 1 : i64} + %4 = cir.alloca !s16i, !cir.ptr, ["b", init] {alignment = 2 : i64} + %5 = cir.alloca !s64i, !cir.ptr, ["c", init] {alignment = 8 : i64} + %6 = cir.alloca !s64i, !cir.ptr, ["d", init] {alignment = 8 : i64} + %7 = cir.alloca !cir.array, !cir.ptr>, ["arr"] {alignment = 4 : i64} + %8 = cir.alloca !cir.ptr, !cir.ptr>, ["e", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !u32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + + // Integer casts. + %9 = cir.load %0 : !cir.ptr, !u32i + %10 = cir.cast(integral, %9 : !u32i), !s8i + // CHECK: %{{[0-9]+}} = llvm.trunc %{{[0-9]+}} : i32 to i8 + cir.store %10, %3 : !s8i, !cir.ptr + %11 = cir.load %1 : !cir.ptr, !s32i + %12 = cir.cast(integral, %11 : !s32i), !s16i + // CHECK: %{{[0-9]+}} = llvm.trunc %{{[0-9]+}} : i32 to i16 + cir.store %12, %4 : !s16i, !cir.ptr + %13 = cir.load %0 : !cir.ptr, !u32i + %14 = cir.cast(integral, %13 : !u32i), !s64i + // CHECK: %{{[0-9]+}} = llvm.zext %{{[0-9]+}} : i32 to i64 + cir.store %14, %5 : !s64i, !cir.ptr + %15 = cir.load %1 : !cir.ptr, !s32i + %16 = cir.cast(integral, %15 : !s32i), !s64i + // CHECK: %{{[0-9]+}} = llvm.sext %{{[0-9]+}} : i32 to i64 + %30 = cir.cast(integral, %arg1 : !s32i), !u32i + // Should not produce a cast. + %32 = cir.cast(integral, %arg0 : !u32i), !s32i + // Should not produce a cast. + %21 = cir.load %20 : !cir.ptr, !s16i + %22 = cir.cast(integral, %21 : !s16i), !u64i + // CHECK: %[[TMP:[0-9]+]] = llvm.sext %{{[0-9]+}} : i16 to i64 + %33 = cir.cast(int_to_bool, %arg1 : !s32i), !cir.bool + // CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %[[#CMP:]] = llvm.icmp "ne" %arg1, %[[#ZERO]] : i32 + // CHECK: %{{.+}} = llvm.zext %[[#CMP]] : i1 to i8 + + // Pointer casts. + cir.store %16, %6 : !s64i, !cir.ptr + %17 = cir.cast(array_to_ptrdecay, %7 : !cir.ptr>), !cir.ptr + cir.store %17, %8 : !cir.ptr, !cir.ptr> + // CHECK: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, i32 + %23 = cir.cast(int_to_ptr, %22 : !u64i), !cir.ptr + // CHECK: %[[TMP2:[0-9]+]] = llvm.inttoptr %[[TMP]] : i64 to !llvm.ptr + %24 = cir.cast(ptr_to_int, %23 : !cir.ptr), !s32i + // CHECK: %{{[0-9]+}} = llvm.ptrtoint %[[TMP2]] : !llvm.ptr to i32 + %29 = cir.cast(ptr_to_bool, %23 : !cir.ptr), !cir.bool + + // Floating point casts. 
+ %25 = cir.cast(int_to_float, %arg1 : !s32i), !cir.float + // CHECK: %{{.+}} = llvm.sitofp %{{.+}} : i32 to f32 + %26 = cir.cast(int_to_float, %arg0 : !u32i), !cir.float + // CHECK: %{{.+}} = llvm.uitofp %{{.+}} : i32 to f32 + %27 = cir.cast(float_to_int, %arg2 : !cir.float), !s32i + // CHECK: %{{.+}} = llvm.fptosi %{{.+}} : f32 to i32 + %28 = cir.cast(float_to_int, %arg2 : !cir.float), !u32i + // CHECK: %{{.+}} = llvm.fptoui %{{.+}} : f32 to i32 + %18 = cir.const #cir.int<0> : !s32i + // CHECK: %{{.+}} = llvm.fptrunc %{{.+}} : f64 to f32 + %34 = cir.cast(floating, %arg3 : !cir.double), !cir.float + + cir.store %18, %2 : !s32i, !cir.ptr + %19 = cir.load %2 : !cir.ptr, !s32i + cir.return %19 : !s32i + } + + cir.func @testBoolToIntCast(%arg0: !cir.bool) { + // CHECK: llvm.func @testBoolToIntCast + %0 = cir.alloca !cir.bool, !cir.ptr, ["bl", init] {alignment = 1 : i64} + %1 = cir.alloca !u8i, !cir.ptr, ["y", init] {alignment = 1 : i64} + cir.store %arg0, %0 : !cir.bool, !cir.ptr + + %2 = cir.load %0 : !cir.ptr, !cir.bool + %3 = cir.cast(bool_to_int, %2 : !cir.bool), !u8i + // CHECK: %[[LOAD_BOOL:.*]] = llvm.load %{{.*}} : !llvm.ptr -> i8 + // CHECK: %{{.*}} = llvm.bitcast %[[LOAD_BOOL]] : i8 to i8 + + cir.store %3, %1 : !u8i, !cir.ptr + cir.return + } +} diff --git a/clang/test/CIR/Lowering/class.cir b/clang/test/CIR/Lowering/class.cir new file mode 100644 index 000000000000..dd028f4c3b7d --- /dev/null +++ b/clang/test/CIR/Lowering/class.cir @@ -0,0 +1,96 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +!u8i = !cir.int +!u32i = !cir.int +!ty_S = !cir.struct +!ty_S2A = !cir.struct +!ty_S1_ = !cir.struct} #cir.record.decl.ast> +!ty_S2_ = !cir.struct +!ty_S3_ = !cir.struct + +module { + cir.func @test() { + %1 = cir.alloca !ty_S, !cir.ptr, ["x"] {alignment = 4 : i64} + // CHECK: %[[#ARRSIZE:]] = llvm.mlir.constant(1 : index) : i64 + // CHECK: %[[#CLASS:]] = llvm.alloca %[[#ARRSIZE]] x !llvm.struct<"class.S", (i8, i32)> + %3 = cir.get_member %1[0] {name = "c"} : !cir.ptr -> !cir.ptr + // CHECK: = llvm.getelementptr %[[#CLASS]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"class.S", (i8, i32)> + %5 = cir.get_member %1[1] {name = "i"} : !cir.ptr -> !cir.ptr + // CHECK: = llvm.getelementptr %[[#CLASS]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"class.S", (i8, i32)> + cir.return + } + + cir.func @shouldConstInitLocalClassesWithConstStructAttr() { + %0 = cir.alloca !ty_S2A, !cir.ptr, ["s"] {alignment = 4 : i64} + %1 = cir.const #cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S2A + cir.store %1, %0 : !ty_S2A, !cir.ptr + cir.return + } + // CHECK: llvm.func @shouldConstInitLocalClassesWithConstStructAttr() + // CHECK: %0 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %1 = llvm.alloca %0 x !llvm.struct<"class.S2A", (i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr + // CHECK: %2 = llvm.mlir.undef : !llvm.struct<"class.S2A", (i32)> + // CHECK: %3 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %4 = llvm.insertvalue %3, %2[0] : !llvm.struct<"class.S2A", (i32)> + // CHECK: llvm.store %4, %1 {{.*}}: !llvm.struct<"class.S2A", (i32)>, !llvm.ptr + // CHECK: llvm.return + // CHECK: } + + // Should lower basic #cir.const_struct initializer. 
+ cir.global external @s1 = #cir.const_struct<{#cir.int<1> : !s32i, #cir.fp<1.000000e-01> : !cir.float, #cir.ptr : !cir.ptr}> : !ty_S1_ + // CHECK: llvm.mlir.global external @s1() {addr_space = 0 : i32} : !llvm.struct<"class.S1", (i32, f32, ptr)> { + // CHECK: %0 = llvm.mlir.undef : !llvm.struct<"class.S1", (i32, f32, ptr)> + // CHECK: %1 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %2 = llvm.insertvalue %1, %0[0] : !llvm.struct<"class.S1", (i32, f32, ptr)> + // CHECK: %3 = llvm.mlir.constant(0.099999994 : f32) : f32 + // CHECK: %4 = llvm.insertvalue %3, %2[1] : !llvm.struct<"class.S1", (i32, f32, ptr)> + // CHECK: %5 = llvm.mlir.zero : !llvm.ptr + // CHECK: %6 = llvm.insertvalue %5, %4[2] : !llvm.struct<"class.S1", (i32, f32, ptr)> + // CHECK: llvm.return %6 : !llvm.struct<"class.S1", (i32, f32, ptr)> + // CHECK: } + + // Should lower nested #cir.const_struct initializer. + cir.global external @s2 = #cir.const_struct<{#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S2A}> : !ty_S2_ + // CHECK: llvm.mlir.global external @s2() {addr_space = 0 : i32} : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)> { + // CHECK: %0 = llvm.mlir.undef : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)> + // CHECK: %1 = llvm.mlir.undef : !llvm.struct<"class.S2A", (i32)> + // CHECK: %2 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"class.S2A", (i32)> + // CHECK: %4 = llvm.insertvalue %3, %0[0] : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)> + // CHECK: llvm.return %4 : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)> + // CHECK: } + + cir.global external @s3 = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S3_, #cir.const_struct<{#cir.int<2> : !s32i}> : !ty_S3_, #cir.const_struct<{#cir.int<3> : !s32i}> : !ty_S3_]> : !cir.array + // CHECK: llvm.mlir.global external @s3() {addr_space = 0 : i32} : !llvm.array<3 x struct<"class.S3", (i32)>> { + // CHECK: %0 = llvm.mlir.undef : !llvm.array<3 x struct<"class.S3", (i32)>> + // CHECK: %1 = llvm.mlir.undef : !llvm.struct<"class.S3", (i32)> + // CHECK: %2 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"class.S3", (i32)> + // CHECK: %4 = llvm.insertvalue %3, %0[0] : !llvm.array<3 x struct<"class.S3", (i32)>> + // CHECK: %5 = llvm.mlir.undef : !llvm.struct<"class.S3", (i32)> + // CHECK: %6 = llvm.mlir.constant(2 : i32) : i32 + // CHECK: %7 = llvm.insertvalue %6, %5[0] : !llvm.struct<"class.S3", (i32)> + // CHECK: %8 = llvm.insertvalue %7, %4[1] : !llvm.array<3 x struct<"class.S3", (i32)>> + // CHECK: %9 = llvm.mlir.undef : !llvm.struct<"class.S3", (i32)> + // CHECK: %10 = llvm.mlir.constant(3 : i32) : i32 + // CHECK: %11 = llvm.insertvalue %10, %9[0] : !llvm.struct<"class.S3", (i32)> + // CHECK: %12 = llvm.insertvalue %11, %8[2] : !llvm.array<3 x struct<"class.S3", (i32)>> + // CHECK: llvm.return %12 : !llvm.array<3 x struct<"class.S3", (i32)>> + // CHECK: } + + cir.func @shouldLowerClassCopies() { + // CHECK: llvm.func @shouldLowerClassCopies() + %1 = cir.alloca !ty_S, !cir.ptr, ["a"] {alignment = 4 : i64} + // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64 + // CHECK: %[[#SA:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"class.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr + %2 = cir.alloca !ty_S, !cir.ptr, ["b", init] {alignment = 4 : i64} + // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64 + // CHECK: %[[#SB:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"class.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> 
!llvm.ptr + cir.copy %1 to %2 : !cir.ptr + // CHECK: %[[#SIZE:]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: "llvm.intr.memcpy"(%[[#SB]], %[[#SA]], %[[#SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () + cir.return + } +} diff --git a/clang/test/CIR/Lowering/cmp.cir b/clang/test/CIR/Lowering/cmp.cir new file mode 100644 index 000000000000..a905e8490b20 --- /dev/null +++ b/clang/test/CIR/Lowering/cmp.cir @@ -0,0 +1,68 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !s32i, !cir.ptr, ["a"] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["b"] {alignment = 4 : i64} + %2 = cir.alloca !cir.float, !cir.ptr, ["c"] {alignment = 4 : i64} + %3 = cir.alloca !cir.float, !cir.ptr, ["d"] {alignment = 4 : i64} + %4 = cir.alloca !cir.bool, !cir.ptr, ["e"] {alignment = 1 : i64} + %5 = cir.load %0 : !cir.ptr, !s32i + %6 = cir.load %1 : !cir.ptr, !s32i + %7 = cir.cmp(gt, %5, %6) : !s32i, !cir.bool + // CHECK: llvm.icmp "sgt" + %8 = cir.load %0 : !cir.ptr, !s32i + %9 = cir.load %1 : !cir.ptr, !s32i + %10 = cir.cmp(eq, %8, %9) : !s32i, !cir.bool + // CHECK: llvm.icmp "eq" + %11 = cir.load %0 : !cir.ptr, !s32i + %12 = cir.load %1 : !cir.ptr, !s32i + %13 = cir.cmp(lt, %11, %12) : !s32i, !cir.bool + // CHECK: llvm.icmp "slt" + %14 = cir.load %0 : !cir.ptr, !s32i + %15 = cir.load %1 : !cir.ptr, !s32i + %16 = cir.cmp(ge, %14, %15) : !s32i, !cir.bool + // CHECK: llvm.icmp "sge" + %17 = cir.load %0 : !cir.ptr, !s32i + %18 = cir.load %1 : !cir.ptr, !s32i + %19 = cir.cmp(ne, %17, %18) : !s32i, !cir.bool + // CHECK: llvm.icmp "ne" + %20 = cir.load %0 : !cir.ptr, !s32i + %21 = cir.load %1 : !cir.ptr, !s32i + %22 = cir.cmp(le, %20, %21) : !s32i, !cir.bool + // CHECK: llvm.icmp "sle" + %23 = cir.load %2 : !cir.ptr, !cir.float + %24 = cir.load %3 : !cir.ptr, !cir.float + %25 = cir.cmp(gt, %23, %24) : !cir.float, !cir.bool + // CHECK: llvm.fcmp "ogt" + %26 = cir.load %2 : !cir.ptr, !cir.float + %27 = cir.load %3 : !cir.ptr, !cir.float + %28 = cir.cmp(eq, %26, %27) : !cir.float, !cir.bool + // CHECK: llvm.fcmp "oeq" + %29 = cir.load %2 : !cir.ptr, !cir.float + %30 = cir.load %3 : !cir.ptr, !cir.float + %31 = cir.cmp(lt, %29, %30) : !cir.float, !cir.bool + // CHECK: llvm.fcmp "olt" + %32 = cir.load %2 : !cir.ptr, !cir.float + %33 = cir.load %3 : !cir.ptr, !cir.float + %34 = cir.cmp(ge, %32, %33) : !cir.float, !cir.bool + // CHECK: llvm.fcmp "oge" + %35 = cir.load %2 : !cir.ptr, !cir.float + %36 = cir.load %3 : !cir.ptr, !cir.float + %37 = cir.cmp(ne, %35, %36) : !cir.float, !cir.bool + // CHECK: llvm.fcmp "une" + %38 = cir.load %2 : !cir.ptr, !cir.float + %39 = cir.load %3 : !cir.ptr, !cir.float + %40 = cir.cmp(le, %38, %39) : !cir.float, !cir.bool + // CHECK: llvm.fcmp "ole" + + // Pointer comparisons. 
+ %41 = cir.cmp(ne, %0, %1) : !cir.ptr, !cir.bool + // CHECK: llvm.icmp "ne" + %42 = cir.cmp(lt, %0, %1) : !cir.ptr, !cir.bool + // CHECK: llvm.icmp "ult" + cir.return + } +} diff --git a/clang/test/CIR/Lowering/cmp3way.cir b/clang/test/CIR/Lowering/cmp3way.cir new file mode 100644 index 000000000000..6e00a9440f59 --- /dev/null +++ b/clang/test/CIR/Lowering/cmp3way.cir @@ -0,0 +1,40 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s8i = !cir.int +!s32i = !cir.int +!u32i = !cir.int + +#cmp3way_info = #cir.cmp3way_info + +module { + cir.func @test_scmp(%arg0 : !s32i, %arg1 : !s32i) -> !s8i { + %0 = cir.cmp3way(%arg0 : !s32i, %arg1, #cmp3way_info) : !s8i + cir.return %0 : !s8i + } + + // MLIR: llvm.func @test_scmp(%arg0: i32, %arg1: i32) -> i8 + // MLIR-NEXT: %0 = llvm.call_intrinsic "llvm.scmp.i8.i32"(%arg0, %arg1) : (i32, i32) -> i8 + // MLIR-NEXT: llvm.return %0 : i8 + // MLIR-NEXT: } + + // LLVM: define i8 @test_scmp(i32 %0, i32 %1) + // LLVM-NEXT: %[[#RET:]] = call i8 @llvm.scmp.i8.i32(i32 %0, i32 %1) + // LLVM-NEXT: ret i8 %[[#RET]] + // LLVM-NEXT: } + + cir.func @test_ucmp(%arg0 : !u32i, %arg1 : !u32i) -> !s8i { + %0 = cir.cmp3way(%arg0 : !u32i, %arg1, #cmp3way_info) : !s8i + cir.return %0 : !s8i + } + + // MLIR: llvm.func @test_ucmp(%arg0: i32, %arg1: i32) -> i8 + // MLIR-NEXT: %0 = llvm.call_intrinsic "llvm.ucmp.i8.i32"(%arg0, %arg1) : (i32, i32) -> i8 + // MLIR-NEXT: llvm.return %0 : i8 + // MLIR-NEXT: } + + // LLVM: define i8 @test_ucmp(i32 %0, i32 %1) + // LLVM-NEXT: %[[#RET:]] = call i8 @llvm.ucmp.i8.i32(i32 %0, i32 %1) + // LLVM-NEXT: ret i8 %[[#RET]] + // LLVM-NEXT: } +} diff --git a/clang/test/CIR/Lowering/complex.cir b/clang/test/CIR/Lowering/complex.cir new file mode 100644 index 000000000000..91ded659997d --- /dev/null +++ b/clang/test/CIR/Lowering/complex.cir @@ -0,0 +1,15 @@ +// RUN: cir-translate -cir-to-llvmir -o %t.ll %s +// RUN: FileCheck --input-file %t.ll -check-prefix=LLVM %s + +!s32i = !cir.int + +module { + cir.func @complex_const() -> !cir.complex { + %0 = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex + cir.return %0 : !cir.complex + } + + // LLVM-LABEL: define { i32, i32 } @complex_const() + // LLVM-NEXT: ret { i32, i32 } { i32 1, i32 2 } + // LLVM-NEXT: } +} diff --git a/clang/test/CIR/Lowering/const-array.cir b/clang/test/CIR/Lowering/const-array.cir new file mode 100644 index 000000000000..69917ddb3a36 --- /dev/null +++ b/clang/test/CIR/Lowering/const-array.cir @@ -0,0 +1,15 @@ +// RUN: cir-translate %s -cir-to-llvmir -o - | FileCheck %s -check-prefix=LLVM + +!u8i = !cir.int + +module { + cir.global "private" internal @normal_url_char = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<1> : !u8i], trailing_zeros> : !cir.array + // LLVM: @normal_url_char = internal global [4 x i8] c"\00\01\00\00" + + cir.func @c0() -> !cir.ptr> { + %0 = cir.get_global @normal_url_char : !cir.ptr> + cir.return %0 : !cir.ptr> + } + // LLVM: define ptr @c0() + // LLVM: ret ptr @normal_url_char +} diff --git a/clang/test/CIR/Lowering/const.cir b/clang/test/CIR/Lowering/const.cir new file mode 100644 index 000000000000..4bb234c56995 --- /dev/null +++ b/clang/test/CIR/Lowering/const.cir @@ -0,0 +1,81 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s8i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!ty_anon2E1_ = !cir.struct, !cir.int} #cir.record.decl.ast> +module { + cir.func @testConstArrInit() { + 
%0 = cir.const #cir.const_array<"string\00" : !cir.array> : !cir.array + // CHECK: llvm.mlir.constant(dense<[115, 116, 114, 105, 110, 103, 0]> : tensor<7xi8>) : !llvm.array<7 x i8> + %1 = cir.const #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array + // CHECK: llvm.mlir.constant(dense<[1, 2]> : tensor<2xi32>) : !llvm.array<2 x i32> + %3 = cir.const #cir.const_array<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float]> : !cir.array + // CHECK: llvm.mlir.constant(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) : !llvm.array<2 x f32> + %4 = cir.const #cir.zero : !cir.array + // CHECK: llvm.mlir.zero : !llvm.array<3 x i32> + cir.return + } + + cir.func @testConvertConstArrayToDenseConst() { + %0 = cir.const #cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i]> : !cir.array, #cir.zero : !cir.array]> : !cir.array x 2> + %1 = cir.const #cir.const_array<[#cir.const_array<[#cir.int<1> : !s64i]> : !cir.array, #cir.zero : !cir.array]> : !cir.array x 2> + %2 = cir.const #cir.const_array<[#cir.const_array<[#cir.fp<1.000000e+00> : !cir.float]> : !cir.array, #cir.zero : !cir.array]> : !cir.array x 2> + %3 = cir.const #cir.const_array<[#cir.const_array<[#cir.fp<1.000000e+00> : !cir.double]> : !cir.array, #cir.zero : !cir.array]> : !cir.array x 2> + %4 = cir.const #cir.const_array<[#cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.array]> : !cir.array x 1>, #cir.zero : !cir.array x 1>]> : !cir.array x 1> x 2> + + cir.return + } + // CHECK: llvm.func @testConvertConstArrayToDenseConst() + // CHECK: {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1], [0{{\]\]}}> : tensor<2x1xi32>) : !llvm.array<2 x array<1 x i32>> + // CHECK: {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1], [0{{\]\]}}> : tensor<2x1xi64>) : !llvm.array<2 x array<1 x i64>> + // CHECK: {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1.000000e+00], [0.000000e+00{{\]\]}}> : tensor<2x1xf32>) : !llvm.array<2 x array<1 x f32>> + // CHECK: {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1.000000e+00], [0.000000e+00{{\]\]}}> : tensor<2x1xf64>) : !llvm.array<2 x array<1 x f64>> + // CHECK: {{%.*}} = llvm.mlir.constant(dense<{{\[\[\[}}1, 1, 1{{\]\]}}, {{\[\[}}0, 0, 0{{\]\]\]}}> : tensor<2x1x3xi32>) : !llvm.array<2 x array<1 x array<3 x i32>>> + // CHECK: llvm.return + + cir.func @testConstArrayOfStructs() { + %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 4 : i64} + %1 = cir.const #cir.const_array<[#cir.const_struct<{#cir.int<0> : !s32i, #cir.int<1> : !s32i}> : !ty_anon2E1_]> : !cir.array + cir.store %1, %0 : !cir.array, !cir.ptr> + cir.return + } + // CHECK: llvm.func @testConstArrayOfStructs() + // CHECK: %0 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %1 = llvm.alloca %0 x !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>> {alignment = 4 : i64} : (i64) -> !llvm.ptr + // CHECK: %2 = llvm.mlir.undef : !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>> + // CHECK: %3 = llvm.mlir.undef : !llvm.struct<"struct.anon.1", (i32, i32)> + // CHECK: %4 = llvm.mlir.constant(0 : i32) : i32 + // CHECK: %5 = llvm.insertvalue %4, %3[0] : !llvm.struct<"struct.anon.1", (i32, i32)> + // CHECK: %6 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %7 = llvm.insertvalue %6, %5[1] : !llvm.struct<"struct.anon.1", (i32, i32)> + // CHECK: %8 = llvm.insertvalue %7, %2[0] : !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>> + // CHECK: llvm.store %8, %1 {{.*}}: !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>>, !llvm.ptr + // CHECK: llvm.return + + cir.func 
@testArrWithTrailingZeros() { + %0 = cir.alloca !cir.array, !cir.ptr>, ["a"] {alignment = 16 : i64} + %1 = cir.const #cir.const_array<[#cir.int<1> : !s32i], trailing_zeros> : !cir.array + cir.store %1, %0 : !cir.array, !cir.ptr> + cir.return + } + // CHECK: llvm.func @testArrWithTrailingZeros() + // CHECK: %0 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 16 : i64} : (i64) -> !llvm.ptr + // CHECK: %2 = llvm.mlir.zero : !llvm.array<10 x i32> + // CHECK: %3 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %4 = llvm.insertvalue %3, %2[0] : !llvm.array<10 x i32> + + cir.func @testInitArrWithBool() { + %1 = cir.const #cir.const_array<[#cir.bool : !cir.bool]> : !cir.array + cir.return + } + + // CHECK: llvm.func @testInitArrWithBool() + // CHECK: [[ARR:%.*]] = llvm.mlir.undef : !llvm.array<1 x i8> + // CHECK: [[TRUE:%.*]] = llvm.mlir.constant(1 : i8) : i8 + // CHECK: {{.*}} = llvm.insertvalue [[TRUE]], [[ARR]][0] : !llvm.array<1 x i8> + // CHECK: llvm.return + +} diff --git a/clang/test/CIR/Lowering/data-member.cir b/clang/test/CIR/Lowering/data-member.cir new file mode 100644 index 000000000000..40846c53f920 --- /dev/null +++ b/clang/test/CIR/Lowering/data-member.cir @@ -0,0 +1,52 @@ +// RUN: cir-opt -cir-to-llvm -o - %s | FileCheck -check-prefix=MLIR %s +// RUN: cir-translate -cir-to-llvmir -o - %s | FileCheck -check-prefix=LLVM %s + +!s32i = !cir.int +!s64i = !cir.int +!structT = !cir.struct, !cir.int, !cir.int}> + +module @test { + cir.global external @pt_member = #cir.data_member<1> : !cir.data_member + // MLIR: llvm.mlir.global external @pt_member(4 : i64) {addr_space = 0 : i32} : i64 + // LLVM: @pt_member = global i64 4 + + cir.func @constant() -> !cir.data_member { + %0 = cir.const #cir.data_member<1> : !cir.data_member + cir.return %0 : !cir.data_member + } + // MLIR: llvm.func @constant() -> i64 + // MLIR-NEXT: %0 = llvm.mlir.constant(4 : i64) : i64 + // MLIR-NEXT: llvm.return %0 : i64 + // MLIR-NEXT: } + + // LLVM: define i64 @constant() + // LLVM-NEXT: ret i64 4 + // LLVM-NEXT: } + + cir.func @null_constant() -> !cir.data_member { + %0 = cir.const #cir.data_member : !cir.data_member + cir.return %0 : !cir.data_member + } + // MLIR: llvm.func @null_constant() -> i64 + // MLIR-NEXT: %0 = llvm.mlir.constant(-1 : i64) : i64 + // MLIR-NEXT: llvm.return %0 : i64 + // MLIR-NEXT: } + + // LLVM: define i64 @null_constant() !dbg !7 { + // LLVM-NEXT: ret i64 -1 + // LLVM-NEXT: } + + cir.func @get_runtime_member(%arg0: !cir.ptr, %arg1: !cir.data_member) -> !cir.ptr { + %0 = cir.get_runtime_member %arg0[%arg1 : !cir.data_member] : !cir.ptr -> !cir.ptr + cir.return %0 : !cir.ptr + } + // MLIR: llvm.func @get_runtime_member(%arg0: !llvm.ptr, %arg1: i64) -> !llvm.ptr + // MLIR-NEXT: %0 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, i8 + // MLIR-NEXT: llvm.return %0 : !llvm.ptr + // MLIR-NEXT: } + + // LLVM: define ptr @get_runtime_member(ptr %0, i64 %1) + // LLVM-NEXT: %3 = getelementptr i8, ptr %0, i64 %1 + // LLVM-NEXT: ret ptr %3 + // LLVM-NEXT: } +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/dot.cir b/clang/test/CIR/Lowering/dot.cir new file mode 100644 index 000000000000..5c5ed4736f7a --- /dev/null +++ b/clang/test/CIR/Lowering/dot.cir @@ -0,0 +1,115 @@ +// RUN: cir-opt %s -cir-to-llvm --reconcile-unrealized-casts -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR + +!s32i = !cir.int +module { + cir.func @dot(%arg0: !cir.ptr, %arg1: !cir.ptr, %arg2: !s32i) -> !cir.double {
+ %0 = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] {alignment = 8 : i64} + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["b", init] {alignment = 8 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["size", init] {alignment = 4 : i64} + %3 = cir.alloca !cir.double, !cir.ptr, ["__retval"] {alignment = 8 : i64} + %4 = cir.alloca !cir.double, !cir.ptr, ["q", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> + cir.store %arg1, %1 : !cir.ptr, !cir.ptr> + cir.store %arg2, %2 : !s32i, !cir.ptr + %5 = cir.const #cir.fp<0.000000e+00> : !cir.double + cir.store %5, %4 : !cir.double, !cir.ptr + cir.scope { + %8 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %9 = cir.const #cir.int<0> : !s32i + cir.store %9, %8 : !s32i, !cir.ptr + cir.for : cond { + %10 = cir.load %8 : !cir.ptr, !s32i + %11 = cir.load %2 : !cir.ptr, !s32i + %12 = cir.cmp(lt, %10, %11) : !s32i, !s32i + %13 = cir.cast(int_to_bool, %12 : !s32i), !cir.bool + cir.condition(%13) + } body { + %10 = cir.load %0 : !cir.ptr>, !cir.ptr + %11 = cir.load %8 : !cir.ptr, !s32i + %12 = cir.ptr_stride(%10 : !cir.ptr, %11 : !s32i), !cir.ptr + %13 = cir.load %12 : !cir.ptr, !cir.double + %14 = cir.load %1 : !cir.ptr>, !cir.ptr + %15 = cir.load %8 : !cir.ptr, !s32i + %16 = cir.ptr_stride(%14 : !cir.ptr, %15 : !s32i), !cir.ptr + %17 = cir.load %16 : !cir.ptr, !cir.double + %18 = cir.binop(mul, %13, %17) : !cir.double + %19 = cir.load %4 : !cir.ptr, !cir.double + %20 = cir.binop(add, %19, %18) : !cir.double + cir.store %20, %4 : !cir.double, !cir.ptr + cir.yield + } step { + %10 = cir.load %8 : !cir.ptr, !s32i + %11 = cir.unary(inc, %10) : !s32i, !s32i + cir.store %11, %8 : !s32i, !cir.ptr + cir.yield + } + } + %6 = cir.load %4 : !cir.ptr, !cir.double + cir.store %6, %3 : !cir.double, !cir.ptr + %7 = cir.load %3 : !cir.ptr, !cir.double + cir.return %7 : !cir.double + } +} + +// MLIR-LABEL: llvm.func @dot( +// MLIR: %[[VAL_3:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_4:.*]] = llvm.alloca %[[VAL_3]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr +// MLIR: %[[VAL_5:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_6:.*]] = llvm.alloca %[[VAL_5]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr +// MLIR: %[[VAL_7:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR: %[[VAL_9:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x f64 {alignment = 8 : i64} : (i64) -> !llvm.ptr +// MLIR: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_12:.*]] = llvm.alloca %[[VAL_11]] x f64 {alignment = 8 : i64} : (i64) -> !llvm.ptr +// MLIR: llvm.store {{.*}}, %[[VAL_4]] {{.*}}: !llvm.ptr, !llvm.ptr +// MLIR: llvm.store {{.*}}, %[[VAL_6]] {{.*}}: !llvm.ptr, !llvm.ptr +// MLIR: llvm.store {{.*}}, %[[VAL_8]] {{.*}}: i32, !llvm.ptr +// MLIR: %[[VAL_13:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64 +// MLIR: llvm.store %[[VAL_13]], %[[VAL_12]] {{.*}}: f64, !llvm.ptr +// MLIR: llvm.br ^bb1 +// MLIR: ^bb1: +// MLIR: %[[VAL_14:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_15:.*]] = llvm.alloca %[[VAL_14]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR: %[[VAL_16:.*]] = llvm.mlir.constant(0 : i32) : i32 +// MLIR: llvm.store %[[VAL_16]], %[[VAL_15]] {{.*}}: i32, !llvm.ptr +// MLIR: llvm.br ^bb2 +// MLIR: ^bb2: +// MLIR: %[[VAL_17:.*]] = llvm.load %[[VAL_15]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: %[[VAL_18:.*]] = llvm.load 
%[[VAL_8]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: %[[VAL_19:.*]] = llvm.icmp "slt" %[[VAL_17]], %[[VAL_18]] : i32 +// MLIR: %[[VAL_20:.*]] = llvm.zext %[[VAL_19]] : i1 to i32 +// MLIR: %[[VAL_21:.*]] = llvm.mlir.constant(0 : i32) : i32 +// MLIR: %[[VAL_22:.*]] = llvm.icmp "ne" %[[VAL_20]], %[[VAL_21]] : i32 +// MLIR: llvm.cond_br %[[VAL_22]], ^bb3, ^bb5 +// MLIR: ^bb3: +// MLIR: %[[VAL_23:.*]] = llvm.load %[[VAL_4]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr +// MLIR: %[[VAL_24:.*]] = llvm.load %[[VAL_15]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: %[[VAL_25:.*]] = llvm.sext %[[VAL_24]] : i32 to i64 +// MLIR: %[[VAL_26:.*]] = llvm.getelementptr %[[VAL_23]]{{\[}}%[[VAL_25]]] : (!llvm.ptr, i64) -> !llvm.ptr, f64 +// MLIR: %[[VAL_27:.*]] = llvm.load %[[VAL_26]] {alignment = 8 : i64} : !llvm.ptr -> f64 +// MLIR: %[[VAL_28:.*]] = llvm.load %[[VAL_6]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr +// MLIR: %[[VAL_29:.*]] = llvm.load %[[VAL_15]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: %[[VAL_30:.*]] = llvm.sext %[[VAL_29]] : i32 to i64 +// MLIR: %[[VAL_31:.*]] = llvm.getelementptr %[[VAL_28]]{{\[}}%[[VAL_30]]] : (!llvm.ptr, i64) -> !llvm.ptr, f64 +// MLIR: %[[VAL_32:.*]] = llvm.load %[[VAL_31]] {alignment = 8 : i64} : !llvm.ptr -> f64 +// MLIR: %[[VAL_33:.*]] = llvm.fmul %[[VAL_27]], %[[VAL_32]] : f64 +// MLIR: %[[VAL_34:.*]] = llvm.load %[[VAL_12]] {alignment = 8 : i64} : !llvm.ptr -> f64 +// MLIR: %[[VAL_35:.*]] = llvm.fadd %[[VAL_34]], %[[VAL_33]] : f64 +// MLIR: llvm.store %[[VAL_35]], %[[VAL_12]] {{.*}}: f64, !llvm.ptr +// MLIR: llvm.br ^bb4 +// MLIR: ^bb4: +// MLIR: %[[VAL_36:.*]] = llvm.load %[[VAL_15]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: %[[VAL_37:.*]] = llvm.mlir.constant(1 : i32) : i32 +// MLIR: %[[VAL_38:.*]] = llvm.add %[[VAL_36]], %[[VAL_37]] : i32 +// MLIR: llvm.store %[[VAL_38]], %[[VAL_15]] {{.*}}: i32, !llvm.ptr +// MLIR: llvm.br ^bb2 +// MLIR: ^bb5: +// MLIR: llvm.br ^bb6 +// MLIR: ^bb6: +// MLIR: %[[VAL_39:.*]] = llvm.load %[[VAL_12]] {alignment = 8 : i64} : !llvm.ptr -> f64 +// MLIR: llvm.store %[[VAL_39]], %[[VAL_10]] {{.*}}: f64, !llvm.ptr +// MLIR: %[[VAL_40:.*]] = llvm.load %[[VAL_10]] {alignment = 8 : i64} : !llvm.ptr -> f64 +// MLIR: llvm.return %[[VAL_40]] : f64 +// MLIR: } \ No newline at end of file diff --git a/clang/test/CIR/Lowering/exceptions.cir b/clang/test/CIR/Lowering/exceptions.cir new file mode 100644 index 000000000000..5b0414e2ee78 --- /dev/null +++ b/clang/test/CIR/Lowering/exceptions.cir @@ -0,0 +1,108 @@ +// RUN: cir-translate %s -cir-to-llvmir -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +!s32i = !cir.int +!s8i = !cir.int +!u32i = !cir.int +!u64i = !cir.int +!u8i = !cir.int +!void = !cir.void + +module @"try-catch.cpp" attributes {cir.lang = #cir.lang, cir.sob = #cir.signed_overflow_behavior, cir.triple = "x86_64-unknown-linux-gnu", dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>, #dlti.dl_entry<"dlti.endianness", "little">>, llvm.data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} { + cir.global "private" constant external @_ZTIi : !cir.ptr + cir.global "private" constant external @_ZTIPKc : !cir.ptr + cir.func private @_Z8divisionii(!s32i, !s32i) -> !cir.double + // LLVM: @_Z2tcv() personality ptr @__gxx_personality_v0 + cir.func @_Z2tcv() -> !u64i { + %0 = cir.alloca !u64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %3 = cir.alloca !u64i, !cir.ptr, ["z"] {alignment = 8 : i64} + %4 = cir.const #cir.int<50> : !s32i + cir.store %4, %1 : !s32i, !cir.ptr + %5 = cir.const #cir.int<3> : !s32i + cir.store %5, %2 : !s32i, !cir.ptr + cir.br ^bb1 + ^bb1: + %6 = cir.alloca !cir.ptr, !cir.ptr>, ["msg"] {alignment = 8 : i64} + %7 = cir.alloca !s32i, !cir.ptr, ["idx"] {alignment = 4 : i64} + cir.br ^bb2 + ^bb2: + %8 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %9 = cir.const #cir.int<4> : !s32i + cir.store %9, %8 : !s32i, !cir.ptr + %10 = cir.load %1 : !cir.ptr, !s32i + %11 = cir.load %2 : !cir.ptr, !s32i + %12 = cir.try_call @_Z8divisionii(%10, %11) ^bb3, ^bb4 : (!s32i, !s32i) -> !cir.double + // LLVM: invoke double @_Z8divisionii + // LLVM: to label %[[CONT:.*]] unwind label %[[UNWIND:.*]], + ^bb3: + // LLVM: [[CONT]]: + %13 = cir.cast(float_to_int, %12 : !cir.double), !u64i + cir.store %13, %3 : !u64i, !cir.ptr + %14 = cir.load %8 : !cir.ptr, !s32i + %15 = cir.unary(inc, %14) : !s32i, !s32i + cir.store %15, %8 : !s32i, !cir.ptr + cir.br ^bb10 + ^bb4: + // LLVM: [[UNWIND]]: + // LLVM: %[[EHINFO:.*]] = landingpad { ptr, i32 } + // LLVM: catch ptr @_ZTIi + // LLVM: catch ptr @_ZTIPKc + %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc] + // LLVM: extractvalue { ptr, i32 } %[[EHINFO]], 0 + // LLVM: extractvalue { ptr, i32 } %[[EHINFO]], 1 + cir.br ^bb5(%exception_ptr, %type_id : !cir.ptr, !u32i) + ^bb5(%16: !cir.ptr, %17: !u32i): + %18 = cir.eh.typeid @_ZTIi + // LLVM: call i32 @llvm.eh.typeid.for.p0(ptr @_ZTIi) + %19 = cir.cmp(eq, %17, %18) : !u32i, !cir.bool + cir.brcond %19 ^bb6(%16 : !cir.ptr), ^bb7(%16, %17 : !cir.ptr, !u32i) + ^bb6(%20: !cir.ptr): + %21 = cir.catch_param begin %20 -> !cir.ptr + // LLVM: %[[EH_IDX:.*]] = phi ptr + // LLVM: call ptr @__cxa_begin_catch(ptr %[[EH_IDX]]) + %22 = cir.load %21 : !cir.ptr, !s32i + cir.store %22, %7 : !s32i, !cir.ptr + %23 = cir.const #cir.int<98> : !s32i + %24 = cir.cast(integral, %23 : !s32i), !u64i + cir.store %24, %3 : !u64i, !cir.ptr + %25 = cir.load %7 : !cir.ptr, !s32i + %26 = cir.unary(inc, %25) : !s32i, !s32i + cir.store %26, %7 : !s32i, !cir.ptr + cir.catch_param end + // LLVM: call void @__cxa_end_catch() + cir.br ^bb10 + ^bb7(%27: !cir.ptr, %28: !u32i): + %29 = cir.eh.typeid @_ZTIPKc + // LLVM: call i32 @llvm.eh.typeid.for.p0(ptr @_ZTIPKc) + %30 = cir.cmp(eq, %28, %29) : !u32i, !cir.bool + cir.brcond %30 ^bb8(%27 : !cir.ptr), ^bb9(%27, %28 : !cir.ptr, !u32i) + ^bb8(%31: !cir.ptr): + %32 = cir.catch_param begin %31 -> !cir.ptr + // LLVM: %[[EH_MSG:.*]] = phi ptr + // LLVM: call ptr @__cxa_begin_catch(ptr %[[EH_MSG]]) + cir.store %32, %6 : !cir.ptr, !cir.ptr> + %33 = cir.const #cir.int<99> : !s32i + %34 = cir.cast(integral, %33 : !s32i), !u64i + cir.store %34, %3 : !u64i, !cir.ptr + %35 = cir.load %6 : !cir.ptr>, !cir.ptr + %36 = cir.const #cir.int<0> : !s32i + %37 = cir.ptr_stride(%35 : !cir.ptr, %36 : !s32i), !cir.ptr + cir.catch_param end + // LLVM: call 
void @__cxa_end_catch() + cir.br ^bb10 + ^bb9(%38: !cir.ptr, %39: !u32i): + // LLVM: %[[RESUME_EH:.*]] = phi ptr + // LLVM: %[[RESUME_SEL:.*]] = phi i32 + // LLVM: %[[RES0:.*]] = insertvalue { ptr, i32 } poison, ptr %[[RESUME_EH]], 0 + // LLVM: %[[RES1:.*]] = insertvalue { ptr, i32 } %[[RES0]], i32 %[[RESUME_SEL]], 1 + // LLVM: resume { ptr, i32 } %[[RES1]] + cir.resume %38, %39 + ^bb10: + %40 = cir.load %3 : !cir.ptr, !u64i + cir.store %40, %0 : !u64i, !cir.ptr + %41 = cir.load %0 : !cir.ptr, !u64i + cir.return %41 : !u64i + } +} diff --git a/clang/test/CIR/Lowering/expect.cir b/clang/test/CIR/Lowering/expect.cir new file mode 100644 index 000000000000..64c9c10e6277 --- /dev/null +++ b/clang/test/CIR/Lowering/expect.cir @@ -0,0 +1,54 @@ +// RUN: cir-opt %s -cir-to-llvm | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s64i = !cir.int +module { + cir.func @foo(%arg0: !s64i) { + %0 = cir.const #cir.int<1> : !s64i + %1 = cir.expect(%arg0, %0) : !s64i + %2 = cir.cast(int_to_bool, %1 : !s64i), !cir.bool + cir.if %2 { + cir.yield + } + %3 = cir.expect(%arg0, %0, 1.000000e-01) : !s64i + %4 = cir.cast(int_to_bool, %3 : !s64i), !cir.bool + cir.if %4 { + cir.yield + } + cir.return + } +} + +// MLIR: llvm.func @foo(%arg0: i64) +// MLIR: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 +// MLIR: [[EXPECT:%.*]] = llvm.intr.expect %arg0, [[ONE]] : i64 +// MLIR: [[ZERO:%.*]] = llvm.mlir.constant(0 : i64) : i64 +// MLIR: [[CMP_NE:%.*]] = llvm.icmp "ne" [[EXPECT]], [[ZERO]] : i64 +// MLIR: llvm.cond_br [[CMP_NE]], ^bb1, ^bb2 +// MLIR: ^bb1: // pred: ^bb0 +// MLIR: llvm.br ^bb2 +// MLIR: ^bb2: // 2 preds: ^bb0, ^bb1 +// MLIR: [[EXPECT_WITH_PROB:%.*]] = llvm.intr.expect.with.probability %arg0, [[ONE]], 1.000000e-01 : i64 +// MLIR: [[ZERO:%.*]] = llvm.mlir.constant(0 : i64) : i64 +// MLIR: [[CMP_NE:%.*]] = llvm.icmp "ne" [[EXPECT_WITH_PROB]], [[ZERO]] : i64 +// MLIR: llvm.cond_br [[CMP_NE]], ^bb3, ^bb4 +// MLIR: ^bb3: // pred: ^bb2 +// MLIR: llvm.br ^bb4 +// MLIR: ^bb4: // 2 preds: ^bb2, ^bb3 +// MLIR: llvm.return + +// LLVM: define void @foo(i64 %0) +// LLVM: [[EXPECT:%.*]] = call i64 @llvm.expect.i64(i64 %0, i64 1) +// LLVM: [[CMP_NE:%.*]] = icmp ne i64 [[EXPECT]], 0 +// LLVM: br i1 [[CMP_NE]], label %4, label %5 +// LLVM: 4: +// LLVM: br label %5 +// LLVM: 5: +// LLVM: [[EXPECT_WITH_PROB:%.*]] = call i64 @llvm.expect.with.probability.i64(i64 %0, i64 1, double 1.000000e-01) +// LLVM: [[CMP_NE:%.*]] = icmp ne i64 [[EXPECT_WITH_PROB]], 0 +// LLVM: br i1 [[CMP_NE]], label %8, label %9 +// LLVM: 8: +// LLVM: br label %9 +// LLVM: 9: +// LLVM: ret void + diff --git a/clang/test/CIR/Lowering/float.cir b/clang/test/CIR/Lowering/float.cir new file mode 100644 index 000000000000..d4b66500b210 --- /dev/null +++ b/clang/test/CIR/Lowering/float.cir @@ -0,0 +1,18 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir + +module { + cir.func @test() { + // %0 = cir.const 1.0 : f16 + // DISABLED-CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f16) : f16 + %1 = cir.const #cir.fp<1.0> : !cir.float + // CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f32) : f32 + %2 = cir.const #cir.fp<1.0> : !cir.double + // CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f64) : f64 + %3 = cir.const #cir.fp<1.0> : !cir.long_double + // CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f80) : f80 + // %5 = cir.const 1.0 : bf16 + // DISABLED-CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : bf16) : bf16 + cir.return + } +} \ No newline 
at end of file diff --git a/clang/test/CIR/Lowering/func-call-conv.cir b/clang/test/CIR/Lowering/func-call-conv.cir new file mode 100644 index 000000000000..a32e67a7d1de --- /dev/null +++ b/clang/test/CIR/Lowering/func-call-conv.cir @@ -0,0 +1,20 @@ +// RUN: cir-translate %s -cir-to-llvmir -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM + +!s32i = !cir.int +module { + // LLVM: define void @foo() + cir.func @foo() cc(c) { + cir.return + } + + // LLVM: define spir_kernel void @bar() + cir.func @bar() cc(spir_kernel) { + cir.return + } + + // LLVM: define spir_func void @baz() + cir.func @baz() cc(spir_function) { + cir.return + } +} diff --git a/clang/test/CIR/Lowering/func.cir b/clang/test/CIR/Lowering/func.cir new file mode 100644 index 000000000000..76e6d4f0d181 --- /dev/null +++ b/clang/test/CIR/Lowering/func.cir @@ -0,0 +1,17 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck %s -check-prefix=MLIR --input-file=%t.mlir + +!s32i = !cir.int +module { + cir.func no_proto private @noProto3(...) -> !s32i + // MLIR: llvm.func @noProto3(...) -> i32 + cir.func @test3(%arg0: !s32i) { + %3 = cir.get_global @noProto3 : !cir.ptr> + // MLIR: %[[#FN_PTR:]] = llvm.mlir.addressof @noProto3 : !llvm.ptr + %4 = cir.cast(bitcast, %3 : !cir.ptr>), !cir.ptr> + // MLIR: %[[#FUNC:]] = llvm.bitcast %[[#FN_PTR]] : !llvm.ptr to !llvm.ptr + %5 = cir.call %4(%arg0) : (!cir.ptr>, !s32i) -> !s32i + // MLIR: %{{.+}} = llvm.call %[[#FUNC]](%{{.+}}) : !llvm.ptr, (i32) -> i32 + cir.return + } +} diff --git a/clang/test/CIR/Lowering/globals.cir b/clang/test/CIR/Lowering/globals.cir new file mode 100644 index 000000000000..c3bd1cc3a726 --- /dev/null +++ b/clang/test/CIR/Lowering/globals.cir @@ -0,0 +1,194 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +!void = !cir.void +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!s8i = !cir.int +!u32i = !cir.int +!u64i = !cir.int +!u8i = !cir.int +!ty_A = !cir.struct x 2>} #cir.record.decl.ast> +!ty_Bar = !cir.struct +!ty_StringStruct = !cir.struct, !cir.array, !cir.array} #cir.record.decl.ast> +!ty_StringStructPtr = !cir.struct} #cir.record.decl.ast> +!ty_anon2E1_ = !cir.struct)>>} #cir.record.decl.ast> + +module { + cir.global external @a = #cir.int<3> : !s32i + cir.global external @c = #cir.int<2> : !u64i + cir.global external @y = #cir.fp<3.400000e+00> : !cir.float + cir.global external @w = #cir.fp<4.300000e+00> : !cir.double + cir.global external @x = #cir.int<51> : !s8i + cir.global external @rgb = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<233> : !u8i, #cir.int<33> : !u8i]> : !cir.array + cir.global external @alpha = #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i, #cir.int<0> : !s8i]> : !cir.array + cir.global "private" constant internal @".str" = #cir.const_array<"example\00" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global external @s = #cir.global_view<@".str"> : !cir.ptr + // MLIR: llvm.mlir.global internal constant @".str"("example\00") + // MLIR-SAME: {addr_space = 0 : i32, alignment = 1 : i64} + // MLIR: llvm.mlir.global external @s() {addr_space = 0 : i32} : !llvm.ptr { + // MLIR: %0 = llvm.mlir.addressof @".str" : !llvm.ptr + // MLIR: %1 = llvm.bitcast %0 : !llvm.ptr to !llvm.ptr + // MLIR: llvm.return %1 : !llvm.ptr + // MLIR: } + // LLVM: @.str = internal constant [8 x i8] c"example\00" + // LLVM: @s = 
global ptr @.str + cir.global external @aPtr = #cir.global_view<@a> : !cir.ptr + // MLIR: llvm.mlir.global external @aPtr() {addr_space = 0 : i32} : !llvm.ptr { + // MLIR: %0 = llvm.mlir.addressof @a : !llvm.ptr + // MLIR: llvm.return %0 : !llvm.ptr + // MLIR: } + cir.global "private" constant internal @".str1" = #cir.const_array<"example1\00" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.global external @s1 = #cir.global_view<@".str1"> : !cir.ptr + cir.global external @s2 = #cir.global_view<@".str"> : !cir.ptr + cir.func @_Z10use_globalv() { + %0 = cir.alloca !s32i, !cir.ptr, ["li", init] {alignment = 4 : i64} + %1 = cir.get_global @a : !cir.ptr + %2 = cir.load %1 : !cir.ptr, !s32i + cir.store %2, %0 : !s32i, !cir.ptr + cir.return + } + cir.func @_Z17use_global_stringv() { + %0 = cir.alloca !u8i, !cir.ptr, ["c", init] {alignment = 1 : i64} + %1 = cir.get_global @s2 : !cir.ptr> + %2 = cir.load %1 : !cir.ptr>, !cir.ptr + %3 = cir.const #cir.int<0> : !s32i + %4 = cir.ptr_stride(%2 : !cir.ptr, %3 : !s32i), !cir.ptr + %5 = cir.load %4 : !cir.ptr, !s8i + %6 = cir.cast(integral, %5 : !s8i), !u8i + cir.store %6, %0 : !u8i, !cir.ptr + cir.return + } + cir.func linkonce_odr @_Z4funcIiET_v() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !s32i + cir.return %2 : !s32i + } + cir.func @_Z8use_funcv() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %1 = cir.call @_Z4funcIiET_v() : () -> !s32i + cir.store %1, %0 : !s32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !s32i + cir.return %2 : !s32i + } + cir.global external @string = #cir.const_array<[#cir.int<119> : !s8i, #cir.int<104> : !s8i, #cir.int<97> : !s8i, #cir.int<116> : !s8i, #cir.int<110> : !s8i, #cir.int<111> : !s8i, #cir.int<119> : !s8i, #cir.int<0> : !s8i]> : !cir.array + // MLIR: llvm.mlir.global external @string(dense<[119, 104, 97, 116, 110, 111, 119, 0]> : tensor<8xi8>) {addr_space = 0 : i32} : !llvm.array<8 x i8> + cir.global external @uint = #cir.const_array<[#cir.int<255> : !u32i]> : !cir.array + // MLIR: llvm.mlir.global external @uint(dense<255> : tensor<1xi32>) {addr_space = 0 : i32} : !llvm.array<1 x i32> + cir.global external @sshort = #cir.const_array<[#cir.int<11111> : !s16i, #cir.int<22222> : !s16i]> : !cir.array + // MLIR: llvm.mlir.global external @sshort(dense<[11111, 22222]> : tensor<2xi16>) {addr_space = 0 : i32} : !llvm.array<2 x i16> + cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array + // MLIR: llvm.mlir.global external @sint(dense<[123, 456, 789]> : tensor<3xi32>) {addr_space = 0 : i32} : !llvm.array<3 x i32> + cir.global external @ll = #cir.const_array<[#cir.int<999999999> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i]> : !cir.array + // MLIR: llvm.mlir.global external @ll(dense<[999999999, 0, 0, 0]> : tensor<4xi64>) {addr_space = 0 : i32} : !llvm.array<4 x i64> + cir.global external @twoDim = #cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array, #cir.const_array<[#cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array]> : !cir.array x 2> + // MLIR: llvm.mlir.global external @twoDim(dense<{{\[\[}}1, 2], [3, 4{{\]\]}}> : tensor<2x2xi32>) {addr_space = 0 : i32} : !llvm.array<2 x array<2 x i32>> + + // The following tests check the resulting LLVM IR directly because the MLIR + // version is too long.
Always prefer the MLIR prefix when possible. + cir.global external @nestedTwoDim = #cir.const_struct<{#cir.int<1> : !s32i, #cir.const_array<[#cir.const_array<[#cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array, #cir.const_array<[#cir.int<4> : !s32i, #cir.int<5> : !s32i]> : !cir.array]> : !cir.array x 2>}> : !ty_A + // LLVM: @nestedTwoDim = global %struct.A { i32 1, [2 x [2 x i32{{\]\] \[\[}}2 x i32] [i32 2, i32 3], [2 x i32] [i32 4, i32 5{{\]\]}} } + cir.global external @nestedString = #cir.const_struct<{#cir.const_array<"1\00\00" : !cir.array> : !cir.array, #cir.const_array<"\00\00\00" : !cir.array> : !cir.array, #cir.const_array<"\00\00\00" : !cir.array> : !cir.array}> : !ty_StringStruct + // LLVM: @nestedString = global %struct.StringStruct { [3 x i8] c"1\00\00", [3 x i8] zeroinitializer, [3 x i8] zeroinitializer } + cir.global external @nestedStringPtr = #cir.const_struct<{#cir.global_view<@".str"> : !cir.ptr}> : !ty_StringStructPtr + // LLVM: @nestedStringPtr = global %struct.StringStructPtr { ptr @.str } + + cir.func @_Z11get_globalsv() { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["s", init] {alignment = 8 : i64} + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["u", init] {alignment = 8 : i64} + %2 = cir.alloca !cir.ptr, !cir.ptr>, ["ss", init] {alignment = 8 : i64} + %3 = cir.alloca !cir.ptr, !cir.ptr>, ["si", init] {alignment = 8 : i64} + %4 = cir.alloca !cir.ptr, !cir.ptr>, ["l", init] {alignment = 8 : i64} + %5 = cir.get_global @string : !cir.ptr> + %6 = cir.cast(array_to_ptrdecay, %5 : !cir.ptr>), !cir.ptr + // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @string : !llvm.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i8 + cir.store %6, %0 : !cir.ptr, !cir.ptr> + %7 = cir.get_global @uint : !cir.ptr> + %8 = cir.cast(array_to_ptrdecay, %7 : !cir.ptr>), !cir.ptr + // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @uint : !llvm.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i32 + cir.store %8, %1 : !cir.ptr, !cir.ptr> + %9 = cir.get_global @sshort : !cir.ptr> + %10 = cir.cast(array_to_ptrdecay, %9 : !cir.ptr>), !cir.ptr + // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @sshort : !llvm.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i16 + cir.store %10, %2 : !cir.ptr, !cir.ptr> + %11 = cir.get_global @sint : !cir.ptr> + %12 = cir.cast(array_to_ptrdecay, %11 : !cir.ptr>), !cir.ptr + // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @sint : !llvm.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i32 + cir.store %12, %3 : !cir.ptr, !cir.ptr> + %13 = cir.get_global @ll : !cir.ptr> + %14 = cir.cast(array_to_ptrdecay, %13 : !cir.ptr>), !cir.ptr + // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @ll : !llvm.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i64 + cir.store %14, %4 : !cir.ptr, !cir.ptr> + cir.return + } + cir.global external @flt = #cir.const_array<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float]> : !cir.array + cir.global external @zeroInitFlt = #cir.const_array<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.array + // MLIR: llvm.mlir.global external @flt(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) {addr_space = 0 : i32} : !llvm.array<2 x f32> + // MLIR: llvm.mlir.global external @zeroInitFlt(dense<0.000000e+00> : tensor<2xf32>) {addr_space = 0 : i32} : !llvm.array<2 x f32> + cir.global "private" internal @staticVar = #cir.int<0> : 
!s32i + // MLIR: llvm.mlir.global internal @staticVar(0 : i32) {addr_space = 0 : i32} : i32 + cir.global external @nullPtr = #cir.ptr : !cir.ptr + // MLIR: llvm.mlir.global external @nullPtr() + // MLIR: %0 = llvm.mlir.zero : !llvm.ptr + // MLIR: llvm.return %0 : !llvm.ptr + // MLIR: } + cir.global external @zeroStruct = #cir.zero : !ty_Bar + // MLIR: llvm.mlir.global external @zeroStruct() + // MLIR: %0 = llvm.mlir.zero : !llvm.struct<"struct.Bar", (i32, i8)> + // MLIR: llvm.return %0 : !llvm.struct<"struct.Bar", (i32, i8)> + // MLIR: } + cir.global common @comm = #cir.int<0> : !s32i + // MLIR: llvm.mlir.global common @comm(0 : i32) {addr_space = 0 : i32} : i32 + + cir.global "private" internal @Handlers = #cir.const_array<[#cir.const_struct<{#cir.global_view<@myfun> : !cir.ptr>}> : !ty_anon2E1_]> : !cir.array + cir.func internal private @myfun(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.return + } + cir.func @foo(%arg0: !s32i, %arg1: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["flag", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + %2 = cir.get_global @Handlers : !cir.ptr> + %3 = cir.load %0 : !cir.ptr, !s32i + %4 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr + %5 = cir.ptr_stride(%4 : !cir.ptr, %3 : !s32i), !cir.ptr + %6 = cir.get_member %5[0] {name = "func"} : !cir.ptr -> !cir.ptr>> + %7 = cir.load %6 : !cir.ptr>>, !cir.ptr> + %8 = cir.load %1 : !cir.ptr, !s32i + cir.call %7(%8) : (!cir.ptr>, !s32i) -> () + cir.return + } + //MLIR-LABEL: @foo + //MLIR: %[[RES4:.*]] = llvm.mlir.addressof @Handlers : !llvm.ptr + //MLIR: %[[LOAD:.*]] = llvm.load {{.*}} {alignment = 4 : i64} : !llvm.ptr -> i32 + //MLIR: %[[RES6:.*]] = llvm.getelementptr %[[RES4]][0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.anon.1", (ptr)> + //MLIR: %[[RES5:.*]] = llvm.sext %[[LOAD]] : i32 to i64 + //MLIR: %[[RES7:.*]] = llvm.getelementptr %[[RES6]][%[[RES5]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<"struct.anon.1", (ptr)> + //MLIR: %[[RES8:.*]] = llvm.getelementptr %[[RES7]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.anon.1", (ptr)> + //MLIR: %[[RES9:.*]] = llvm.load %[[RES8]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr + //MLIR: llvm.call %[[RES9]]({{.*}}) : !llvm.ptr, (i32) -> () + + cir.global external @zero_array = #cir.zero : !cir.array + cir.func @use_zero_array() { + %0 = cir.const #cir.global_view<@zero_array> : !cir.ptr + %1 = cir.const #cir.int<0> : !s32i + %2 = cir.ptr_stride(%0 : !cir.ptr, %1 : !s32i), !cir.ptr + %3 = cir.load %2 : !cir.ptr, !s32i + cir.return + } + // MLIR: %0 = llvm.mlir.addressof @zero_array + +} diff --git a/clang/test/CIR/Lowering/goto.cir b/clang/test/CIR/Lowering/goto.cir new file mode 100644 index 000000000000..f09626ec122f --- /dev/null +++ b/clang/test/CIR/Lowering/goto.cir @@ -0,0 +1,53 @@ +// RUN: cir-opt %s --pass-pipeline='builtin.module(cir-to-llvm,canonicalize{region-simplify=disabled})' -o - | FileCheck %s -check-prefix=MLIR + +!s32i = !cir.int + +module { + + cir.func @gotoFromIf(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.scope { + %6 = cir.load %0 : !cir.ptr, !s32i + %7 = cir.const #cir.int<5> : !s32i + %8 = cir.cmp(gt, %6, %7) : !s32i, !s32i + %9 = 
cir.cast(int_to_bool, %8 : !s32i), !cir.bool + cir.if %9 { + cir.goto "err" + } + } + %2 = cir.const #cir.int<0> : !s32i + cir.store %2, %1 : !s32i, !cir.ptr + cir.br ^bb1 + ^bb1: + %3 = cir.load %1 : !cir.ptr, !s32i + cir.return %3 : !s32i + ^bb2: + cir.label "err" + %4 = cir.const #cir.int<1> : !s32i + %5 = cir.unary(minus, %4) : !s32i, !s32i + cir.store %5, %1 : !s32i, !cir.ptr + cir.br ^bb1 + } + +// MLIR: llvm.func @gotoFromIf +// MLIR: %[[#One:]] = llvm.mlir.constant(1 : i32) : i32 +// MLIR: %[[#Zero:]] = llvm.mlir.constant(0 : i32) : i32 +// MLIR: llvm.cond_br {{.*}}, ^bb[[#COND_YES:]], ^bb[[#COND_NO:]] +// MLIR: ^bb[[#COND_YES]]: +// MLIR: llvm.br ^bb[[#GOTO_BLK:]] +// MLIR: ^bb[[#COND_NO]]: +// MLIR: llvm.br ^bb[[#BLK:]] +// MLIR: ^bb[[#BLK]]: +// MLIR: llvm.store %[[#Zero]], %[[#Ret_val_addr:]] {{.*}}: i32, !llvm.ptr +// MLIR: llvm.br ^bb[[#RETURN:]] +// MLIR: ^bb[[#RETURN]]: +// MLIR: %[[#Ret_val:]] = llvm.load %[[#Ret_val_addr]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: llvm.return %[[#Ret_val]] : i32 +// MLIR: ^bb[[#GOTO_BLK]]: +// MLIR: %[[#Neg_one:]] = llvm.sub %[[#Zero]], %[[#One]] : i32 +// MLIR: llvm.store %[[#Neg_one]], %[[#Ret_val_addr]] {{.*}}: i32, !llvm.ptr +// MLIR: llvm.br ^bb[[#RETURN]] +// MLIR: } +} diff --git a/clang/test/CIR/Lowering/hello.cir b/clang/test/CIR/Lowering/hello.cir new file mode 100644 index 000000000000..e9dbe93ddb4e --- /dev/null +++ b/clang/test/CIR/Lowering/hello.cir @@ -0,0 +1,35 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +!s8i = !cir.int +module @"/tmp/test.raw" attributes {cir.lang = #cir.lang, cir.sob = #cir.signed_overflow_behavior, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"} { + cir.func private @printf(!cir.ptr, ...) -> !s32i + cir.global "private" constant internal @".str" = #cir.const_array<"Hello, world!\0A\00" : !cir.array> : !cir.array {alignment = 1 : i64} + cir.func @main() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %1 = cir.get_global @printf : !cir.ptr, ...)>> + %2 = cir.get_global @".str" : !cir.ptr> + %3 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr + %4 = cir.call @printf(%3) : (!cir.ptr) -> !s32i + %5 = cir.const #cir.int<0> : !s32i + cir.store %5, %0 : !s32i, !cir.ptr + %6 = cir.load %0 : !cir.ptr, !s32i + cir.return %6 : !s32i + } +} + +// CHECK: llvm.func @printf(!llvm.ptr, ...) 
-> i32 +// CHECK: llvm.mlir.global internal constant @".str"("Hello, world!\0A\00") +// CHECK-SAME: {addr_space = 0 : i32, alignment = 1 : i64} +// CHECK: llvm.func @main() -> i32 +// CHECK: %0 = llvm.mlir.constant(1 : index) : i64 +// CHECK: %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// CHECK: %2 = llvm.mlir.addressof @".str" : !llvm.ptr +// CHECK: %3 = llvm.getelementptr %2[0] : (!llvm.ptr) -> !llvm.ptr, i8 +// CHECK: %4 = llvm.call @printf(%3) : (!llvm.ptr) -> i32 +// CHECK: %5 = llvm.mlir.constant(0 : i32) : i32 +// CHECK: llvm.store %5, %1 {{.*}} : i32, !llvm.ptr +// CHECK: %6 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 +// CHECK: llvm.return %6 : i32 +// CHECK: } diff --git a/clang/test/CIR/Lowering/if.cir b/clang/test/CIR/Lowering/if.cir new file mode 100644 index 000000000000..cd42497983e4 --- /dev/null +++ b/clang/test/CIR/Lowering/if.cir @@ -0,0 +1,65 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM +!s32i = !cir.int + +module { + cir.func @foo(%arg0: !s32i) -> !s32i { + %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + cir.if %4 { + %5 = cir.const #cir.int<1> : !s32i + cir.return %5 : !s32i + } else { + %5 = cir.const #cir.int<0> : !s32i + cir.return %5 : !s32i + } + cir.return %arg0 : !s32i + } + +// MLIR: llvm.func @foo(%arg0: i32) -> i32 +// MLIR-NEXT: %0 = llvm.mlir.constant(0 : i32) : i32 +// MLIR-NEXT: %1 = llvm.icmp "ne" %arg0, %0 : i32 +// MLIR-NEXT: llvm.cond_br %1, ^bb2, ^bb1 +// MLIR-NEXT: ^bb1: // pred: ^bb0 +// MLIR-NEXT: %2 = llvm.mlir.constant(0 : i32) : i32 +// MLIR-NEXT: llvm.return %2 : i32 +// MLIR-NEXT: ^bb2: // pred: ^bb0 +// MLIR-NEXT: %3 = llvm.mlir.constant(1 : i32) : i32 +// MLIR-NEXT: llvm.return %3 : i32 +// MLIR-NEXT: ^bb3: // no predecessors +// MLIR-NEXT: llvm.return %arg0 : i32 +// MLIR-NEXT: } + +// LLVM: define i32 @foo(i32 %0) +// LLVM-NEXT: %2 = icmp ne i32 %0, 0 +// LLVM-NEXT: br i1 %2, label %4, label %3 +// LLVM-EMPTY: +// LLVM-NEXT: 3: +// LLVM-NEXT: ret i32 0 +// LLVM-EMPTY: +// LLVM-NEXT: 4: +// LLVM-NEXT: ret i32 1 +// LLVM-EMPTY: +// LLVM-NEXT: 5: +// LLVM-NEXT: ret i32 %0 +// LLVM-NEXT: } + + cir.func @onlyIf(%arg0: !s32i) -> !s32i { + %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + cir.if %4 { + %5 = cir.const #cir.int<1> : !s32i + cir.return %5 : !s32i + } + cir.return %arg0 : !s32i + } + + // MLIR: llvm.func @onlyIf(%arg0: i32) -> i32 + // MLIR-NEXT: %0 = llvm.mlir.constant(0 : i32) : i32 + // MLIR-NEXT: %1 = llvm.icmp "ne" %arg0, %0 : i32 + // MLIR-NEXT: llvm.cond_br %1, ^bb1, ^bb2 + // MLIR-NEXT: ^bb1: // pred: ^bb0 + // MLIR-NEXT: %2 = llvm.mlir.constant(1 : i32) : i32 + // MLIR-NEXT: llvm.return %2 : i32 + // MLIR-NEXT: ^bb2: // pred: ^bb0 + // MLIR-NEXT: llvm.return %arg0 : i32 + // MLIR-NEXT: } +} diff --git a/clang/test/CIR/Lowering/int-wrap.cir b/clang/test/CIR/Lowering/int-wrap.cir new file mode 100644 index 000000000000..b6b8bd385b89 --- /dev/null +++ b/clang/test/CIR/Lowering/int-wrap.cir @@ -0,0 +1,24 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!s32i = !cir.int +module { + cir.func @test(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["len", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + %1 = cir.load %0 : !cir.ptr, !s32i + %2 = cir.const #cir.int<42> : !s32i + %3 = cir.binop(sub, %1, %2) nsw : !s32i + %4 = cir.binop(sub, %1, %2) nuw : 
!s32i + %5 = cir.binop(sub, %1, %2) : !s32i + cir.return + } +} + +// MLIR: llvm.sub {{.*}}, {{.*}} overflow : i32 +// MLIR-NEXT: llvm.sub {{.*}}, {{.*}} overflow : i32 +// MLIR-NEXT: llvm.sub {{.*}}, {{.*}} : i32 + +// LLVM: sub nsw i32 {{.*}}, {{.*}}, !dbg !9 +// LLVM-NEXT: sub nuw i32 {{.*}}, {{.*}}, !dbg !10 +// LLVM-NEXT: sub i32 {{.*}}, {{.*}}, !dbg !11 \ No newline at end of file diff --git a/clang/test/CIR/Lowering/intrinsics.cir b/clang/test/CIR/Lowering/intrinsics.cir new file mode 100644 index 000000000000..25b0b34738bc --- /dev/null +++ b/clang/test/CIR/Lowering/intrinsics.cir @@ -0,0 +1,23 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +module { + cir.func @test_unreachable() { + cir.unreachable + } + + // MLIR: llvm.func @test_unreachable() + // MLIR-NEXT: llvm.unreachable + + cir.func @test_trap() { + cir.trap + } + + // MLIR: llvm.func @test_trap() + // MLIR-NEXT: "llvm.intr.trap"() : () -> () + // MLIR-NEXT: llvm.unreachable + + // LLVM: define void @test_trap() + // LLVM-NEXT: call void @llvm.trap() + // LLVM-NEXT: unreachable +} diff --git a/clang/test/CIR/Lowering/libc.cir b/clang/test/CIR/Lowering/libc.cir new file mode 100644 index 000000000000..5be5d44cd3c6 --- /dev/null +++ b/clang/test/CIR/Lowering/libc.cir @@ -0,0 +1,18 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!void = !cir.void +!u64i = !cir.int +module { + cir.func @shouldLowerLibcMemcpyBuiltin(%arg0: !cir.ptr, %arg1: !cir.ptr, %arg2: !u64i) { + cir.libc.memcpy %arg2 bytes from %arg0 to %arg1 : !u64i, !cir.ptr -> !cir.ptr + // CHECK: "llvm.intr.memcpy"(%{{.+}}, %{{.+}}, %{{.+}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () + cir.return + } + + cir.func @shouldLowerLibcFAbsBuiltin(%arg0: !cir.double) -> !cir.double { + %0 = cir.fabs %arg0 : !cir.double + // CHECK: %0 = llvm.intr.fabs(%arg0) : (f64) -> f64 + cir.return %0 : !cir.double + } +} diff --git a/clang/test/CIR/Lowering/loadstorealloca.cir b/clang/test/CIR/Lowering/loadstorealloca.cir new file mode 100644 index 000000000000..5764d5afc8f5 --- /dev/null +++ b/clang/test/CIR/Lowering/loadstorealloca.cir @@ -0,0 +1,39 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR +!u32i = !cir.int + +module { + cir.func @foo() -> !u32i { + %0 = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !u32i + cir.return %2 : !u32i + } + + cir.func @test_volatile() -> !u32i { + %0 = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !u32i + cir.store volatile %1, %0 : !u32i, !cir.ptr + %2 = cir.load volatile %0 : !cir.ptr, !u32i + cir.return %2 : !u32i + } +} + +// MLIR: module { +// MLIR-NEXT: func @foo() -> i32 +// MLIR-NEXT: %0 = llvm.mlir.constant(1 : index) : i64 +// MLIR-NEXT: %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR-NEXT: %2 = llvm.mlir.constant(1 : i32) : i32 +// MLIR-NEXT: llvm.store %2, %1 {{.*}}: i32, !llvm.ptr +// MLIR-NEXT: %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR-NEXT: return %3 : i32 + + +// MLIR: func @test_volatile() -> i32 +// MLIR-NEXT: %0 = llvm.mlir.constant(1 : index) : i64 +// MLIR-NEXT: %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR-NEXT: %2 = llvm.mlir.constant(1 : i32) : i32 +// 
MLIR-NEXT: llvm.store volatile %2, %1 {{.*}}: i32, !llvm.ptr +// MLIR-NEXT: %3 = llvm.load volatile %1 {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR-NEXT: return %3 : i32 diff --git a/clang/test/CIR/Lowering/loop.cir b/clang/test/CIR/Lowering/loop.cir new file mode 100644 index 000000000000..d15479a76a0d --- /dev/null +++ b/clang/test/CIR/Lowering/loop.cir @@ -0,0 +1,126 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s +#true = #cir.bool : !cir.bool +!s32i = !cir.int + + +module { + + cir.func @testFor(%arg0 : !cir.bool) { + cir.for : cond { + cir.condition(%arg0) + } body { + cir.yield + } step { + cir.yield + } + cir.return + } + +// CHECK: @testFor +// CHECK: llvm.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: llvm.br ^bb[[#STEP:]] +// CHECK: ^bb[[#STEP]]: +// CHECK: llvm.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: + + + + // Test while cir.loop operation lowering. + cir.func @testWhile(%arg0 : !cir.bool) { + cir.while { + cir.condition(%arg0) + } do { + cir.yield + } + cir.return + } + +// CHECK: @testWhile +// CHECK: llvm.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: llvm.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: + + + + // Test do-while cir.loop operation lowering. + cir.func @testDoWhile(%arg0 : !cir.bool) { + cir.do { + cir.yield + } while { + cir.condition(%arg0) + } + cir.return + } + +// CHECK: @testDoWhile +// CHECK: llvm.br ^bb[[#BODY:]] +// CHECK: ^bb[[#COND:]]: +// CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: llvm.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: + + + + // test corner case + // while (1) { + // break; + // } + cir.func @testWhileWithBreakTerminatedBody(%arg0 : !cir.bool) { + cir.while { + cir.condition(%arg0) + } do { + cir.break + } + cir.return + } + +// CHECK: @testWhileWithBreakTerminatedBody +// CHECK: llvm.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: llvm.br ^bb[[#EXIT]] +// CHECK: ^bb[[#EXIT]]: + + + + // test C only corner case - no fails during the lowering + // for (;;) { + // break; + // } + cir.func @forWithBreakTerminatedScopeInBody(%arg0 : !cir.bool) { + cir.for : cond { + cir.condition(%arg0) + } body { + cir.scope { // FIXME(cir): Redundant scope emitted during C codegen. 
+ cir.break + } + cir.yield + } step { + cir.yield + } + cir.return + } + +// CHECK: @forWithBreakTerminatedScopeInBody +// CHECK: llvm.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND:]]: +// CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: llvm.br ^bb[[#SCOPE_IN:]] +// CHECK: ^bb[[#SCOPE_IN]]: +// CHECK: llvm.br ^bb[[#EXIT]] +// CHECK: ^bb[[#SCOPE_EXIT:]]: +// CHECK: llvm.br ^bb[[#STEP:]] +// CHECK: ^bb[[#STEP]]: +// CHECK: llvm.br ^bb[[#COND]] +// CHECK: ^bb[[#EXIT]]: +} diff --git a/clang/test/CIR/Lowering/loops-with-break.cir b/clang/test/CIR/Lowering/loops-with-break.cir new file mode 100644 index 000000000000..34b6bfd7618e --- /dev/null +++ b/clang/test/CIR/Lowering/loops-with-break.cir @@ -0,0 +1,278 @@ +// RUN: cir-opt %s -cir-to-llvm -reconcile-unrealized-casts -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +module { + cir.func @testFor() { + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } body { + cir.scope { + cir.scope { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<5> : !s32i + %4 = cir.cmp(eq, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.if %5 { + cir.break + } + } + } + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // CHECK: llvm.func @testFor() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preBREAK1:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#preBREAK1]]: + // CHECK: llvm.br ^bb[[#preBREAK2:]] + // CHECK: ^bb[[#preBREAK2]]: + // CHECK: llvm.br ^bb[[#BREAK:]] + // CHECK: ^bb[[#BREAK]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preEXIT1:]], ^bb[[#preBODY0:]] + // CHECK: ^bb[[#preEXIT1]]: + // CHECK: llvm.br ^bb[[#EXIT:]] + // CHECK: ^bb[[#preBODY0]]: + // CHECK: llvm.br ^bb[[#preBODY1:]] + // CHECK: ^bb[[#preBODY1]]: + // CHECK: llvm.br ^bb[[#BODY:]] + // CHECK: ^bb[[#BODY]]: + // CHECK: llvm.br ^bb[[#STEP:]] + // CHECK: ^bb[[#STEP]]: + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#EXIT]]: + // [...] 
+ // CHECK: } + + cir.func @testForNested() { + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } body { + cir.scope { + cir.scope { + %2 = cir.alloca !s32i, !cir.ptr, ["j", init] {alignment = 4 : i64} + %3 = cir.const #cir.int<1> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr + cir.for : cond { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.const #cir.int<10> : !s32i + %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.condition(%7) + } body { + cir.scope { + cir.scope { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.const #cir.int<5> : !s32i + %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.if %7 { + cir.break + } + } + } + cir.yield + } step { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.unary(inc, %4) : !s32i, !s32i + cir.store %5, %2 : !s32i, !cir.ptr + cir.yield + } + } + } + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // CHECK: llvm.func @testForNested() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preNESTED1:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#preNESTED1]]: + // CHECK: llvm.br ^bb[[#preNESTED2:]] + // CHECK: ^bb[[#preNESTED2]]: + // CHECK: llvm.br ^bb[[#NESTED:]] + // CHECK: ^bb[[#NESTED]]: + // [...] + // CHECK: llvm.br ^bb[[#COND_NESTED:]] + // CHECK: ^bb[[#COND_NESTED]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preBREAK1:]], ^bb[[#EXIT_NESTED:]] + // CHECK: ^bb[[#preBREAK1]]: + // CHECK: llvm.br ^bb[[#preBREAK2:]] + // CHECK: ^bb[[#preBREAK2]]: + // CHECK: llvm.br ^bb[[#BREAK:]] + // CHECK: ^bb[[#BREAK]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preEXIT2:]], ^bb[[#preBODY0:]] + // CHECK: ^bb[[#preEXIT2]]: + // CHECK: llvm.br ^bb[[#EXIT_NESTED:]] + // CHECK: ^bb[[#preBODY0]]: + // CHECK: llvm.br ^bb[[#preBODY1:]] + // CHECK: ^bb[[#preBODY1]]: + // CHECK: llvm.br ^bb[[#BODY_NESTED:]] + // CHECK: ^bb[[#BODY_NESTED]]: + // CHECK: llvm.br ^bb[[#STEP_NESTED:]] + // CHECK: ^bb[[#STEP_NESTED]]: + // [...] + // CHECK: llvm.br ^bb[[#COND_NESTED:]] + // CHECK: ^bb[[#EXIT_NESTED]]: + // [...] + // CHECK: llvm.br ^bb[[#BODY:]] + // CHECK: ^bb[[#BODY]]: + // CHECK: llvm.br ^bb[[#STEP:]] + // CHECK: ^bb[[#STEP]]: + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#EXIT]]: + // [...] 
+ // CHECK: } + + cir.func @testWhile() { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.scope { + cir.while { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } do { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.scope { + %4 = cir.load %0 : !cir.ptr, !s32i + %5 = cir.const #cir.int<5> : !s32i + %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.if %7 { + cir.break + } + } + cir.yield + } + } + cir.return + } + + + // CHECK: llvm.func @testWhile() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#BODY]]: + // [...] + // CHECK: llvm.br ^bb[[#BREAK:]] + // CHECK: ^bb[[#BREAK]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preEXIT1:]], ^bb[[#preCOND0:]] + // CHECK: ^bb[[#preEXIT1]]: + // CHECK: llvm.br ^bb[[#preEXIT2:]] + // CHECK: ^bb[[#preCOND0]]: + // CHECK: llvm.br ^bb[[#preCOND1:]] + // CHECK: ^bb[[#preCOND1]]: + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#preEXIT2]]: + // CHECK: llvm.br ^bb[[#EXIT:]] + // CHECK: ^bb[[#EXIT]]: + // [...] + // CHECK: } + +cir.func @testDoWhile() { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.scope { + cir.do { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.scope { + %4 = cir.load %0 : !cir.ptr, !s32i + %5 = cir.const #cir.int<5> : !s32i + %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.if %7 { + cir.break + } + } + cir.yield + } while { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } + } + cir.return + } + + // CHECK: llvm.func @testDoWhile() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#BODY]]: + // [...] + // CHECK: llvm.br ^bb[[#BREAK:]] + // CHECK: ^bb[[#BREAK]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preEXIT1:]], ^bb[[#preCOND0:]] + // CHECK: ^bb[[#preEXIT1]]: + // CHECK: llvm.br ^bb[[#preEXIT2:]] + // CHECK: ^bb[[#preCOND0]]: + // CHECK: llvm.br ^bb[[#preCOND1:]] + // CHECK: ^bb[[#preCOND1]]: + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#preEXIT2]]: + // CHECK: llvm.br ^bb[[#EXIT:]] + // CHECK: ^bb[[#EXIT]]: + // [...] 
+ // CHECK: } + +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/loops-with-continue.cir b/clang/test/CIR/Lowering/loops-with-continue.cir new file mode 100644 index 000000000000..0371d416b61d --- /dev/null +++ b/clang/test/CIR/Lowering/loops-with-continue.cir @@ -0,0 +1,274 @@ +// RUN: cir-opt %s -cir-to-llvm -reconcile-unrealized-casts -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +module { + cir.func @testFor() { + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } body { + cir.scope { + cir.scope { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<5> : !s32i + %4 = cir.cmp(eq, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.if %5 { + cir.continue + } + } + } + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // CHECK: llvm.func @testFor() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preCONTINUE1:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#preCONTINUE1]]: + // CHECK: llvm.br ^bb[[#preCONTINUE2:]] + // CHECK: ^bb[[#preCONTINUE2]]: + // CHECK: llvm.br ^bb[[#CONTINUE:]] + // CHECK: ^bb[[#CONTINUE]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preSTEP:]], ^bb[[#preBODY0:]] + // CHECK: ^bb[[#preSTEP]]: + // CHECK: llvm.br ^bb[[#STEP:]] + // CHECK: ^bb[[#preBODY0]]: + // CHECK: llvm.br ^bb[[#preBODY1:]] + // CHECK: ^bb[[#preBODY1]]: + // CHECK: llvm.br ^bb[[#BODY:]] + // CHECK: ^bb[[#BODY]]: + // CHECK: llvm.br ^bb[[#STEP:]] + // CHECK: ^bb[[#STEP]]: + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#EXIT]]: + // [...] + // CHECK: } + + + cir.func @testForNested() { + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } body { + cir.scope { + cir.scope { + %2 = cir.alloca !s32i, !cir.ptr, ["j", init] {alignment = 4 : i64} + %3 = cir.const #cir.int<1> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr + cir.for : cond { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.const #cir.int<10> : !s32i + %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.condition(%7) + } body { + cir.scope { + cir.scope { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.const #cir.int<5> : !s32i + %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.if %7 { + cir.continue + } + } + } + cir.yield + } step { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.unary(inc, %4) : !s32i, !s32i + cir.store %5, %2 : !s32i, !cir.ptr + cir.yield + } + } + } + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // CHECK: llvm.func @testForNested() + // [...] 
+ // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preNESTED1:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#preNESTED1]]: + // CHECK: llvm.br ^bb[[#preNESTED2:]] + // CHECK: ^bb[[#preNESTED2]]: + // CHECK: llvm.br ^bb[[#NESTED:]] + // CHECK: ^bb[[#NESTED]]: + // [...] + // CHECK: llvm.br ^bb[[#COND_NESTED:]] + // CHECK: ^bb[[#COND_NESTED]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preCONTINUE1:]], ^bb[[#EXIT_NESTED:]] + // CHECK: ^bb[[#preCONTINUE1]]: + // CHECK: llvm.br ^bb[[#preCONTINUE2:]] + // CHECK: ^bb[[#preCONTINUE2]]: + // CHECK: llvm.br ^bb[[#CONTINUE:]] + // CHECK: ^bb[[#CONTINUE]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preSTEP0:]], ^bb[[#preBODY0:]] + // CHECK: ^bb[[#preSTEP0]]: + // CHECK: llvm.br ^bb[[#STEP_NESTED:]] + // CHECK: ^bb[[#preBODY0]]: + // CHECK: llvm.br ^bb[[#preBODY1:]] + // CHECK: ^bb[[#preBODY1]]: + // CHECK: llvm.br ^bb[[#BODY_NESTED:]] + // CHECK: ^bb[[#BODY_NESTED]]: + // CHECK: llvm.br ^bb[[#STEP_NESTED:]] + // CHECK: ^bb[[#STEP_NESTED]]: + // [...] + // CHECK: llvm.br ^bb[[#COND_NESTED:]] + // CHECK: ^bb[[#EXIT_NESTED]]: + // CHECK: llvm.br ^bb[[#BODY:]] + // CHECK: ^bb[[#BODY]]: + // CHECK: llvm.br ^bb[[#STEP:]] + // CHECK: ^bb[[#STEP]]: + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#EXIT]]: + // [...] + // CHECK: } + +cir.func @testWhile() { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.scope { + cir.while { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } do { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.scope { + %4 = cir.load %0 : !cir.ptr, !s32i + %5 = cir.const #cir.int<5> : !s32i + %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.if %7 { + cir.continue + } + } + cir.yield + } + } + cir.return + } + + // CHECK: llvm.func @testWhile() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#BODY]]: + // [...] + // CHECK: llvm.br ^bb[[#CONTINUE:]] + // CHECK: ^bb[[#CONTINUE]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preCOND0:]], ^bb[[#preCOND1:]] + // CHECK: ^bb[[#preCOND0]]: + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#preCOND1]]: + // CHECK: llvm.br ^bb[[#preCOND2:]] + // CHECK: ^bb[[#preCOND2]]: + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#EXIT]]: + // [...] 
+ // CHECK: } + + cir.func @testDoWhile() { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.scope { + cir.do { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.scope { + %4 = cir.load %0 : !cir.ptr, !s32i + %5 = cir.const #cir.int<5> : !s32i + %6 = cir.cmp(eq, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.if %7 { + cir.continue + } + } + cir.yield + } while { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<10> : !s32i + %4 = cir.cmp(lt, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } + } + cir.return + } + + + // CHECK: llvm.func @testDoWhile() + // [...] + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#COND]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]] + // CHECK: ^bb[[#BODY]]: + // [...] + // CHECK: llvm.br ^bb[[#CONTINUE:]] + // CHECK: ^bb[[#CONTINUE]]: + // [...] + // CHECK: llvm.cond_br %{{.+}}, ^bb[[#preCOND0:]], ^bb[[#preCOND1:]] + // CHECK: ^bb[[#preCOND0]]: + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#preCOND1]]: + // CHECK: llvm.br ^bb[[#preCOND2:]] + // CHECK: ^bb[[#preCOND2]]: + // CHECK: llvm.br ^bb[[#COND:]] + // CHECK: ^bb[[#EXIT]]: + // [...] + // CHECK: } + +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/ptrdiff.cir b/clang/test/CIR/Lowering/ptrdiff.cir new file mode 100644 index 000000000000..ff1248ddad66 --- /dev/null +++ b/clang/test/CIR/Lowering/ptrdiff.cir @@ -0,0 +1,18 @@ +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s + +!s32i = !cir.int +!u64i = !cir.int + +module { + cir.func @foo(%arg0: !cir.ptr, %arg1: !cir.ptr) -> !s32i { + %1 = cir.ptr_diff(%arg0, %arg1) : !cir.ptr -> !u64i + %2 = cir.cast(integral, %1 : !u64i), !s32i + cir.return %2 : !s32i + } +} + +// CHECK: %3 = ptrtoint ptr %0 to i64 +// CHECK-NEXT: %4 = ptrtoint ptr %1 to i64 +// CHECK-NEXT: %5 = sub i64 %3, %4 +// CHECK-NEXT: %6 = udiv i64 %5, 4 +// CHECK-NEXT: %7 = trunc i64 %6 to i32 diff --git a/clang/test/CIR/Lowering/ptrstride.cir b/clang/test/CIR/Lowering/ptrstride.cir new file mode 100644 index 000000000000..cbe136fd8d30 --- /dev/null +++ b/clang/test/CIR/Lowering/ptrstride.cir @@ -0,0 +1,33 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir -check-prefix=MLIR + +!s32i = !cir.int +module { + cir.func @f(%arg0: !cir.ptr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> + %1 = cir.load %0 : !cir.ptr>, !cir.ptr + %2 = cir.const #cir.int<1> : !s32i + %3 = cir.ptr_stride(%1 : !cir.ptr, %2 : !s32i), !cir.ptr + %4 = cir.load %3 : !cir.ptr, !s32i + cir.return + } + cir.func @g(%arg0: !cir.ptr, %2 : !s32i) { + %3 = cir.ptr_stride(%arg0 : !cir.ptr, %2 : !s32i), !cir.ptr + cir.return + } +} + +// MLIR-LABEL: @f +// MLIR: %[[VAL_1:.*]] = llvm.mlir.constant(1 : index) : i64 +// MLIR: %[[VAL_2:.*]] = llvm.alloca %[[VAL_1]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr +// MLIR: llvm.store {{.*}}, %[[VAL_2]] {{.*}}: !llvm.ptr, !llvm.ptr +// MLIR: %[[VAL_3:.*]] = llvm.load %[[VAL_2]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr +// MLIR: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32 +// MLIR: %[[VAL_5:.*]] = llvm.sext %[[VAL_4]] : i32 to i64 +// MLIR: %[[VAL_6:.*]] = llvm.getelementptr %[[VAL_3]]{{\[}}%[[VAL_5]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32 +// 
MLIR: %[[VAL_7:.*]] = llvm.load %[[VAL_6]] {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR: llvm.return + +// MLIR-LABEL: @g +// MLIR: llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i32) -> !llvm.ptr, i32 diff --git a/clang/test/CIR/Lowering/region-simplify.cir b/clang/test/CIR/Lowering/region-simplify.cir new file mode 100644 index 000000000000..5f32205cb032 --- /dev/null +++ b/clang/test/CIR/Lowering/region-simplify.cir @@ -0,0 +1,38 @@ +// RUN: cir-opt %s -canonicalize -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-opt %s -canonicalize -o - | cir-translate -cir-to-llvmir | FileCheck %s -check-prefix=LLVM + +!u32i = !cir.int + +module { + cir.func @foo() { + %0 = cir.alloca !u32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<1> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + cir.br ^bb2 + ^bb1: // no predecessors + %2 = cir.load %0 : !cir.ptr, !u32i + %3 = cir.const #cir.int<1> : !u32i + %4 = cir.binop(add, %2, %3) : !u32i + cir.store %4, %0 : !u32i, !cir.ptr + cir.br ^bb2 + ^bb2: // 2 preds: ^bb0, ^bb1 + %5 = cir.load %0 : !cir.ptr, !u32i + %6 = cir.const #cir.int<2> : !u32i + %7 = cir.binop(add, %5, %6) : !u32i + cir.store %7, %0 : !u32i, !cir.ptr + cir.return + } + + // MLIR: module { +// MLIR-NEXT: llvm.func @foo +// MLIR: llvm.br ^bb1 +// MLIR: ^bb1: +// MLIR: return + +// LLVM: br label %[[Value:[0-9]+]] +// LLVM-EMPTY: +// LLVM-NEXT: [[Value]]: ; preds = +// LLVM: ret void + + +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/scope.cir b/clang/test/CIR/Lowering/scope.cir new file mode 100644 index 000000000000..add46429cba2 --- /dev/null +++ b/clang/test/CIR/Lowering/scope.cir @@ -0,0 +1,78 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM +!u32i = !cir.int + +module { + cir.func @foo() { + cir.scope { + %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<4> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + } + cir.return + } + +// MLIR: llvm.func @foo() +// MLIR-NEXT: llvm.br ^bb1 +// MLIR-NEXT: ^bb1: +// MLIR-DAG: [[v1:%[0-9]]] = llvm.mlir.constant(4 : i32) : i32 +// MLIR-DAG: [[v2:%[0-9]]] = llvm.mlir.constant(1 : index) : i64 +// MLIR-DAG: [[v3:%[0-9]]] = llvm.alloca [[v2]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR-NEXT: llvm.store [[v1]], [[v3]] {{.*}}: i32, !llvm.ptr +// MLIR-NEXT: llvm.br ^bb2 +// MLIR-NEXT: ^bb2: +// MLIR-NEXT: llvm.return + + +// LLVM: define void @foo() +// LLVM-NEXT: br label %1 +// LLVM-EMPTY: +// LLVM-NEXT: 1: +// LLVM-NEXT: %2 = alloca i32, i64 1, align 4 +// LLVM-NEXT: store i32 4, ptr %2, align 4 +// LLVM-NEXT: br label %3 +// LLVM-EMPTY: +// LLVM-NEXT: 3: +// LLVM-NEXT: ret void +// LLVM-NEXT: } + + + // Should drop empty scopes. 
+ cir.func @empty_scope() { + cir.scope { + } + cir.return + } + // MLIR: llvm.func @empty_scope() + // MLIR-NEXT: llvm.return + // MLIR-NEXT: } + + + cir.func @scope_with_return() -> !u32i { + %0 = cir.alloca !u32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.scope { + %2 = cir.const #cir.int<0> : !u32i + cir.store %2, %0 : !u32i, !cir.ptr + %3 = cir.load %0 : !cir.ptr, !u32i + cir.return %3 : !u32i + } + %1 = cir.load %0 : !cir.ptr, !u32i + cir.return %1 : !u32i + } + + // MLIR: llvm.func @scope_with_return() + // MLIR-NEXT: [[v0:%.*]] = llvm.mlir.constant(1 : index) : i64 + // MLIR-NEXT: [[v1:%.*]] = llvm.alloca [[v0]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr + // MLIR-NEXT: llvm.br ^bb1 + // MLIR-NEXT: ^bb1: // pred: ^bb0 + // MLIR-NEXT: [[v2:%.*]] = llvm.mlir.constant(0 : i32) : i32 + // MLIR-NEXT: llvm.store [[v2]], [[v1]] {{.*}}: i32, !llvm.ptr + // MLIR-NEXT: [[v3:%.*]] = llvm.load [[v1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // MLIR-NEXT: llvm.return [[v3]] : i32 + // MLIR-NEXT: ^bb2: // no predecessors + // MLIR-NEXT: [[v4:%.*]] = llvm.load [[v1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // MLIR-NEXT: llvm.return [[v4]] : i32 + // MLIR-NEXT: } + + } diff --git a/clang/test/CIR/Lowering/select.cir b/clang/test/CIR/Lowering/select.cir new file mode 100644 index 000000000000..1836210d6a7c --- /dev/null +++ b/clang/test/CIR/Lowering/select.cir @@ -0,0 +1,50 @@ +// RUN: cir-translate -cir-to-llvmir -o %t.ll %s +// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s + +!s32i = !cir.int + +module { + cir.func @select_int(%arg0 : !cir.bool, %arg1 : !s32i, %arg2 : !s32i) -> !s32i { + %0 = cir.select if %arg0 then %arg1 else %arg2 : (!cir.bool, !s32i, !s32i) -> !s32i + cir.return %0 : !s32i + } + + // LLVM: define i32 @select_int(i8 %[[#COND:]], i32 %[[#TV:]], i32 %[[#FV:]]) + // LLVM-NEXT: %[[#CONDF:]] = trunc i8 %[[#COND]] to i1 + // LLVM-NEXT: %[[#RES:]] = select i1 %[[#CONDF]], i32 %[[#TV]], i32 %[[#FV]] + // LLVM-NEXT: ret i32 %[[#RES]] + // LLVM-NEXT: } + + cir.func @select_bool(%arg0 : !cir.bool, %arg1 : !cir.bool, %arg2 : !cir.bool) -> !cir.bool { + %0 = cir.select if %arg0 then %arg1 else %arg2 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + cir.return %0 : !cir.bool + } + + // LLVM: define i8 @select_bool(i8 %[[#COND:]], i8 %[[#TV:]], i8 %[[#FV:]]) + // LLVM-NEXT: %[[#CONDF:]] = trunc i8 %[[#COND]] to i1 + // LLVM-NEXT: %[[#RES:]] = select i1 %[[#CONDF]], i8 %[[#TV]], i8 %[[#FV]] + // LLVM-NEXT: ret i8 %[[#RES]] + // LLVM-NEXT: } + + cir.func @logical_and(%arg0 : !cir.bool, %arg1 : !cir.bool) -> !cir.bool { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.select if %arg0 then %arg1 else %0 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + cir.return %1 : !cir.bool + } + + // LLVM: define i8 @logical_and(i8 %[[#ARG0:]], i8 %[[#ARG1:]]) + // LLVM-NEXT: %[[#RES:]] = and i8 %[[#ARG0]], %[[#ARG1]] + // LLVM-NEXT: ret i8 %[[#RES]] + // LLVM-NEXT: } + + cir.func @logical_or(%arg0 : !cir.bool, %arg1 : !cir.bool) -> !cir.bool { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.select if %arg0 then %0 else %arg1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + cir.return %1 : !cir.bool + } + + // LLVM: define i8 @logical_or(i8 %[[#ARG0:]], i8 %[[#ARG1:]]) + // LLVM-NEXT: %[[#RES:]] = or i8 %[[#ARG0]], %[[#ARG1]] + // LLVM-NEXT: ret i8 %[[#RES]] + // LLVM-NEXT: } +} diff --git a/clang/test/CIR/Lowering/shift.cir b/clang/test/CIR/Lowering/shift.cir new file mode 100644 index 000000000000..78a7f89e13d0 --- /dev/null +++ b/clang/test/CIR/Lowering/shift.cir @@ 
-0,0 +1,28 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!u16i = !cir.int +module { + cir.func @testShiftWithDifferentValueAndAmountTypes(%arg0: !s16i, %arg1: !s32i, %arg2: !s64i, %arg3: !u16i) { + // CHECK: testShiftWithDifferentValueAndAmountTypes + + // Should allow shift with larger amount type. + %1 = cir.shift(left, %arg1: !s32i, %arg2 : !s64i) -> !s32i + // CHECK: %[[#CAST:]] = llvm.trunc %{{.+}} : i64 to i32 + // CHECK: llvm.shl %{{.+}}, %[[#CAST]] : i32 + + // Should allow shift with signed smaller amount type. + %2 = cir.shift(left, %arg1 : !s32i, %arg0 : !s16i) -> !s32i + // CHECK: %[[#CAST:]] = llvm.sext %{{.+}} : i16 to i32 + // CHECK: llvm.shl %{{.+}}, %[[#CAST]] : i32 + + // Should allow shift with unsigned smaller amount type. + %14 = cir.shift(left, %arg1 : !s32i, %arg3 : !u16i) -> !s32i + // CHECK: %[[#CAST:]] = llvm.zext %{{.+}} : i16 to i32 + // CHECK: llvm.shl %{{.+}}, %[[#CAST]] : i32 + cir.return + } +} diff --git a/clang/test/CIR/Lowering/stack-save-restore.cir b/clang/test/CIR/Lowering/stack-save-restore.cir new file mode 100644 index 000000000000..ad9dee66b53f --- /dev/null +++ b/clang/test/CIR/Lowering/stack-save-restore.cir @@ -0,0 +1,19 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR + +!u8i = !cir.int + +module { + cir.func @stack_save() { + %0 = cir.stack_save : !cir.ptr + cir.stack_restore %0 : !cir.ptr + cir.return + } +} + +// MLIR: module { +// MLIR-NEXT: llvm.func @stack_save +// MLIR-NEXT: %0 = llvm.intr.stacksave : !llvm.ptr +// MLIR-NEXT: llvm.intr.stackrestore %0 : !llvm.ptr +// MLIR-NEXT: llvm.return +// MLIR-NEXT: } +// MLIR-NEXT: } diff --git a/clang/test/CIR/Lowering/static-array.c b/clang/test/CIR/Lowering/static-array.c new file mode 100644 index 000000000000..60cfce0245d6 --- /dev/null +++ b/clang/test/CIR/Lowering/static-array.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +int test(int x) { + static int arr[10] = {0, 1, 0, 0}; + return arr[x]; +} +// LLVM: internal global [10 x i32] [i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0] \ No newline at end of file diff --git a/clang/test/CIR/Lowering/struct-init.c b/clang/test/CIR/Lowering/struct-init.c new file mode 100644 index 000000000000..a8b84e9d20d9 --- /dev/null +++ b/clang/test/CIR/Lowering/struct-init.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +struct S { + int x; +}; + +// LLVM: define dso_local void @zeroInit +// LLVM: [[TMP0:%.*]] = alloca %struct.S, i64 1 +// LLVM: store %struct.S zeroinitializer, ptr [[TMP0]] +void zeroInit() { + struct S s = {0}; +} diff --git a/clang/test/CIR/Lowering/struct.cir b/clang/test/CIR/Lowering/struct.cir new file mode 100644 index 000000000000..a1a3d352c8a1 --- /dev/null +++ b/clang/test/CIR/Lowering/struct.cir @@ -0,0 +1,96 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s32i = !cir.int +!u8i = !cir.int +!u32i = !cir.int +!ty_S = !cir.struct +!ty_S2A = !cir.struct +!ty_S1_ = !cir.struct} #cir.record.decl.ast> +!ty_S2_ = !cir.struct +!ty_S3_ = !cir.struct + +module { + cir.func @test() { + %1 = cir.alloca !ty_S, !cir.ptr, ["x"] {alignment = 4 : i64} + // CHECK: %[[#ARRSIZE:]] = llvm.mlir.constant(1 : index) : i64 + // CHECK: 
%[[#STRUCT:]] = llvm.alloca %[[#ARRSIZE]] x !llvm.struct<"struct.S", (i8, i32)> + %3 = cir.get_member %1[0] {name = "c"} : !cir.ptr -> !cir.ptr + // CHECK: = llvm.getelementptr %[[#STRUCT]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.S", (i8, i32)> + %5 = cir.get_member %1[1] {name = "i"} : !cir.ptr -> !cir.ptr + // CHECK: = llvm.getelementptr %[[#STRUCT]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.S", (i8, i32)> + cir.return + } + + cir.func @shouldConstInitLocalStructsWithConstStructAttr() { + %0 = cir.alloca !ty_S2A, !cir.ptr, ["s"] {alignment = 4 : i64} + %1 = cir.const #cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S2A + cir.store %1, %0 : !ty_S2A, !cir.ptr + cir.return + } + // CHECK: llvm.func @shouldConstInitLocalStructsWithConstStructAttr() + // CHECK: %0 = llvm.mlir.constant(1 : index) : i64 + // CHECK: %1 = llvm.alloca %0 x !llvm.struct<"struct.S2A", (i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr + // CHECK: %2 = llvm.mlir.undef : !llvm.struct<"struct.S2A", (i32)> + // CHECK: %3 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %4 = llvm.insertvalue %3, %2[0] : !llvm.struct<"struct.S2A", (i32)> + // CHECK: llvm.store %4, %1 {{.*}}: !llvm.struct<"struct.S2A", (i32)>, !llvm.ptr + // CHECK: llvm.return + // CHECK: } + + // Should lower basic #cir.const_struct initializer. + cir.global external @s1 = #cir.const_struct<{#cir.int<1> : !s32i, #cir.fp<1.000000e-01> : !cir.float, #cir.ptr : !cir.ptr}> : !ty_S1_ + // CHECK: llvm.mlir.global external @s1() {addr_space = 0 : i32} : !llvm.struct<"struct.S1", (i32, f32, ptr)> { + // CHECK: %0 = llvm.mlir.undef : !llvm.struct<"struct.S1", (i32, f32, ptr)> + // CHECK: %1 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %2 = llvm.insertvalue %1, %0[0] : !llvm.struct<"struct.S1", (i32, f32, ptr)> + // CHECK: %3 = llvm.mlir.constant(0.099999994 : f32) : f32 + // CHECK: %4 = llvm.insertvalue %3, %2[1] : !llvm.struct<"struct.S1", (i32, f32, ptr)> + // CHECK: %5 = llvm.mlir.zero : !llvm.ptr + // CHECK: %6 = llvm.insertvalue %5, %4[2] : !llvm.struct<"struct.S1", (i32, f32, ptr)> + // CHECK: llvm.return %6 : !llvm.struct<"struct.S1", (i32, f32, ptr)> + // CHECK: } + + // Should lower nested #cir.const_struct initializer. 
+ cir.global external @s2 = #cir.const_struct<{#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S2A}> : !ty_S2_ + // CHECK: llvm.mlir.global external @s2() {addr_space = 0 : i32} : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)> { + // CHECK: %0 = llvm.mlir.undef : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)> + // CHECK: %1 = llvm.mlir.undef : !llvm.struct<"struct.S2A", (i32)> + // CHECK: %2 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"struct.S2A", (i32)> + // CHECK: %4 = llvm.insertvalue %3, %0[0] : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)> + // CHECK: llvm.return %4 : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)> + // CHECK: } + + cir.global external @s3 = #cir.const_array<[#cir.const_struct<{#cir.int<1> : !s32i}> : !ty_S3_, #cir.const_struct<{#cir.int<2> : !s32i}> : !ty_S3_, #cir.const_struct<{#cir.int<3> : !s32i}> : !ty_S3_]> : !cir.array + // CHECK: llvm.mlir.global external @s3() {addr_space = 0 : i32} : !llvm.array<3 x struct<"struct.S3", (i32)>> { + // CHECK: %0 = llvm.mlir.undef : !llvm.array<3 x struct<"struct.S3", (i32)>> + // CHECK: %1 = llvm.mlir.undef : !llvm.struct<"struct.S3", (i32)> + // CHECK: %2 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"struct.S3", (i32)> + // CHECK: %4 = llvm.insertvalue %3, %0[0] : !llvm.array<3 x struct<"struct.S3", (i32)>> + // CHECK: %5 = llvm.mlir.undef : !llvm.struct<"struct.S3", (i32)> + // CHECK: %6 = llvm.mlir.constant(2 : i32) : i32 + // CHECK: %7 = llvm.insertvalue %6, %5[0] : !llvm.struct<"struct.S3", (i32)> + // CHECK: %8 = llvm.insertvalue %7, %4[1] : !llvm.array<3 x struct<"struct.S3", (i32)>> + // CHECK: %9 = llvm.mlir.undef : !llvm.struct<"struct.S3", (i32)> + // CHECK: %10 = llvm.mlir.constant(3 : i32) : i32 + // CHECK: %11 = llvm.insertvalue %10, %9[0] : !llvm.struct<"struct.S3", (i32)> + // CHECK: %12 = llvm.insertvalue %11, %8[2] : !llvm.array<3 x struct<"struct.S3", (i32)>> + // CHECK: llvm.return %12 : !llvm.array<3 x struct<"struct.S3", (i32)>> + // CHECK: } + + cir.func @shouldLowerStructCopies() { + // CHECK: llvm.func @shouldLowerStructCopies() + %1 = cir.alloca !ty_S, !cir.ptr, ["a"] {alignment = 4 : i64} + // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64 + // CHECK: %[[#SA:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"struct.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr + %2 = cir.alloca !ty_S, !cir.ptr, ["b", init] {alignment = 4 : i64} + // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64 + // CHECK: %[[#SB:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"struct.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr + cir.copy %1 to %2 : !cir.ptr + // CHECK: %[[#SIZE:]] = llvm.mlir.constant(8 : i32) : i32 + // CHECK: "llvm.intr.memcpy"(%[[#SB]], %[[#SA]], %[[#SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () + cir.return + } +} diff --git a/clang/test/CIR/Lowering/switch.cir b/clang/test/CIR/Lowering/switch.cir new file mode 100644 index 000000000000..dee8e98db858 --- /dev/null +++ b/clang/test/CIR/Lowering/switch.cir @@ -0,0 +1,185 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s8i = !cir.int +!s32i = !cir.int +!s64i = !cir.int + +module { + cir.func @shouldLowerSwitchWithDefault(%arg0: !s8i) { + cir.switch (%arg0 : !s8i) [ + // CHECK: llvm.switch %arg0 : i8, ^bb[[#DEFAULT:]] [ + // CHECK: 1: ^bb[[#CASE1:]] + // CHECK: ] + case (equal, 1) { + cir.break + }, + // CHECK: ^bb[[#CASE1]]: + // 
CHECK: llvm.br ^bb[[#EXIT:]] + case (default) { + cir.break + } + // CHECK: ^bb[[#DEFAULT]]: + // CHECK: llvm.br ^bb[[#EXIT]] + ] + // CHECK: ^bb[[#EXIT]]: + cir.return + } + + + cir.func @shouldLowerSwitchWithoutDefault(%arg0: !s32i) { + cir.switch (%arg0 : !s32i) [ + // Default block is the exit block: + // CHECK: llvm.switch %arg0 : i32, ^bb[[#EXIT:]] [ + // CHECK: 1: ^bb[[#CASE1:]] + // CHECK: ] + case (equal, 1) { + cir.break + } + // CHECK: ^bb[[#CASE1]]: + // CHECK: llvm.br ^bb[[#EXIT]] + ] + // CHECK: ^bb[[#EXIT]]: + cir.return + } + + + cir.func @shouldLowerSwitchWithImplicitFallthrough(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + // CHECK: llvm.switch %arg0 : i64, ^bb[[#EXIT:]] [ + // CHECK: 1: ^bb[[#CASE1N2:]], + // CHECK: 2: ^bb[[#CASE1N2]] + // CHECK: ] + case (anyof, [1, 2] : !s64i) { // case 1 and 2 use same region + cir.break + } + // CHECK: ^bb[[#CASE1N2]]: + // CHECK: llvm.br ^bb[[#EXIT]] + ] + // CHECK: ^bb[[#EXIT]]: + cir.return + } + + + cir.func @shouldLowerSwitchWithExplicitFallthrough(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + // CHECK: llvm.switch %arg0 : i64, ^bb[[#EXIT:]] [ + // CHECK: 1: ^bb[[#CASE1:]], + // CHECK: 2: ^bb[[#CASE2:]] + // CHECK: ] + case (equal, 1 : !s64i) { // case 1 has its own region + cir.yield // fallthrough to case 2 + }, + // CHECK: ^bb[[#CASE1]]: + // CHECK: llvm.br ^bb[[#CASE2]] + case (equal, 2 : !s64i) { + cir.break + } + // CHECK: ^bb[[#CASE2]]: + // CHECK: llvm.br ^bb[[#EXIT]] + ] + // CHECK: ^bb[[#EXIT]]: + cir.return + } + + + cir.func @shouldLowerSwitchWithFallthroughToExit(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + // CHECK: llvm.switch %arg0 : i64, ^bb[[#EXIT:]] [ + // CHECK: 1: ^bb[[#CASE1:]] + // CHECK: ] + case (equal, 1 : !s64i) { + cir.yield // fallthrough to exit + } + // CHECK: ^bb[[#CASE1]]: + // CHECK: llvm.br ^bb[[#EXIT]] + ] + // CHECK: ^bb[[#EXIT]]: + cir.return + } + + + cir.func @shouldDropEmptySwitch(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + ] + // CHECK-NOT: llvm.switch + cir.return + } + + cir.func @shouldLowerMultiBlockCase(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.scope { + %1 = cir.load %0 : !cir.ptr, !s32i + cir.switch (%1 : !s32i) [ + case (equal, 3) { + cir.return + ^bb1: // no predecessors + cir.break + } + ] + } + cir.return + } + // CHECK: llvm.func @shouldLowerMultiBlockCase + // CHECK: ^bb1: // pred: ^bb0 + // CHECK: llvm.switch {{.*}} : i32, ^bb4 [ + // CHECK: 3: ^bb2 + // CHECK: ] + // CHECK: ^bb2: // pred: ^bb1 + // CHECK: llvm.return + // CHECK: ^bb3: // no predecessors + // CHECK: llvm.br ^bb4 + // CHECK: ^bb4: // 2 preds: ^bb1, ^bb3 + // CHECK: llvm.br ^bb5 + // CHECK: ^bb5: // pred: ^bb4 + // CHECK: llvm.return + // CHECK: } + + cir.func @shouldLowerNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + cir.scope { + %5 = cir.load %0 : !cir.ptr, !s32i + cir.switch (%5 : !s32i) [ + case (equal, 0) { + cir.scope { + %6 = cir.load %1 : !cir.ptr, !s32i + %7 = cir.const #cir.int<0> : !s32i + %8 = cir.cmp(ge, %6, %7) : !s32i, !s32i + %9 = cir.cast(int_to_bool, %8 : !s32i), !cir.bool + cir.if %9 { + cir.break + } + } + cir.break + } + ] + } + %3 = cir.const #cir.int<3> : !s32i + cir.store %3, %2 : !s32i, 
!cir.ptr + %4 = cir.load %2 : !cir.ptr, !s32i + cir.return %4 : !s32i + } + // CHECK: llvm.func @shouldLowerNestedBreak + // CHECK: llvm.switch %6 : i32, ^bb7 [ + // CHECK: 0: ^bb2 + // CHECK: ] + // CHECK: ^bb2: // pred: ^bb1 + // CHECK: llvm.br ^bb3 + // CHECK: ^bb3: // pred: ^bb2 + // CHECK: llvm.cond_br {{%.*}}, ^bb4, ^bb5 + // CHECK: ^bb4: // pred: ^bb3 + // CHECK: llvm.br ^bb7 + // CHECK: ^bb5: // pred: ^bb3 + // CHECK: llvm.br ^bb6 + // CHECK: ^bb6: // pred: ^bb5 + // CHECK: llvm.br ^bb7 + // CHECK: ^bb7: // 3 preds: ^bb1, ^bb4, ^bb6 + // CHECK: llvm.br ^bb8 + // CHECK: ^bb8: // pred: ^bb7 + // CHECK: llvm.return +} diff --git a/clang/test/CIR/Lowering/ternary.cir b/clang/test/CIR/Lowering/ternary.cir new file mode 100644 index 000000000000..6e469f388d79 --- /dev/null +++ b/clang/test/CIR/Lowering/ternary.cir @@ -0,0 +1,49 @@ +// RUN: cir-opt %s -cir-to-llvm -reconcile-unrealized-casts -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR + +!s32i = !cir.int + +module { +cir.func @_Z1xi(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<0> : !s32i + %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool + %5 = cir.ternary(%4, true { + %7 = cir.const #cir.int<3> : !s32i + cir.yield %7 : !s32i + }, false { + %7 = cir.const #cir.int<5> : !s32i + cir.yield %7 : !s32i + }) : (!cir.bool) -> !s32i + cir.store %5, %1 : !s32i, !cir.ptr + %6 = cir.load %1 : !cir.ptr, !s32i + cir.return %6 : !s32i + } +} + +// MLIR: llvm.func @_Z1xi(%arg0: i32) -> i32 +// MLIR-NEXT: %0 = llvm.mlir.constant(1 : index) : i64 +// MLIR-NEXT: %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR-NEXT: %2 = llvm.mlir.constant(1 : index) : i64 +// MLIR-NEXT: %3 = llvm.alloca %2 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr +// MLIR-NEXT: llvm.store %arg0, %1 {{.*}}: i32, !llvm.ptr +// MLIR-NEXT: %4 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR-NEXT: %5 = llvm.mlir.constant(0 : i32) : i32 +// MLIR-NEXT: %6 = llvm.icmp "sgt" %4, %5 : i32 +// MLIR-NEXT: llvm.cond_br %6, ^bb1, ^bb2 +// MLIR-NEXT: ^bb1: // pred: ^bb0 +// MLIR-NEXT: %7 = llvm.mlir.constant(3 : i32) : i32 +// MLIR-NEXT: llvm.br ^bb3(%7 : i32) +// MLIR-NEXT: ^bb2: // pred: ^bb0 +// MLIR-NEXT: %8 = llvm.mlir.constant(5 : i32) : i32 +// MLIR-NEXT: llvm.br ^bb3(%8 : i32) +// MLIR-NEXT: ^bb3(%9: i32): // 2 preds: ^bb1, ^bb2 +// MLIR-NEXT: llvm.br ^bb4 +// MLIR-NEXT: ^bb4: // pred: ^bb3 +// MLIR-NEXT: llvm.store %9, %3 {{.*}}: i32, !llvm.ptr +// MLIR-NEXT: %10 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32 +// MLIR-NEXT: llvm.return %10 : i32 +// MLIR-NEXT: } diff --git a/clang/test/CIR/Lowering/try-catch.cpp b/clang/test/CIR/Lowering/try-catch.cpp new file mode 100644 index 000000000000..b985ecab8cca --- /dev/null +++ b/clang/test/CIR/Lowering/try-catch.cpp @@ -0,0 +1,112 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir-flat %s -o %t.flat.cir +// RUN: FileCheck --input-file=%t.flat.cir --check-prefix=CIR_FLAT %s +// RUN_DISABLED: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-llvm %s -o %t.ll +// RUN_DISABLED: FileCheck --input-file=%t.flat.cir --check-prefix=CIR_LLVM %s +double division(int a, int b); + +// CIR: cir.func 
@_Z2tcv() +// CIR_FLAT: cir.func @_Z2tcv() +unsigned long long tc() { + int x = 50, y = 3; + unsigned long long z; + + // CIR_FLAT: cir.alloca !s32i, !cir.ptr, ["a" + // CIR_FLAT: cir.alloca !cir.ptr, !cir.ptr>, ["msg"] + // CIR_FLAT: cir.alloca !s32i, !cir.ptr, ["idx"] + // CIR_FLAT: cir.br ^bb2 + try { + // CIR_FLAT: ^bb2: // pred: ^bb1 + // CIR_FLAT: cir.try_call @_Z8divisionii({{.*}}) ^[[CONT:.*]], ^[[LPAD:.*]] : (!s32i, !s32i) + int a = 4; + z = division(x, y); + + // CIR_FLAT: ^[[CONT:.*]]: // pred: ^bb2 + // CIR_FLAT: cir.cast(float_to_int, %12 : !cir.double), !u64i + a++; + // CIR_FLAT: cir.br ^[[AFTER_TRY:.*]] loc + + // CIR_FLAT: ^[[LPAD]]: // pred: ^bb2 + // CIR_FLAT: %[[EH:.*]], %[[SEL:.*]] = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc] + // CIR_FLAT: cir.br ^[[BB_INT_IDX_SEL:.*]](%[[EH]], %[[SEL]] : {{.*}}) loc + } catch (int idx) { + // CIR_FLAT: ^[[BB_INT_IDX_SEL]](%[[INT_IDX_EH:.*]]: !cir.ptr loc({{.*}}), %[[INT_IDX_SEL:.*]]: !u32i + // CIR_FLAT: %[[INT_IDX_ID:.*]] = cir.eh.typeid @_ZTIi + // CIR_FLAT: %[[MATCH_CASE_INT_IDX:.*]] = cir.cmp(eq, %[[INT_IDX_SEL]], %[[INT_IDX_ID]]) : !u32i, !cir.bool + // CIR_FLAT: cir.brcond %[[MATCH_CASE_INT_IDX]] ^[[BB_INT_IDX_CATCH:.*]](%[[INT_IDX_EH]] : {{.*}}), ^[[BB_CHAR_MSG_CMP:.*]](%[[INT_IDX_EH]], %[[INT_IDX_SEL]] : {{.*}}) loc + // CIR_FLAT: ^[[BB_INT_IDX_CATCH]](%[[INT_IDX_CATCH_SLOT:.*]]: !cir.ptr + // CIR_FLAT: %[[PARAM_INT_IDX:.*]] = cir.catch_param begin %[[INT_IDX_CATCH_SLOT]] -> !cir.ptr + // CIR_FLAT: cir.const #cir.int<98> + // CIR_FLAT: cir.br ^[[AFTER_TRY]] loc + z = 98; + idx++; + } catch (const char* msg) { + // CIR_FLAT: ^[[BB_CHAR_MSG_CMP]](%[[CHAR_MSG_EH:.*]]: !cir.ptr loc({{.*}}), %[[CHAR_MSG_SEL:.*]]: !u32i + // CIR_FLAT: %[[CHAR_MSG_ID:.*]] = cir.eh.typeid @_ZTIPKc + // CIR_FLAT: %[[MATCH_CASE_CHAR_MSG:.*]] = cir.cmp(eq, %[[CHAR_MSG_SEL]], %[[CHAR_MSG_ID]]) + // CIR_FLAT: cir.brcond %[[MATCH_CASE_CHAR_MSG]] ^[[BB_CHAR_MSG_CATCH:.*]](%[[CHAR_MSG_EH]] : {{.*}}), ^[[BB_RESUME:.*]](%[[CHAR_MSG_EH]], %[[CHAR_MSG_SEL]] : {{.*}}) loc + // CIR_FLAT: ^[[BB_CHAR_MSG_CATCH]](%[[CHAR_MSG_CATCH_SLOT:.*]]: !cir.ptr + // CIR_FLAT: %[[PARAM_CHAR_MSG:.*]] = cir.catch_param begin %[[CHAR_MSG_CATCH_SLOT]] -> !cir.ptr + // CIR_FLAT: cir.const #cir.int<99> : !s32i + // CIR_FLAT: cir.br ^[[AFTER_TRY]] loc + z = 99; + (void)msg[0]; + } + // CIR_FLAT: ^[[BB_RESUME]](%[[RESUME_EH:.*]]: !cir.ptr loc({{.*}}), %[[RESUME_SEL:.*]]: !u32i + // CIR_FLAT: cir.resume %[[RESUME_EH]], %[[RESUME_SEL]] + + // CIR_FLAT: ^[[AFTER_TRY]]: + // CIR_FLAT: cir.load + + return z; +} + +// CIR_FLAT: cir.func @_Z3tc2v +unsigned long long tc2() { + int x = 50, y = 3; + unsigned long long z; + + try { + int a = 4; + z = division(x, y); + a++; + } catch (int idx) { + // CIR_FLAT: cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc] + z = 98; + idx++; + } catch (const char* msg) { + z = 99; + (void)msg[0]; + } catch (...) { + // CIR_FLAT: cir.catch_param + // CIR_FLAT: cir.const #cir.int<100> : !s32i + // CIR_FLAT: cir.br ^[[AFTER_TRY:.*]] loc + // CIR_FLAT: ^[[AFTER_TRY]]: // 4 preds + // CIR_FLAT: cir.load + // CIR_FLAT: cir.return + z = 100; + } + + return z; +} + +// CIR_FLAT: cir.func @_Z3tc3v +unsigned long long tc3() { + int x = 50, y = 3; + unsigned long long z; + + try { + z = division(x, y); + } catch (...) 
{ + // CIR_FLAT: cir.eh.inflight_exception loc + // CIR_FLAT: cir.br ^[[CATCH_ALL:.*]]({{.*}} : {{.*}}) loc + // CIR_FLAT: ^[[CATCH_ALL]](%[[CATCH_ALL_EH:.*]]: !cir.ptr + // CIR_FLAT: cir.catch_param begin %[[CATCH_ALL_EH]] -> !cir.ptr + // CIR_FLAT: cir.const #cir.int<100> : !s32i + // CIR_FLAT: cir.br ^[[AFTER_TRY:.*]] loc + // CIR_FLAT: ^[[AFTER_TRY]]: // 2 preds + // CIR_FLAT: cir.load + z = 100; + } + + return z; +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/types.cir b/clang/test/CIR/Lowering/types.cir new file mode 100644 index 000000000000..f91f25cb5e41 --- /dev/null +++ b/clang/test/CIR/Lowering/types.cir @@ -0,0 +1,18 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!void = !cir.void +!u8i = !cir.int +module { + cir.global external @testVTable = #cir.vtable<{#cir.const_array<[#cir.ptr<-8> : !cir.ptr]> : !cir.array x 1>}> : !cir.struct x 1>}> + // CHECK: llvm.mlir.constant(-8 : i64) : i64 + // CHECK: llvm.inttoptr %{{[0-9]+}} : i64 to !llvm.ptr + cir.func @testTypeLowering() { + // Should lower void pointers as opaque pointers. + %0 = cir.const #cir.ptr : !cir.ptr + // CHECK: llvm.mlir.zero : !llvm.ptr + %1 = cir.const #cir.ptr : !cir.ptr> + // CHECK: llvm.mlir.zero : !llvm.ptr + cir.return + } +} \ No newline at end of file diff --git a/clang/test/CIR/Lowering/unary-inc-dec.cir b/clang/test/CIR/Lowering/unary-inc-dec.cir new file mode 100644 index 000000000000..9ba26b36f61c --- /dev/null +++ b/clang/test/CIR/Lowering/unary-inc-dec.cir @@ -0,0 +1,63 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %2 = cir.const #cir.int<2> : !s32i + cir.store %2, %0 : !s32i, !cir.ptr + cir.store %2, %1 : !s32i, !cir.ptr + + %3 = cir.load %0 : !cir.ptr, !s32i + %4 = cir.unary(inc, %3) : !s32i, !s32i + cir.store %4, %0 : !s32i, !cir.ptr + + %5 = cir.load %1 : !cir.ptr, !s32i + %6 = cir.unary(dec, %5) : !s32i, !s32i + cir.store %6, %1 : !s32i, !cir.ptr + cir.return + } + +// MLIR: = llvm.mlir.constant(1 : i32) +// MLIR: = llvm.add +// MLIR: = llvm.mlir.constant(1 : i32) +// MLIR: = llvm.sub + +// LLVM: = add i32 %[[#]], 1 +// LLVM: = sub i32 %[[#]], 1 + + cir.func @floatingPoint(%arg0: !cir.float, %arg1: !cir.double) { + // MLIR: llvm.func @floatingPoint + %0 = cir.alloca !cir.float, !cir.ptr, ["f", init] {alignment = 4 : i64} + %1 = cir.alloca !cir.double, !cir.ptr, ["d", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.float, !cir.ptr + cir.store %arg1, %1 : !cir.double, !cir.ptr + + %2 = cir.load %0 : !cir.ptr, !cir.float + %3 = cir.unary(inc, %2) : !cir.float, !cir.float + cir.store %3, %0 : !cir.float, !cir.ptr + // MLIR: %[[#F_ONE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 + // MLIR: = llvm.fadd %[[#F_ONE]], %{{[0-9]+}} : f32 + + %4 = cir.load %0 : !cir.ptr, !cir.float + %5 = cir.unary(dec, %4) : !cir.float, !cir.float + cir.store %5, %0 : !cir.float, !cir.ptr + // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(-1.000000e+00 : f32) : f32 + // MLIR: = llvm.fadd %[[#D_ONE]], %{{[0-9]+}} : f32 + + %6 = cir.load %1 : !cir.ptr, !cir.double + %7 = cir.unary(inc, %6) : !cir.double, !cir.double + cir.store %7, %1 : !cir.double, !cir.ptr + // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(1.000000e+00 : f64) : f64 + // MLIR: = llvm.fadd %[[#D_ONE]], 
%{{[0-9]+}} : f64 + + %8 = cir.load %1 : !cir.ptr, !cir.double + %9 = cir.unary(dec, %8) : !cir.double, !cir.double + cir.store %9, %1 : !cir.double, !cir.ptr + // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(-1.000000e+00 : f64) : f64 + // MLIR: = llvm.fadd %[[#D_ONE]], %{{[0-9]+}} : f64 + + cir.return + } +} diff --git a/clang/test/CIR/Lowering/unary-not.cir b/clang/test/CIR/Lowering/unary-not.cir new file mode 100644 index 000000000000..48e2705e756d --- /dev/null +++ b/clang/test/CIR/Lowering/unary-not.cir @@ -0,0 +1,82 @@ +// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR +// RUN: cir-translate %s -cir-to-llvmir | FileCheck %s -check-prefix=LLVM +!s32i = !cir.int +module { + cir.func @foo() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %2 = cir.const #cir.int<1> : !s32i + cir.store %2, %1 : !s32i, !cir.ptr + %3 = cir.load %1 : !cir.ptr, !s32i + %4 = cir.unary(not, %3) : !s32i, !s32i + cir.store %4, %0 : !s32i, !cir.ptr + %5 = cir.load %0 : !cir.ptr, !s32i + cir.return %5 : !s32i + } + +// MLIR: = llvm.load +// MLIR: = llvm.mlir.constant(-1 : i32) +// MLIR: = llvm.xor + +// LLVM: = xor i32 -1, %[[#]] + + + cir.func @floatingPoint(%arg0: !cir.float, %arg1: !cir.double) { + // MLIR: llvm.func @floatingPoint + %0 = cir.alloca !cir.float, !cir.ptr, ["f", init] {alignment = 4 : i64} + %1 = cir.alloca !cir.double, !cir.ptr, ["d", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.float, !cir.ptr + cir.store %arg1, %1 : !cir.double, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !cir.float + %3 = cir.cast(float_to_bool, %2 : !cir.float), !cir.bool + // MLIR: %[[#F_ZERO:]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 + // MLIR: %[[#F_BOOL:]] = llvm.fcmp "une" %{{.+}}, %[[#F_ZERO]] : f32 + // MLIR: %[[#F_ZEXT:]] = llvm.zext %[[#F_BOOL]] : i1 to i8 + %4 = cir.unary(not, %3) : !cir.bool, !cir.bool + // MLIR: %[[#F_ONE:]] = llvm.mlir.constant(1 : i8) : i8 + // MLIR: = llvm.xor %[[#F_ZEXT]], %[[#F_ONE]] : i8 + %5 = cir.load %1 : !cir.ptr, !cir.double + %6 = cir.cast(float_to_bool, %5 : !cir.double), !cir.bool + // MLIR: %[[#D_ZERO:]] = llvm.mlir.constant(0.000000e+00 : f64) : f64 + // MLIR: %[[#D_BOOL:]] = llvm.fcmp "une" %{{.+}}, %[[#D_ZERO]] : f64 + // MLIR: %[[#D_ZEXT:]] = llvm.zext %[[#D_BOOL]] : i1 to i8 + %7 = cir.unary(not, %6) : !cir.bool, !cir.bool + // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(1 : i8) : i8 + // MLIR: = llvm.xor %[[#D_ZEXT]], %[[#D_ONE]] : i8 + cir.return + } + + cir.func @CStyleValueNegation(%arg0: !s32i, %arg1: !cir.float) { + // MLIR: llvm.func @CStyleValueNegation + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %3 = cir.alloca !cir.float, !cir.ptr, ["f", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %3 : !cir.float, !cir.ptr + + %5 = cir.load %0 : !cir.ptr, !s32i + %6 = cir.cast(int_to_bool, %5 : !s32i), !cir.bool + %7 = cir.unary(not, %6) : !cir.bool, !cir.bool + %8 = cir.cast(bool_to_int, %7 : !cir.bool), !s32i + // MLIR: %[[#INT:]] = llvm.load %{{.+}} : !llvm.ptr + // MLIR: %[[#IZERO:]] = llvm.mlir.constant(0 : i32) : i32 + // MLIR: %[[#ICMP:]] = llvm.icmp "ne" %[[#INT]], %[[#IZERO]] : i32 + // MLIR: %[[#IEXT:]] = llvm.zext %[[#ICMP]] : i1 to i8 + // MLIR: %[[#IONE:]] = llvm.mlir.constant(1 : i8) : i8 + // MLIR: %[[#IXOR:]] = llvm.xor %[[#IEXT]], %[[#IONE]] : i8 + // MLIR: = llvm.zext %[[#IXOR]] : i8 to i32 + + %17 = cir.load %3 : !cir.ptr, !cir.float + %18 = cir.cast(float_to_bool, 
%17 : !cir.float), !cir.bool + %19 = cir.unary(not, %18) : !cir.bool, !cir.bool + %20 = cir.cast(bool_to_int, %19 : !cir.bool), !s32i + // MLIR: %[[#FLOAT:]] = llvm.load %{{.+}} : !llvm.ptr + // MLIR: %[[#FZERO:]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 + // MLIR: %[[#FCMP:]] = llvm.fcmp "une" %[[#FLOAT]], %[[#FZERO]] : f32 + // MLIR: %[[#FEXT:]] = llvm.zext %[[#FCMP]] : i1 to i8 + // MLIR: %[[#FONE:]] = llvm.mlir.constant(1 : i8) : i8 + // MLIR: %[[#FXOR:]] = llvm.xor %[[#FEXT]], %[[#FONE]] : i8 + // MLIR: = llvm.zext %[[#FXOR]] : i8 to i32 + + cir.return + } +} diff --git a/clang/test/CIR/Lowering/unary-plus-minus.cir b/clang/test/CIR/Lowering/unary-plus-minus.cir new file mode 100644 index 000000000000..cdb4d90fa854 --- /dev/null +++ b/clang/test/CIR/Lowering/unary-plus-minus.cir @@ -0,0 +1,43 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.cir +// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR + +!s32i = !cir.int +module { + cir.func @foo() { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %2 = cir.const #cir.int<2> : !s32i + cir.store %2, %0 : !s32i, !cir.ptr + cir.store %2, %1 : !s32i, !cir.ptr + + %3 = cir.load %0 : !cir.ptr, !s32i + %4 = cir.unary(plus, %3) : !s32i, !s32i + cir.store %4, %0 : !s32i, !cir.ptr + + %5 = cir.load %1 : !cir.ptr, !s32i + %6 = cir.unary(minus, %5) : !s32i, !s32i + cir.store %6, %1 : !s32i, !cir.ptr + cir.return + } + +// MLIR: %[[#INPUT_PLUS:]] = llvm.load +// MLIR: llvm.store %[[#INPUT_PLUS]] +// MLIR: %[[#INPUT_MINUS:]] = llvm.load +// MLIR: %[[ZERO:[a-z0-9_]+]] = llvm.mlir.constant(0 : i32) +// MLIR: llvm.sub %[[ZERO]], %[[#INPUT_MINUS]] + + cir.func @floatingPoints(%arg0: !cir.double) { + // MLIR: llvm.func @floatingPoints(%arg0: f64) + %0 = cir.alloca !cir.double, !cir.ptr, ["X", init] {alignment = 8 : i64} + cir.store %arg0, %0 : !cir.double, !cir.ptr + %1 = cir.load %0 : !cir.ptr, !cir.double + %2 = cir.unary(plus, %1) : !cir.double, !cir.double + // MLIR: llvm.store %arg0, %[[#F_PLUS:]] {{.*}}: f64, !llvm.ptr + // MLIR: %{{[0-9]}} = llvm.load %[[#F_PLUS]] {alignment = 8 : i64} : !llvm.ptr -> f64 + %3 = cir.load %0 : !cir.ptr, !cir.double + %4 = cir.unary(minus, %3) : !cir.double, !cir.double + // MLIR: %[[#F_MINUS:]] = llvm.load %{{[0-9]}} {alignment = 8 : i64} : !llvm.ptr -> f64 + // MLIR: %{{[0-9]}} = llvm.fneg %[[#F_MINUS]] : f64 + cir.return + } +} diff --git a/clang/test/CIR/Lowering/unions.cir b/clang/test/CIR/Lowering/unions.cir new file mode 100644 index 000000000000..0cc9d1d15749 --- /dev/null +++ b/clang/test/CIR/Lowering/unions.cir @@ -0,0 +1,42 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +!s16i = !cir.int +!s32i = !cir.int +#true = #cir.bool : !cir.bool +!ty_U1_ = !cir.struct +!ty_U2_ = !cir.struct +!ty_U3_ = !cir.struct +module { + // Should lower union to struct with only the largest member. + cir.global external @u1 = #cir.zero : !ty_U1_ + // CHECK: llvm.mlir.global external @u1() {addr_space = 0 : i32} : !llvm.struct<"union.U1", (i32)> + + // Should recursively find the largest member if there are nested unions. 
+ cir.global external @u2 = #cir.zero : !ty_U2_ + cir.global external @u3 = #cir.zero : !ty_U3_ + // CHECK: llvm.mlir.global external @u2() {addr_space = 0 : i32} : !llvm.struct<"union.U2", (f64)> + // CHECK: llvm.mlir.global external @u3() {addr_space = 0 : i32} : !llvm.struct<"union.U3", (i32)> + + // CHECK: llvm.func @test + cir.func @test(%arg0: !cir.ptr) { + + // Should store directly to the union's base address. + %5 = cir.const #true + %6 = cir.get_member %arg0[0] {name = "b"} : !cir.ptr -> !cir.ptr + cir.store %5, %6 : !cir.bool, !cir.ptr + // CHECK: %[[#VAL:]] = llvm.mlir.constant(1 : i8) : i8 + // The bitcast is just to bypass the type checker. It will be replaced by an opaque pointer. + // CHECK: %[[#ADDR:]] = llvm.bitcast %{{.+}} : !llvm.ptr + // CHECK: llvm.store %[[#VAL]], %[[#ADDR]] {{.*}}: i8, !llvm.ptr + + // Should load directly from the union's base address. + %7 = cir.get_member %arg0[0] {name = "b"} : !cir.ptr -> !cir.ptr + %8 = cir.load %7 : !cir.ptr, !cir.bool + // The bitcast is just to bypass the type checker. It will be replaced by an opaque pointer. + // CHECK: %[[#BASE:]] = llvm.bitcast %{{.+}} : !llvm.ptr + // CHECK: %{{.+}} = llvm.load %[[#BASE]] {alignment = 1 : i64} : !llvm.ptr -> i8 + + cir.return + } +} diff --git a/clang/test/CIR/Lowering/variadics.cir b/clang/test/CIR/Lowering/variadics.cir new file mode 100644 index 000000000000..dfbfbf66e56f --- /dev/null +++ b/clang/test/CIR/Lowering/variadics.cir @@ -0,0 +1,40 @@ +// RUN: cir-opt %s -cir-to-llvm -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=MLIR + +!s32i = !cir.int +!u32i = !cir.int +!u8i = !cir.int + +!ty___va_list_tag = !cir.struct, !cir.ptr} #cir.record.decl.ast> + +module { + cir.func @average(%arg0: !s32i, ...) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["count", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %2 = cir.alloca !cir.array, !cir.ptr>, ["args"] {alignment = 16 : i64} + %3 = cir.alloca !cir.array, !cir.ptr>, ["args_copy"] {alignment = 16 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + %4 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr + cir.va.start %4 : !cir.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)> + // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr + // MLIR-NEXT: llvm.intr.vastart %{{[0-9]+}} : !llvm.ptr + %5 = cir.cast(array_to_ptrdecay, %3 : !cir.ptr>), !cir.ptr + %6 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr + cir.va.copy %6 to %5 : !cir.ptr, !cir.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)> + // MLIR-NEXT: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)> + // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr + // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr + // MLIR-NEXT: llvm.intr.vacopy %13 to %{{[0-9]+}} : !llvm.ptr, !llvm.ptr + %7 = cir.cast(array_to_ptrdecay, %2 : !cir.ptr>), !cir.ptr + cir.va.end %7 : !cir.ptr + // MLIR: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)> + // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr + // MLIR-NEXT: llvm.intr.vaend %{{[0-9]+}} : !llvm.ptr + %8 =
cir.const #cir.int<0> : !s32i + cir.store %8, %1 : !s32i, !cir.ptr + %9 = cir.load %1 : !cir.ptr, !s32i + cir.return %9 : !s32i + } +} diff --git a/clang/test/CIR/Lowering/vectype.cpp b/clang/test/CIR/Lowering/vectype.cpp new file mode 100644 index 000000000000..f5e56c8e87bc --- /dev/null +++ b/clang/test/CIR/Lowering/vectype.cpp @@ -0,0 +1,330 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: cir-opt %t.cir -cir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s + +typedef int vi4 __attribute__((vector_size(16))); +typedef double vd2 __attribute__((vector_size(16))); +typedef long long vll2 __attribute__((vector_size(16))); +typedef unsigned short vus2 __attribute__((vector_size(4))); + +void vector_int_test(int x) { + + // Vector constant. + vi4 a = { 1, 2, 3, 4 }; + // CHECK: %[[#T42:]] = llvm.mlir.constant(dense<[1, 2, 3, 4]> : vector<4xi32>) : vector<4xi32> + // CHECK: llvm.store %[[#T42]], %[[#T3:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // Non-const vector initialization. + vi4 b = { x, 5, 6, x + 1 }; + // CHECK: %[[#T43:]] = llvm.load %[[#T1:]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T44:]] = llvm.mlir.constant(5 : i32) : i32 + // CHECK: %[[#T45:]] = llvm.mlir.constant(6 : i32) : i32 + // CHECK: %[[#T46:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T47:]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[#T48:]] = llvm.add %[[#T46]], %[[#T47]] overflow : i32 + // CHECK: %[[#T49:]] = llvm.mlir.undef : vector<4xi32> + // CHECK: %[[#T50:]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[#T51:]] = llvm.insertelement %[[#T43]], %[[#T49]][%[[#T50]] : i64] : vector<4xi32> + // CHECK: %[[#T52:]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[#T53:]] = llvm.insertelement %[[#T44]], %[[#T51]][%[[#T52]] : i64] : vector<4xi32> + // CHECK: %[[#T54:]] = llvm.mlir.constant(2 : i64) : i64 + // CHECK: %[[#T55:]] = llvm.insertelement %[[#T45]], %[[#T53]][%[[#T54]] : i64] : vector<4xi32> + // CHECK: %[[#T56:]] = llvm.mlir.constant(3 : i64) : i64 + // CHECK: %[[#T57:]] = llvm.insertelement %[[#T48]], %[[#T55]][%[[#T56]] : i64] : vector<4xi32> + // CHECK: llvm.store %[[#T57]], %[[#T5:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // Vector to vector conversion + vd2 bb = (vd2)b; + // CHECK: %[[#bval:]] = llvm.load %[[#bmem:]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#bbval:]] = llvm.bitcast %[[#bval]] : vector<4xi32> to vector<2xf64> + // CHECK: llvm.store %[[#bbval]], %[[#bbmem:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + + // Scalar to vector conversion, a.k.a. vector splat. + b = a + 7; + // CHECK: %[[#undef:]] = llvm.mlir.undef : vector<4xi32> + // CHECK: %[[#zeroInt:]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[#inserted:]] = llvm.insertelement %[[#seven:]], %[[#undef]][%[[#zeroInt]] : i64] : vector<4xi32> + // CHECK: %[[#shuffled:]] = llvm.shufflevector %[[#inserted]], %[[#undef]] [0, 0, 0, 0] : vector<4xi32> + + // Extract element. + int c = a[x]; + // CHECK: %[[#T58:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T59:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T60:]] = llvm.extractelement %[[#T58]][%[[#T59]] : i32] : vector<4xi32> + // CHECK: llvm.store %[[#T60]], %[[#T7:]] {alignment = 4 : i64} : i32, !llvm.ptr + + // Insert element. 
+ a[x] = x; + // CHECK: %[[#T61:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T62:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T63:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T64:]] = llvm.insertelement %[[#T61]], %[[#T63]][%[[#T62]] : i32] : vector<4xi32> + // CHECK: llvm.store %[[#T64]], %[[#T3]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // Compound assignment + a[x] += a[0]; + // CHECK: %[[#LOADCA1:]] = llvm.load %{{[0-9]+}} {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#RHSCA:]] = llvm.extractelement %[[#LOADCA1:]][%{{[0-9]+}} : i32] : vector<4xi32> + // CHECK: %[[#LOADCAIDX2:]] = llvm.load %{{[0-9]+}} {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#LOADCAVEC3:]] = llvm.load %{{[0-9]+}} {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#LHSCA:]] = llvm.extractelement %[[#LOADCAVEC3:]][%[[#LOADCAIDX2:]] : i32] : vector<4xi32> + // CHECK: %[[#SUMCA:]] = llvm.add %[[#LHSCA:]], %[[#RHSCA:]] overflow : i32 + // CHECK: %[[#LOADCAVEC4:]] = llvm.load %{{[0-9]+}} {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#RESULTCAVEC:]] = llvm.insertelement %[[#SUMCA:]], %[[#LOADCAVEC4:]][%[[#LOADCAIDX2:]] : i32] : vector<4xi32> + // CHECK: llvm.store %[[#RESULTCAVEC:]], %{{[0-9]+}} {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // Binary arithmetic operators. + vi4 d = a + b; + // CHECK: %[[#T65:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T66:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T67:]] = llvm.add %[[#T65]], %[[#T66]] : vector<4xi32> + // CHECK: llvm.store %[[#T67]], %[[#T9:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 e = a - b; + // CHECK: %[[#T68:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T69:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T70:]] = llvm.sub %[[#T68]], %[[#T69]] : vector<4xi32> + // CHECK: llvm.store %[[#T70]], %[[#T11:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 f = a * b; + // CHECK: %[[#T71:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T72:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T73:]] = llvm.mul %[[#T71]], %[[#T72]] : vector<4xi32> + // CHECK: llvm.store %[[#T73]], %[[#T13:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 g = a / b; + // CHECK: %[[#T74:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T75:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T76:]] = llvm.sdiv %[[#T74]], %[[#T75]] : vector<4xi32> + // CHECK: llvm.store %[[#T76]], %[[#T15:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 h = a % b; + // CHECK: %[[#T77:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T78:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T79:]] = llvm.srem %[[#T77]], %[[#T78]] : vector<4xi32> + // CHECK: llvm.store %[[#T79]], %[[#T17:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 i = a & b; + // CHECK: %[[#T80:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T81:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> 
+ // CHECK: %[[#T82:]] = llvm.and %[[#T80]], %[[#T81]] : vector<4xi32> + // CHECK: llvm.store %[[#T82]], %[[#T19:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 j = a | b; + // CHECK: %[[#T83:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T84:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T85:]] = llvm.or %[[#T83]], %[[#T84]] : vector<4xi32> + // CHECK: llvm.store %[[#T85]], %[[#T21:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 k = a ^ b; + // CHECK: %[[#T86:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T87:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T88:]] = llvm.xor %[[#T86]], %[[#T87]] : vector<4xi32> + // CHECK: llvm.store %[[#T88]], %[[#T23:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // Unary arithmetic operators. + vi4 l = +a; + // CHECK: %[[#T89:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: llvm.store %[[#T89]], %[[#T25:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 m = -a; + // CHECK: %[[#T90:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T91:]] = llvm.mlir.zero : vector<4xi32> + // CHECK: %[[#T92:]] = llvm.sub %[[#T91]], %[[#T90]] : vector<4xi32> + // CHECK: llvm.store %[[#T92]], %[[#T27:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 n = ~a; + // CHECK: %[[#T93:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T94:]] = llvm.mlir.constant(-1 : i32) : i32 + // CHECK: %[[#T95:]] = llvm.mlir.undef : vector<4xi32> + // CHECK: %[[#T96:]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[#T97:]] = llvm.insertelement %[[#T94]], %[[#T95]][%[[#T96]] : i64] : vector<4xi32> + // CHECK: %[[#T98:]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[#T99:]] = llvm.insertelement %[[#T94]], %[[#T97]][%[[#T98]] : i64] : vector<4xi32> + // CHECK: %[[#T100:]] = llvm.mlir.constant(2 : i64) : i64 + // CHECK: %[[#T101:]] = llvm.insertelement %[[#T94]], %[[#T99]][%[[#T100]] : i64] : vector<4xi32> + // CHECK: %[[#T102:]] = llvm.mlir.constant(3 : i64) : i64 + // CHECK: %[[#T103:]] = llvm.insertelement %[[#T94]], %[[#T101]][%[[#T102]] : i64] : vector<4xi32> + // CHECK: %[[#T104:]] = llvm.xor %[[#T103]], %[[#T93]] : vector<4xi32> + // CHECK: llvm.store %[[#T104]], %[[#T29:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // Ternary conditional operator + vi4 tc = a ? 
b : d; + // CHECK: %[[#Zero:]] = llvm.mlir.zero : vector<4xi32> + // CHECK: %[[#BitVec:]] = llvm.icmp "ne" %[[#A:]], %[[#Zero]] : vector<4xi32> + // CHECK: %[[#Res:]] = llvm.select %[[#BitVec]], %[[#B:]], %[[#D:]] : vector<4xi1>, vector<4xi32> + + // Comparisons + vi4 o = a == b; + // CHECK: %[[#T105:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T106:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T107:]] = llvm.icmp "eq" %[[#T105]], %[[#T106]] : vector<4xi32> + // CHECK: %[[#T108:]] = llvm.sext %[[#T107]] : vector<4xi1> to vector<4xi32> + // CHECK: llvm.store %[[#T108]], %[[#To:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 p = a != b; + // CHECK: %[[#T109:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T110:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T111:]] = llvm.icmp "ne" %[[#T109]], %[[#T110]] : vector<4xi32> + // CHECK: %[[#T112:]] = llvm.sext %[[#T111]] : vector<4xi1> to vector<4xi32> + // CHECK: llvm.store %[[#T112]], %[[#Tp:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 q = a < b; + // CHECK: %[[#T113:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T114:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T115:]] = llvm.icmp "slt" %[[#T113]], %[[#T114]] : vector<4xi32> + // CHECK: %[[#T116:]] = llvm.sext %[[#T115]] : vector<4xi1> to vector<4xi32> + // CHECK: llvm.store %[[#T116]], %[[#Tq:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 r = a > b; + // CHECK: %[[#T117:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T118:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T119:]] = llvm.icmp "sgt" %[[#T117]], %[[#T118]] : vector<4xi32> + // CHECK: %[[#T120:]] = llvm.sext %[[#T119]] : vector<4xi1> to vector<4xi32> + // CHECK: llvm.store %[[#T120]], %[[#Tr:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 s = a <= b; + // CHECK: %[[#T121:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T122:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T123:]] = llvm.icmp "sle" %[[#T121]], %[[#T122]] : vector<4xi32> + // CHECK: %[[#T124:]] = llvm.sext %[[#T123]] : vector<4xi1> to vector<4xi32> + // CHECK: llvm.store %[[#T124]], %[[#Ts:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + vi4 t = a >= b; + // CHECK: %[[#T125:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T126:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#T127:]] = llvm.icmp "sge" %[[#T125]], %[[#T126]] : vector<4xi32> + // CHECK: %[[#T128:]] = llvm.sext %[[#T127]] : vector<4xi1> to vector<4xi32> + // CHECK: llvm.store %[[#T128]], %[[#Tt:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr + + // __builtin_shufflevector + vi4 u = __builtin_shufflevector(a, b, 7, 5, 3, 1); + // CHECK: %[[#Tu:]] = llvm.shufflevector %[[#bsva:]], %[[#bsvb:]] [7, 5, 3, 1] : vector<4xi32> + vi4 v = __builtin_shufflevector(a, b); + // CHECK: %[[#sv_a:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#sv_b:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32> + // CHECK: %[[#sv0:]] = llvm.mlir.constant(3 : i32) : i32 + // CHECK: 
%[[#sv1:]] = llvm.mlir.undef : vector<4xi32> + // CHECK: %[[#sv2:]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[#sv3:]] = llvm.insertelement %[[#sv0]], %[[#sv1]][%[[#sv2]] : i64] : vector<4xi32> + // CHECK: %[[#sv4:]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[#sv5:]] = llvm.insertelement %[[#sv0]], %[[#sv3]][%[[#sv4]] : i64] : vector<4xi32> + // CHECK: %[[#sv6:]] = llvm.mlir.constant(2 : i64) : i64 + // CHECK: %[[#sv7:]] = llvm.insertelement %[[#sv0]], %[[#sv5]][%[[#sv6]] : i64] : vector<4xi32> + // CHECK: %[[#sv8:]] = llvm.mlir.constant(3 : i64) : i64 + // CHECK: %[[#sv9:]] = llvm.insertelement %[[#sv0]], %[[#sv7]][%[[#sv8]] : i64] : vector<4xi32> + // CHECK: %[[#svA:]] = llvm.and %[[#sv_b]], %[[#sv9]] : vector<4xi32> + // CHECK: %[[#svB:]] = llvm.mlir.undef : vector<4xi32> + // CHECK: %[[#svC:]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[#svD:]] = llvm.extractelement %[[#svA]][%[[#svC]] : i64] : vector<4xi32> + // CHECK: %[[#svE:]] = llvm.extractelement %[[#sv_a]][%[[#svD]] : i32] : vector<4xi32> + // CHECK: %[[#svF:]] = llvm.insertelement %[[#svE]], %[[#svB]][%[[#svC]] : i64] : vector<4xi32> + // CHECK: %[[#svG:]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[#svH:]] = llvm.extractelement %[[#svA]][%[[#svG]] : i64] : vector<4xi32> + // CHECK: %[[#svI:]] = llvm.extractelement %[[#sv_a]][%[[#svH]] : i32] : vector<4xi32> + // CHECK: %[[#svJ:]] = llvm.insertelement %[[#svI]], %[[#svF]][%[[#svG]] : i64] : vector<4xi32> + // CHECK: %[[#svK:]] = llvm.mlir.constant(2 : i64) : i64 + // CHECK: %[[#svL:]] = llvm.extractelement %[[#svA]][%[[#svK]] : i64] : vector<4xi32> + // CHECK: %[[#svM:]] = llvm.extractelement %[[#sv_a]][%[[#svL]] : i32] : vector<4xi32> + // CHECK: %[[#svN:]] = llvm.insertelement %[[#svM]], %[[#svJ]][%[[#svK]] : i64] : vector<4xi32> + // CHECK: %[[#svO:]] = llvm.mlir.constant(3 : i64) : i64 + // CHECK: %[[#svP:]] = llvm.extractelement %[[#svA]][%[[#svO]] : i64] : vector<4xi32> + // CHECK: %[[#svQ:]] = llvm.extractelement %[[#sv_a]][%[[#svP:]] : i32] : vector<4xi32> + // CHECK: %[[#svR:]] = llvm.insertelement %[[#svQ]], %[[#svN]][%[[#svO]] : i64] : vector<4xi32> + // CHECK: llvm.store %[[#svR]], %[[#sv_v:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr +} + +void vector_double_test(int x, double y) { + + // Vector constant. + vd2 a = { 1.5, 2.5 }; + // CHECK: %[[#T28:]] = llvm.mlir.constant(dense<[1.500000e+00, 2.500000e+00]> : vector<2xf64>) : vector<2xf64> + // CHECK: llvm.store %[[#T28]], %[[#T5:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + + // Non-const vector initialization. + vd2 b = { y, y + 1.0 }; + // CHECK: %[[#T29:]] = llvm.load %[[#T3:]] {alignment = 8 : i64} : !llvm.ptr -> f64 + // CHECK: %[[#T30:]] = llvm.load %[[#T3]] {alignment = 8 : i64} : !llvm.ptr -> f64 + // CHECK: %[[#T31:]] = llvm.mlir.constant(1.000000e+00 : f64) : f64 + // CHECK: %[[#T32:]] = llvm.fadd %[[#T30]], %[[#T31]] : f64 + // CHECK: %[[#T33:]] = llvm.mlir.undef : vector<2xf64> + // CHECK: %[[#T34:]] = llvm.mlir.constant(0 : i64) : i64 + // CHECK: %[[#T35:]] = llvm.insertelement %[[#T29]], %[[#T33]][%[[#T34]] : i64] : vector<2xf64> + // CHECK: %[[#T36:]] = llvm.mlir.constant(1 : i64) : i64 + // CHECK: %[[#T37:]] = llvm.insertelement %[[#T32]], %[[#T35]][%[[#T36]] : i64] : vector<2xf64> + // CHECK: llvm.store %[[#T37]], %[[#T7:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + + // Extract element. 
+ double c = a[x]; + // CHECK: %[[#T38:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T39:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T40:]] = llvm.extractelement %[[#T38]][%[[#T39]] : i32] : vector<2xf64> + // CHECK: llvm.store %[[#T40]], %[[#T9:]] {alignment = 8 : i64} : f64, !llvm.ptr + + // Insert element. + a[x] = y; + // CHECK: %[[#T41:]] = llvm.load %[[#T3]] {alignment = 8 : i64} : !llvm.ptr -> f64 + // CHECK: %[[#T42:]] = llvm.load %[[#T1:]] {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[#T43:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T44:]] = llvm.insertelement %[[#T41]], %[[#T43]][%[[#T42]] : i32] : vector<2xf64> + // CHECK: llvm.store %[[#T44]], %[[#T5]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + + // Binary arithmetic operators. + vd2 d = a + b; + // CHECK: %[[#T45:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T46:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T47:]] = llvm.fadd %[[#T45]], %[[#T46]] : vector<2xf64> + // CHECK: llvm.store %[[#T47]], %[[#T11:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + vd2 e = a - b; + // CHECK: %[[#T48:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T49:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T50:]] = llvm.fsub %[[#T48]], %[[#T49]] : vector<2xf64> + // CHECK: llvm.store %[[#T50]], %[[#T13:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + vd2 f = a * b; + // CHECK: %[[#T51:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T52:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T53:]] = llvm.fmul %[[#T51]], %[[#T52]] : vector<2xf64> + // CHECK: llvm.store %[[#T53]], %[[#T15:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + vd2 g = a / b; + // CHECK: %[[#T54:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T55:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T56:]] = llvm.fdiv %[[#T54]], %[[#T55]] : vector<2xf64> + // CHECK: llvm.store %[[#T56]], %[[#T17:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + + // Unary arithmetic operators. 
+ vd2 l = +a; + // CHECK: %[[#T57:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: llvm.store %[[#T57]], %[[#T19:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + vd2 m = -a; + // CHECK: %[[#T58:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T59:]] = llvm.fneg %[[#T58]] : vector<2xf64> + // CHECK: llvm.store %[[#T59]], %[[#T21:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr + + // Comparisons + vll2 o = a == b; + // CHECK: %[[#T60:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T61:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T62:]] = llvm.fcmp "oeq" %[[#T60]], %[[#T61]] : vector<2xf64> + // CHECK: %[[#T63:]] = llvm.sext %[[#T62]] : vector<2xi1> to vector<2xi64> + // CHECK: llvm.store %[[#T63]], %[[#To:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr + vll2 p = a != b; + // CHECK: %[[#T64:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T65:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T66:]] = llvm.fcmp "une" %[[#T64]], %[[#T65]] : vector<2xf64> + // CHECK: %[[#T67:]] = llvm.sext %[[#T66]] : vector<2xi1> to vector<2xi64> + // CHECK: llvm.store %[[#T67]], %[[#Tp:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr + vll2 q = a < b; + // CHECK: %[[#T68:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T69:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T70:]] = llvm.fcmp "olt" %[[#T68]], %[[#T69]] : vector<2xf64> + // CHECK: %[[#T71:]] = llvm.sext %[[#T70]] : vector<2xi1> to vector<2xi64> + // CHECK: llvm.store %[[#T71]], %[[#Tq:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr + vll2 r = a > b; + // CHECK: %[[#T72:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T73:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T74:]] = llvm.fcmp "ogt" %[[#T72]], %[[#T73]] : vector<2xf64> + // CHECK: %[[#T75:]] = llvm.sext %[[#T74]] : vector<2xi1> to vector<2xi64> + // CHECK: llvm.store %[[#T75]], %[[#Tr:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr + vll2 s = a <= b; + // CHECK: %[[#T76:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T77:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T78:]] = llvm.fcmp "ole" %[[#T76]], %[[#T77]] : vector<2xf64> + // CHECK: %[[#T79:]] = llvm.sext %[[#T78]] : vector<2xi1> to vector<2xi64> + // CHECK: llvm.store %[[#T79]], %[[#Ts:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr + vll2 t = a >= b; + // CHECK: %[[#T80:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T81:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64> + // CHECK: %[[#T82:]] = llvm.fcmp "oge" %[[#T80]], %[[#T81]] : vector<2xf64> + // CHECK: %[[#T83:]] = llvm.sext %[[#T82]] : vector<2xi1> to vector<2xi64> + // CHECK: llvm.store %[[#T83]], %[[#Tt:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr + + // __builtin_convertvector + vus2 w = __builtin_convertvector(a, vus2); + // CHECK: %[[#cv0:]] = llvm.fptoui %[[#cv1:]] : vector<2xf64> to vector<2xi16> +} diff --git a/clang/test/CIR/Transforms/Inputs/folly-coro.h b/clang/test/CIR/Transforms/Inputs/folly-coro.h new file mode 100644 index 
000000000000..21e4b337eb22 --- /dev/null +++ b/clang/test/CIR/Transforms/Inputs/folly-coro.h @@ -0,0 +1,44 @@ +#include "std.h" + +namespace folly { +namespace coro { + +using std::suspend_always; +using std::suspend_never; +using std::coroutine_handle; + +using SemiFuture = int; + +template <typename T> +struct Task { + struct promise_type { + Task get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_value(T); + void unhandled_exception(); + auto yield_value(Task) noexcept { return final_suspend(); } + }; + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + T await_resume(); +}; + +template<> +struct Task<void> { + struct promise_type { + Task get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + void unhandled_exception() noexcept; + auto yield_value(Task) noexcept { return final_suspend(); } + }; + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} + SemiFuture semi(); +}; + +} // coro +} // folly \ No newline at end of file diff --git a/clang/test/CIR/Transforms/Inputs/std.h b/clang/test/CIR/Transforms/Inputs/std.h new file mode 100644 index 000000000000..1bc2b8504784 --- /dev/null +++ b/clang/test/CIR/Transforms/Inputs/std.h @@ -0,0 +1,29 @@ +namespace std { + +template <typename Ret, typename... Args> +struct coroutine_traits { using promise_type = typename Ret::promise_type; }; + +template <typename Promise = void> +struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; +}; +template <> +struct coroutine_handle<void> { + template <typename Promise> + coroutine_handle(coroutine_handle<Promise>) noexcept; + static coroutine_handle from_address(void *); +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +} // namespace std \ No newline at end of file diff --git a/clang/test/CIR/Transforms/Target/aarch64/aarch64-call-conv-lowering-pass.cpp b/clang/test/CIR/Transforms/Target/aarch64/aarch64-call-conv-lowering-pass.cpp new file mode 100644 index 000000000000..209679ebf383 --- /dev/null +++ b/clang/test/CIR/Transforms/Target/aarch64/aarch64-call-conv-lowering-pass.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -std=c++20 -triple aarch64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// CHECK: @_Z4Voidv() +void Void(void) { +// CHECK: cir.call @_Z4Voidv() : () -> () + Void(); +} + +// Test call conv lowering for trivial unsigned integer cases.
+ +// CHECK: @_Z4Boolb(%arg0: !cir.bool loc({{.+}})) -> !cir.bool +bool Bool(bool a) { +// CHECK: cir.call @_Z4Boolb({{.+}}) : (!cir.bool) -> !cir.bool + return Bool(a); +} + +// CHECK: cir.func @_Z5UCharh(%arg0: !u8i loc({{.+}})) -> !u8i +unsigned char UChar(unsigned char c) { + // CHECK: cir.call @_Z5UCharh(%2) : (!u8i) -> !u8i + return UChar(c); +} +// CHECK: cir.func @_Z6UShortt(%arg0: !u16i loc({{.+}})) -> !u16i +unsigned short UShort(unsigned short s) { + // CHECK: cir.call @_Z6UShortt(%2) : (!u16i) -> !u16i + return UShort(s); +} +// CHECK: cir.func @_Z4UIntj(%arg0: !u32i loc({{.+}})) -> !u32i +unsigned int UInt(unsigned int i) { + // CHECK: cir.call @_Z4UIntj(%2) : (!u32i) -> !u32i + return UInt(i); +} +// CHECK: cir.func @_Z5ULongm(%arg0: !u64i loc({{.+}})) -> !u64i +unsigned long ULong(unsigned long l) { + // CHECK: cir.call @_Z5ULongm(%2) : (!u64i) -> !u64i + return ULong(l); +} +// CHECK: cir.func @_Z9ULongLongy(%arg0: !u64i loc({{.+}})) -> !u64i +unsigned long long ULongLong(unsigned long long l) { + // CHECK: cir.call @_Z9ULongLongy(%2) : (!u64i) -> !u64i + return ULongLong(l); +} + + +/// Test call conv lowering for trivial signed cases. /// + +// CHECK: cir.func @_Z4Chara(%arg0: !s8i loc({{.+}})) -> !s8i +char Char(signed char c) { + // CHECK: cir.call @_Z4Chara(%{{.+}}) : (!s8i) -> !s8i + return Char(c); +} +// CHECK: cir.func @_Z5Shorts(%arg0: !s16i loc({{.+}})) -> !s16i +short Short(short s) { + // CHECK: cir.call @_Z5Shorts(%{{.+}}) : (!s16i) -> !s16i + return Short(s); +} +// CHECK: cir.func @_Z3Inti(%arg0: !s32i loc({{.+}})) -> !s32i +int Int(int i) { + // CHECK: cir.call @_Z3Inti(%{{.+}}) : (!s32i) -> !s32i + return Int(i); +} +// CHECK: cir.func @_Z4Longl(%arg0: !s64i loc({{.+}})) -> !s64i +long Long(long l) { + // CHECK: cir.call @_Z4Longl(%{{.+}}) : (!s64i) -> !s64i + return Long(l); +} +// CHECK: cir.func @_Z8LongLongx(%arg0: !s64i loc({{.+}})) -> !s64i +long long LongLong(long long l) { + // CHECK: cir.call @_Z8LongLongx(%{{.+}}) : (!s64i) -> !s64i + return LongLong(l); +} + + +/// Test call conv lowering for floating point. /// + +// CHECK: cir.func @_Z5Floatf(%arg0: !cir.float loc({{.+}})) -> !cir.float +float Float(float f) { + // cir.call @_Z5Floatf(%{{.+}}) : (!cir.float) -> !cir.float + return Float(f); +} +// CHECK: cir.func @_Z6Doubled(%arg0: !cir.double loc({{.+}})) -> !cir.double +double Double(double d) { + // cir.call @_Z6Doubled(%{{.+}}) : (!cir.double) -> !cir.double + return Double(d); +} diff --git a/clang/test/CIR/Transforms/Target/x86_64/x86_64-call-conv-lowering-pass.cpp b/clang/test/CIR/Transforms/Target/x86_64/x86_64-call-conv-lowering-pass.cpp new file mode 100644 index 000000000000..3789550ce33b --- /dev/null +++ b/clang/test/CIR/Transforms/Target/x86_64/x86_64-call-conv-lowering-pass.cpp @@ -0,0 +1,118 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Test call conv lowering for trivial cases. // + +// CHECK: @_Z4Voidv() +void Void(void) { +// CHECK: cir.call @_Z4Voidv() : () -> () + Void(); +} + +// Test call conv lowering for trivial zeroext cases. + +// Bools are a bit of an odd case in CIR's x86_64 representation: they are considered i8 +// everywhere except in the function return/arguments, where they are considered i1. To +// match LLVM's behavior, we need to zero-extend them when passing them as arguments. 
+ +// CHECK: @_Z4Boolb(%arg0: !cir.bool {cir.zeroext} loc({{.+}})) -> (!cir.bool {cir.zeroext}) +bool Bool(bool a) { +// CHECK: cir.call @_Z4Boolb({{.+}}) : (!cir.bool) -> !cir.bool + return Bool(a); +} + +// CHECK: cir.func @_Z5UCharh(%arg0: !u8i {cir.zeroext} loc({{.+}})) -> (!u8i {cir.zeroext}) +unsigned char UChar(unsigned char c) { + // CHECK: cir.call @_Z5UCharh(%2) : (!u8i) -> !u8i + return UChar(c); +} +// CHECK: cir.func @_Z6UShortt(%arg0: !u16i {cir.zeroext} loc({{.+}})) -> (!u16i {cir.zeroext}) +unsigned short UShort(unsigned short s) { + // CHECK: cir.call @_Z6UShortt(%2) : (!u16i) -> !u16i + return UShort(s); +} +// CHECK: cir.func @_Z4UIntj(%arg0: !u32i loc({{.+}})) -> !u32i +unsigned int UInt(unsigned int i) { + // CHECK: cir.call @_Z4UIntj(%2) : (!u32i) -> !u32i + return UInt(i); +} +// CHECK: cir.func @_Z5ULongm(%arg0: !u64i loc({{.+}})) -> !u64i +unsigned long ULong(unsigned long l) { + // CHECK: cir.call @_Z5ULongm(%2) : (!u64i) -> !u64i + return ULong(l); +} +// CHECK: cir.func @_Z9ULongLongy(%arg0: !u64i loc({{.+}})) -> !u64i +unsigned long long ULongLong(unsigned long long l) { + // CHECK: cir.call @_Z9ULongLongy(%2) : (!u64i) -> !u64i + return ULongLong(l); +} + +/// Test call conv lowering for trivial signext cases. /// + +// CHECK: cir.func @_Z4Chara(%arg0: !s8i {cir.signext} loc({{.+}})) -> (!s8i {cir.signext}) +char Char(signed char c) { + // CHECK: cir.call @_Z4Chara(%{{.+}}) : (!s8i) -> !s8i + return Char(c); +} +// CHECK: cir.func @_Z5Shorts(%arg0: !s16i {cir.signext} loc({{.+}})) -> (!s16i {cir.signext}) +short Short(short s) { + // CHECK: cir.call @_Z5Shorts(%{{.+}}) : (!s16i) -> !s16i + return Short(s); +} +// CHECK: cir.func @_Z3Inti(%arg0: !s32i loc({{.+}})) -> !s32i +int Int(int i) { + // CHECK: cir.call @_Z3Inti(%{{.+}}) : (!s32i) -> !s32i + return Int(i); +} +// CHECK: cir.func @_Z4Longl(%arg0: !s64i loc({{.+}})) -> !s64i +long Long(long l) { + // CHECK: cir.call @_Z4Longl(%{{.+}}) : (!s64i) -> !s64i + return Long(l); +} +// CHECK: cir.func @_Z8LongLongx(%arg0: !s64i loc({{.+}})) -> !s64i +long long LongLong(long long l) { + // CHECK: cir.call @_Z8LongLongx(%{{.+}}) : (!s64i) -> !s64i + return LongLong(l); +} + +/// Test call conv lowering for floating point. /// + +// CHECK: cir.func @_Z5Floatf(%arg0: !cir.float loc({{.+}})) -> !cir.float +float Float(float f) { + // cir.call @_Z5Floatf(%{{.+}}) : (!cir.float) -> !cir.float + return Float(f); +} +// CHECK: cir.func @_Z6Doubled(%arg0: !cir.double loc({{.+}})) -> !cir.double +double Double(double d) { + // cir.call @_Z6Doubled(%{{.+}}) : (!cir.double) -> !cir.double + return Double(d); +} + + +/// Test call conv lowering for struct type coercion scenarios. /// + +struct S1 { + int a, b; +}; + + +/// Validate coerced argument and cast it to the expected type. + +/// Cast arguments to the expected type. +// CHECK: cir.func @_Z2s12S1(%arg0: !u64i loc({{.+}})) -> !u64i +// CHECK: %[[#V0:]] = cir.alloca !ty_S1_, !cir.ptr +// CHECK: %[[#V1:]] = cir.cast(bitcast, %arg0 : !u64i), !ty_S1_ +// CHECK: cir.store %[[#V1]], %[[#V0]] : !ty_S1_, !cir.ptr +S1 s1(S1 arg) { + + /// Cast argument and result of the function call to the expected types. 
+ // CHECK: %[[#V9:]] = cir.cast(bitcast, %{{.+}} : !ty_S1_), !u64i + // CHECK: %[[#V10:]] = cir.call @_Z2s12S1(%[[#V9]]) : (!u64i) -> !u64i + // CHECK: %[[#V11:]] = cir.cast(bitcast, %[[#V10]] : !u64i), !ty_S1_ + s1({1, 2}); + + // CHECK: %[[#V12:]] = cir.load %{{.+}} : !cir.ptr, !ty_S1_ + // CHECK: %[[#V13:]] = cir.cast(bitcast, %[[#V12]] : !ty_S1_), !u64i + // CHECK: cir.return %[[#V13]] : !u64i + return {1, 2}; +} diff --git a/clang/test/CIR/Transforms/builtin-assume.cir b/clang/test/CIR/Transforms/builtin-assume.cir new file mode 100644 index 000000000000..c4f1317abb2b --- /dev/null +++ b/clang/test/CIR/Transforms/builtin-assume.cir @@ -0,0 +1,38 @@ +// RUN: cir-opt -cir-canonicalize -o %t.cir %s +// RUN: FileCheck --input-file %t.cir %s + +!s32i = !cir.int +module { + // Make sure canonicalizers don't erase assume builtins. + + cir.func @assume(%arg0: !s32i) { + %0 = cir.const #cir.int<0> : !s32i + %1 = cir.cmp(gt, %arg0, %0) : !s32i, !cir.bool + cir.assume %1 : !cir.bool + cir.return + } + // CHECK: cir.func @assume(%arg0: !s32i) { + // CHECK-NEXT: %0 = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: %1 = cir.cmp(gt, %arg0, %0) : !s32i, !cir.bool + // CHECK-NEXT: cir.assume %1 : !cir.bool + // CHECK-NEXT: cir.return + // CHECK-NEXT: } + + cir.func @assume_aligned(%arg0: !cir.ptr) -> !cir.ptr { + %0 = cir.assume.aligned %arg0 : !cir.ptr[alignment 8] + cir.return %0 : !cir.ptr + } + // CHECK: cir.func @assume_aligned(%arg0: !cir.ptr) -> !cir.ptr { + // CHECK-NEXT: %0 = cir.assume.aligned %arg0 : !cir.ptr[alignment 8] + // CHECK-NEXT: cir.return %0 : !cir.ptr + // CHECK-NEXT: } + + cir.func @assume_separate_storage(%arg0: !cir.ptr, %arg1: !cir.ptr) { + cir.assume.separate_storage %arg0, %arg1 : !cir.ptr + cir.return + } + // CHECK: cir.func @assume_separate_storage(%arg0: !cir.ptr, %arg1: !cir.ptr) { + // CHECK-NEXT: cir.assume.separate_storage %arg0, %arg1 : !cir.ptr + // CHECK-NEXT: cir.return + // CHECK-NEXT: } +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/complex-fold.cir b/clang/test/CIR/Transforms/complex-fold.cir new file mode 100644 index 000000000000..34f6b67e1dc4 --- /dev/null +++ b/clang/test/CIR/Transforms/complex-fold.cir @@ -0,0 +1,44 @@ +// RUN: cir-opt --canonicalize -o %t.cir %s +// RUN: FileCheck --input-file %t.cir %s + +!s32i = !cir.int + +module { + cir.func @complex_create_fold() -> !cir.complex { + %0 = cir.const #cir.int<1> : !s32i + %1 = cir.const #cir.int<2> : !s32i + %2 = cir.complex.create %0, %1 : !s32i -> !cir.complex + cir.return %2 : !cir.complex + } + + // CHECK-LABEL: cir.func @complex_create_fold() -> !cir.complex { + // CHECK-NEXT: %[[#A:]] = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex + // CHECK-NEXT: cir.return %[[#A]] : !cir.complex + // CHECK-NEXT: } + + cir.func @fold_complex_real() -> !s32i { + %0 = cir.const #cir.int<1> : !s32i + %1 = cir.const #cir.int<2> : !s32i + %2 = cir.complex.create %0, %1 : !s32i -> !cir.complex + %3 = cir.complex.real %2 : !cir.complex -> !s32i + cir.return %3 : !s32i + } + + // CHECK-LABEL: cir.func @fold_complex_real() -> !s32i { + // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: cir.return %[[#A]] : !s32i + // CHECK-NEXT: } + + cir.func @fold_complex_imag() -> !s32i { + %0 = cir.const #cir.int<1> : !s32i + %1 = cir.const #cir.int<2> : !s32i + %2 = cir.complex.create %0, %1 : !s32i -> !cir.complex + %3 = cir.complex.imag %2 : !cir.complex -> !s32i + cir.return %3 : !s32i + } + + // CHECK-LABEL: cir.func @fold_complex_imag() -> !s32i { 
+ // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<2> : !s32i + // CHECK-NEXT: cir.return %[[#A]] : !s32i + // CHECK-NEXT: } +} diff --git a/clang/test/CIR/Transforms/idiom-iter.cpp b/clang/test/CIR/Transforms/idiom-iter.cpp new file mode 100644 index 000000000000..5591baa04ff6 --- /dev/null +++ b/clang/test/CIR/Transforms/idiom-iter.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -I%S/../Inputs -fclangir-idiom-recognizer="remarks=found-calls" -clangir-verify-diagnostics %s -o %t.cir + +namespace std { +template struct array { + T arr[N]; + struct iterator { + T *p; + constexpr explicit iterator(T *p) : p(p) {} + constexpr bool operator!=(iterator o) { return p != o.p; } + constexpr iterator &operator++() { ++p; return *this; } + constexpr T &operator*() { return *p; } + }; + constexpr iterator begin() { return iterator(arr); } +}; +} + +void iter_test() +{ + std::array v2 = {1, 2, 3}; + (void)v2.begin(); // no remark should be produced. +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/idiom-recognizer.cpp b/clang/test/CIR/Transforms/idiom-recognizer.cpp new file mode 100644 index 000000000000..7264444cd98f --- /dev/null +++ b/clang/test/CIR/Transforms/idiom-recognizer.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-after-all %s -o - 2>&1 | FileCheck %s -check-prefix=PASS_ENABLED +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -I%S/../Inputs -fclangir-idiom-recognizer="remarks=found-calls" -clangir-verify-diagnostics %s -o %t.cir + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-before=cir-idiom-recognizer %s -o - 2>&1 | FileCheck %s -check-prefix=BEFORE-IDIOM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-after=cir-idiom-recognizer %s -o - 2>&1 | FileCheck %s -check-prefix=AFTER-IDIOM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s -check-prefix=AFTER-LOWERING-PREPARE + +// PASS_ENABLED: IR Dump After IdiomRecognizer (cir-idiom-recognizer) + +#include "std-cxx.h" + +int test_find(unsigned char n = 3) +{ + unsigned num_found = 0; + std::array v = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto f = std::find(v.begin(), v.end(), n); // expected-remark {{found call to std::find()}} + // expected-remark@-1 {{found call to begin() iterator}} + // expected-remark@-2 {{found call to end() iterator}} + + // BEFORE-IDIOM: {{.*}} cir.call @_ZNSt5arrayIhLj9EE5beginEv( + // AFTER-IDIOM: {{.*}} cir.iterator_begin(@_ZNSt5arrayIhLj9EE5beginEv, + // AFTER-LOWERING-PREPARE: {{.*}} cir.call @_ZNSt5arrayIhLj9EE5beginEv( + + // BEFORE-IDIOM: {{.*}} cir.call @_ZNSt5arrayIhLj9EE3endEv( + // AFTER-IDIOM: {{.*}} cir.iterator_end(@_ZNSt5arrayIhLj9EE3endEv, + // AFTER-LOWERING-PREPARE: {{.*}} cir.call @_ZNSt5arrayIhLj9EE3endEv( + + // BEFORE-IDIOM: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_( + // AFTER-IDIOM: {{.*}} cir.std.find(@_ZSt4findIPhhET_S1_S1_RKT0_, + // AFTER-LOWERING-PREPARE: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_( + + if (f != v.end()) // expected-remark {{found call to end() iterator}} + num_found++; + return num_found; +} + +namespace 
yolo { +template struct array { + T arr[N]; + typedef T value_type; + typedef value_type* iterator; + constexpr iterator begin() { return iterator(arr); } +}; +} + +int iter_test() +{ + yolo::array v = {1, 2, 3}; + (void)v.begin(); // no remark should be produced. +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/if.cir b/clang/test/CIR/Transforms/if.cir new file mode 100644 index 000000000000..7ca069fe9399 --- /dev/null +++ b/clang/test/CIR/Transforms/if.cir @@ -0,0 +1,48 @@ +// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s + +!s32i = !cir.int + +module { + cir.func @foo(%arg0: !s32i) -> !s32i { + %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + cir.if %4 { + %5 = cir.const #cir.int<1> : !s32i + cir.return %5 : !s32i + } else { + %5 = cir.const #cir.int<0> : !s32i + cir.return %5 : !s32i + } + cir.return %arg0 : !s32i + } +// CHECK: cir.func @foo(%arg0: !s32i) -> !s32i { +// CHECK-NEXT: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool +// CHECK-NEXT: cir.brcond %0 ^bb2, ^bb1 +// CHECK-NEXT: ^bb1: // pred: ^bb0 +// CHECK-NEXT: %1 = cir.const #cir.int<0> : !s32i +// CHECK-NEXT: cir.return %1 : !s32i +// CHECK-NEXT: ^bb2: // pred: ^bb0 +// CHECK-NEXT: %2 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.return %2 : !s32i +// CHECK-NEXT: ^bb3: // no predecessors +// CHECK-NEXT: cir.return %arg0 : !s32i +// CHECK-NEXT: } + + cir.func @onlyIf(%arg0: !s32i) -> !s32i { + %4 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + cir.if %4 { + %5 = cir.const #cir.int<1> : !s32i + cir.return %5 : !s32i + } + cir.return %arg0 : !s32i + } +// CHECK: cir.func @onlyIf(%arg0: !s32i) -> !s32i { +// CHECK-NEXT: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool +// CHECK-NEXT: cir.brcond %0 ^bb1, ^bb2 +// CHECK-NEXT: ^bb1: // pred: ^bb0 +// CHECK-NEXT: %1 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: cir.return %1 : !s32i +// CHECK-NEXT: ^bb2: // pred: ^bb0 +// CHECK-NEXT: cir.return %arg0 : !s32i +// CHECK-NEXT: } + +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lib-opt-find.cpp b/clang/test/CIR/Transforms/lib-opt-find.cpp new file mode 100644 index 000000000000..c11daba10f28 --- /dev/null +++ b/clang/test/CIR/Transforms/lib-opt-find.cpp @@ -0,0 +1,66 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -clangir-disable-emit-cxx-default -fclangir -fclangir-idiom-recognizer -fclangir-lib-opt -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +#include "std-cxx.h" + +int test1(unsigned char n = 3) +{ + // CHECK: test1 + unsigned num_found = 0; + // CHECK: %[[pattern_addr:.*]] = cir.alloca !u8i, !cir.ptr, ["n" + std::array v = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + + auto f = std::find(v.begin(), v.end(), n); + + // CHECK: %[[first:.*]] = cir.call @_ZNSt5arrayIhLj9EE5beginEv + // CHECK: %[[last:.*]] = cir.call @_ZNSt5arrayIhLj9EE3endEv + // CHECK: %[[cast_to_void:.*]] = cir.cast(bitcast, %[[first]] : !cir.ptr), !cir.ptr + // CHECK: %[[load_pattern:.*]] = cir.load %[[pattern_addr]] : !cir.ptr, !u8i + // CHECK: %[[pattern:.*]] = cir.cast(integral, %[[load_pattern:.*]] : !u8i), !s32i + + // CHECK-NOT: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_( + // CHECK: %[[array_size:.*]] = cir.const #cir.int<9> : !u64i + + // CHECK: %[[result_cast:.*]] = cir.libc.memchr(%[[cast_to_void]], %[[pattern]], %[[array_size]]) + // CHECK: %[[memchr_res:.*]] = cir.cast(bitcast, %[[result_cast]] : !cir.ptr), !cir.ptr + // CHECK: %[[nullptr:.*]] = cir.const #cir.ptr : !cir.ptr + // CHECK: %[[cmp_res:.*]] = cir.cmp(eq, %[[nullptr]], 
%[[memchr_res]]) : !cir.ptr, !cir.bool + // CHECK: cir.ternary(%[[cmp_res]], true { + // CHECK: cir.yield %[[last]] : !cir.ptr + // CHECK: }, false { + // CHECK: cir.yield %[[memchr_res]] : !cir.ptr + // CHECK: }) : (!cir.bool) -> !cir.ptr + + if (f != v.end()) + num_found++; + + return num_found; +} + +unsigned char* test2(unsigned char* first, unsigned char* last, unsigned char v) +{ + return std::find(first, last, v); + // CHECK: test2 + + // CHECK: %[[first_storage:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["first", init] + // CHECK: %[[last_storage:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["last", init] + // CHECK: %[[pattern_storage:.*]] = cir.alloca !u8i, !cir.ptr, ["v", init] + // CHECK: %[[first:.*]] = cir.load %[[first_storage]] + // CHECK: %[[last:.*]] = cir.load %[[last_storage]] + // CHECK: %[[cast_to_void:.*]] = cir.cast(bitcast, %[[first]] : !cir.ptr), !cir.ptr + // CHECK: %[[load_pattern:.*]] = cir.load %[[pattern_storage]] : !cir.ptr, !u8i + // CHECK: %[[pattern:.*]] = cir.cast(integral, %[[load_pattern:.*]] : !u8i), !s32i + + // CHECK-NOT: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_( + // CHECK: %[[array_size:.*]] = cir.ptr_diff(%[[last]], %[[first]]) : !cir.ptr -> !u64i + + // CHECK: %[[result_cast:.*]] = cir.libc.memchr(%[[cast_to_void]], %[[pattern]], %[[array_size]]) + // CHECK: %[[memchr_res:.*]] = cir.cast(bitcast, %[[result_cast]] : !cir.ptr), !cir.ptr + // CHECK: %[[nullptr:.*]] = cir.const #cir.ptr : !cir.ptr + // CHECK: %[[cmp_res:.*]] = cir.cmp(eq, %[[nullptr]], %[[memchr_res]]) : !cir.ptr, !cir.bool + // CHECK: cir.ternary(%[[cmp_res]], true { + // CHECK: cir.yield %[[last]] : !cir.ptr + // CHECK: }, false { + // CHECK: cir.yield %[[memchr_res]] : !cir.ptr + // CHECK: }) : (!cir.bool) -> !cir.ptr +} diff --git a/clang/test/CIR/Transforms/lib-opt.cpp b/clang/test/CIR/Transforms/lib-opt.cpp new file mode 100644 index 000000000000..17895e567645 --- /dev/null +++ b/clang/test/CIR/Transforms/lib-opt.cpp @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-idiom-recognizer -fclangir-lib-opt -emit-cir -mmlir --mlir-print-ir-after-all %s -o - 2>&1 | FileCheck %s -check-prefix=CIR + +// CIR: IR Dump After LibOpt (cir-lib-opt) \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-check-agg.cpp b/clang/test/CIR/Transforms/lifetime-check-agg.cpp new file mode 100644 index 000000000000..ebfe00c2ad56 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-agg.cpp @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -clangir-disable-emit-cxx-default -fclangir-lifetime-check="history=all;remarks=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir-analysis-only -fclangir-lifetime-check="history=all;remarks=all" %s -clangir-verify-diagnostics -emit-obj -o /dev/null + +typedef enum SType { + INFO_ENUM_0 = 9, + INFO_ENUM_1 = 2020, +} SType; + +typedef struct InfoRaw { + SType type; + const void* __attribute__((__may_alias__)) next; + unsigned int fa; + unsigned f; + unsigned s; + unsigned w; + unsigned h; + unsigned g; + unsigned a; +} InfoRaw; + +typedef unsigned long long FlagsPriv; +typedef struct InfoPriv { + SType type; + void* __attribute__((__may_alias__)) next; + FlagsPriv flags; +} InfoPriv; + +static const FlagsPriv PrivBit = 0x00000001; + +void escape_info(InfoRaw *info); +typedef SType ( *FnPtr)(unsigned s, const InfoRaw* i); +struct X { + struct 
entries { + FnPtr wildfn = nullptr; + }; + static entries e; +}; + +void exploded_fields(bool cond, int c) { + for (int i = 0; i < c; i++) { + InfoRaw info = {INFO_ENUM_0}; // expected-note {{invalidated here}} + if (cond) { + InfoPriv privTmp = {INFO_ENUM_1}; + privTmp.flags = PrivBit; + info.next = &privTmp; + } // expected-note {{pointee 'privTmp' invalidated at end of scope}} + + // If the 'if' above is taken, info.next is invalidated at the end of the scope, otherwise + // it's also invalid because it was initialized with 'nullptr'. This could be a noisy + // check if calls like `escape_info` are used to further initialize `info`. + + escape_info(&info); // expected-remark {{pset => { invalid, nullptr }}} + // expected-warning@-1 {{passing aggregate containing invalid pointer member 'info.next'}} + X::e.wildfn(0, &info); // expected-remark {{pset => { invalid, nullptr }}} + // expected-warning@-1 {{passing aggregate containing invalid pointer member 'info.next'}} + } +} + +void exploded_fields1(bool cond, unsigned t) { + { + InfoRaw info = {INFO_ENUM_0, &t}; + if (cond) { + InfoPriv privTmp = {INFO_ENUM_1}; + privTmp.flags = PrivBit; + info.next = &privTmp; + } + + // A warning is not emitted here, lack of context for inferring + // anything about `cond` would make it too noisy given `info.next` + // wasn't null initialized. + + escape_info(&info); // expected-remark {{pset => { t }}} + } +} diff --git a/clang/test/CIR/Transforms/lifetime-check-coro-task.cpp b/clang/test/CIR/Transforms/lifetime-check-coro-task.cpp new file mode 100644 index 000000000000..cf101b790491 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-coro-task.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -I%S/Inputs -fclangir -fclangir-lifetime-check="history=all;remarks=all;history_limit=1" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +#include "folly-coro.h" + +folly::coro::Task go(int const& val); +folly::coro::Task go1() { + auto task = go(1); // expected-note {{coroutine bound to resource with expired lifetime}} + // expected-note@-1 {{at the end of scope or full-expression}} + co_return co_await task; // expected-remark {{pset => { task, invalid }}} + // expected-warning@-1 {{use of coroutine 'task' with dangling reference}} +} + +folly::coro::Task go1_lambda() { + auto task = [i = 3]() -> folly::coro::Task { // expected-note {{coroutine bound to lambda with expired lifetime}} + co_return i; + }(); // expected-note {{at the end of scope or full-expression}} + co_return co_await task; // expected-remark {{pset => { task, invalid }}} + // expected-warning@-1 {{use of coroutine 'task' with dangling reference}} +} + +folly::coro::Task go2_lambda() { + auto task = []() -> folly::coro::Task { // expected-note {{coroutine bound to lambda with expired lifetime}} + co_return 3; + }(); // expected-note {{at the end of scope or full-expression}} + co_return co_await task; // expected-remark {{pset => { task, invalid }}} + // expected-warning@-1 {{use of coroutine 'task' with dangling reference}} +} + +folly::coro::Task go3_lambda() { + auto* fn = +[](int const& i) -> folly::coro::Task { co_return i; }; + auto task = fn(3); // expected-note {{coroutine bound to resource with expired lifetime}} + // expected-note@-1 {{at the end of scope or full-expression}} + co_return co_await task; // expected-remark {{pset => { task, invalid }}} + // expected-warning@-1 {{use of coroutine 'task' with dangling reference}} +} \ No newline at end of file diff --git 
a/clang/test/CIR/Transforms/lifetime-check-lambda.cpp b/clang/test/CIR/Transforms/lifetime-check-lambda.cpp new file mode 100644 index 000000000000..617e18edf499 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-lambda.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -I%S/Inputs -Wno-return-stack-address -fclangir -fclangir-lifetime-check="history=all;history_limit=1" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +// Check also implements: +// EXP61-CPP. A lambda object must not outlive any of its reference captured objects + +// This can be diagnosed by clang with -Wreturn-stack-address +auto g() { + int i = 12; // expected-note {{declared here but invalid after enclosing function ends}} + return [&] { // expected-warning {{returned lambda captures local variable}} + i += 100; + return i; + }; +} + +// This cannot be diagnosed by -Wreturn-stack-address +auto g2() { + int i = 12; // expected-note {{declared here but invalid after enclosing function ends}} + auto lam = [&] { + i += 100; + return i; + }; + return lam; // expected-warning {{returned lambda captures local variable}} +} + +auto g3(int val) { + auto outer = [val] { + int i = val; // expected-note {{declared here but invalid after enclosing lambda ends}} + auto inner = [&] { + i += 30; + return i; + }; + return inner; // expected-warning {{returned lambda captures local variable}} + }; + return outer(); +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-check-owner.cpp b/clang/test/CIR/Transforms/lifetime-check-owner.cpp new file mode 100644 index 000000000000..23643c821884 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-owner.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=all;remarks=all;history_limit=1" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +struct [[gsl::Owner(int)]] MyIntOwner { + int val; + MyIntOwner(int v) : val(v) {} + void changeInt(int i); + int &operator*(); + int read() const; +}; + +struct [[gsl::Pointer(int)]] MyIntPointer { + int *ptr; + MyIntPointer(int *p = nullptr) : ptr(p) {} + MyIntPointer(const MyIntOwner &); + int &operator*(); + MyIntOwner toOwner(); + int read() { return *ptr; } +}; + +void yolo() { + MyIntPointer p; + { + MyIntOwner o(1); + p = o; + *p = 3; // expected-remark {{pset => { o__1' }}} + } // expected-note {{pointee 'o' invalidated at end of scope}} + *p = 4; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { invalid }}} +} + +void yolo2() { + MyIntPointer p; + MyIntOwner o(1); + p = o; + (void)o.read(); + (void)p.read(); // expected-remark {{pset => { o__1' }}} + o.changeInt(42); // expected-note {{invalidated by non-const use of owner type}} + (void)p.read(); // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { invalid }}} + p = o; + (void)p.read(); // expected-remark {{pset => { o__2' }}} + o.changeInt(33); // expected-note {{invalidated by non-const use of owner type}} + (void)p.read(); // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { invalid }}} +} + +void yolo3() { + MyIntPointer p, q; + MyIntOwner o(1); + p = o; + q = o; + (void)q.read(); // expected-remark {{pset => { o__1' }}} + (void)p.read(); // expected-remark {{pset => { o__1' }}} + o.changeInt(42); // expected-note {{invalidated by non-const use of owner type}} + (void)p.read(); // expected-warning {{use of invalid pointer 'p'}} + // 
expected-remark@-1 {{pset => { invalid }}} + (void)q.read(); // expected-warning {{use of invalid pointer 'q'}} + // expected-remark@-1 {{pset => { invalid }}} +} + +void yolo4() { + MyIntOwner o0(1); + MyIntOwner o1(2); + MyIntPointer p{o0}, q{o1}; + p.read(); // expected-remark {{pset => { o0__1' }}} + q.read(); // expected-remark {{pset => { o1__1' }}} + o0 = o1; // expected-note {{invalidated by non-const use of owner type}} + p.read(); // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { invalid }}} + q.read(); // expected-remark {{pset => { o1__1' }}} +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-check-range-for-vector.cpp b/clang/test/CIR/Transforms/lifetime-check-range-for-vector.cpp new file mode 100644 index 000000000000..e9c6d62b6f64 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-range-for-vector.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -mconstructor-aliases -fclangir -clangir-disable-emit-cxx-default -fclangir-lifetime-check="history=all" -fclangir-skip-system-headers -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +#include "std-cxx.h" + +// expected-no-diagnostics + +typedef enum SType { + INFO_ENUM_0 = 9, + INFO_ENUM_1 = 2020, +} SType; + +typedef struct InfoRaw { + SType type; + const void* __attribute__((__may_alias__)) next; + unsigned u; +} InfoRaw; + +void swappy(unsigned c) { + std::vector images(c); + for (auto& image : images) { + image = {INFO_ENUM_1}; + } + + std::vector images2(c); + for (unsigned i = 0; i < c; i++) { + images2[i] = {INFO_ENUM_1}; + } +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-check-remarks.cpp b/clang/test/CIR/Transforms/lifetime-check-remarks.cpp new file mode 100644 index 000000000000..83cef25c54da --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-remarks.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="remarks=pset-invalid" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +int *p0() { + int *p = nullptr; + { + int x = 0; + p = &x; + *p = 42; + } + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { invalid }}} + return p; +} + +int *p1(bool b = true) { + int *p = nullptr; + if (b) { + int x = 0; + p = &x; + *p = 42; + } + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { invalid, nullptr }}} + return p; +} + +void p2(int b) { + int *p = nullptr; + switch (int x = 0; b) { + case 1: + p = &x; + case 2: + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { nullptr }}} + break; + } + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { nullptr, invalid }}} +} diff --git a/clang/test/CIR/Transforms/lifetime-check-string.cpp b/clang/test/CIR/Transforms/lifetime-check-string.cpp new file mode 100644 index 000000000000..383f3b5da626 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check-string.cpp @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -clangir-disable-emit-cxx-default -fclangir-lifetime-check="history=all;remarks=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +int strlen(char const *); + +struct [[gsl::Owner(char *)]] String { + long size; + long capacity; + const char *storage; + char operator[](int); + String() : size{0}, 
capacity{0} {} + String(char const *s) : size{strlen(s)}, capacity{size}, storage{s} {} +}; + +struct [[gsl::Pointer(int)]] StringView { + long size; + const char *storage; + char operator[](int); + StringView(const String &s) : size{s.size}, storage{s.storage} {} + StringView() : size{0}, storage{nullptr} {} + int getSize() const; +}; + +void sv0() { + StringView sv; + String name = "abcdefghijklmnop"; + sv = name; + (void)sv.getSize(); // expected-remark {{pset => { name__1' }}} + name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}} + (void)sv.getSize(); // expected-warning {{use of invalid pointer 'sv'}} + // expected-remark@-1 {{pset => { invalid }}} + sv = name; + (void)sv.getSize(); // expected-remark {{pset => { name__2' }}} +} + +void sv1() { + StringView sv, sv_other; + String name = "abcdefghijklmnop"; + sv = name; + sv_other = sv; + (void)sv.getSize(); // expected-remark {{pset => { name__1' }}} + (void)sv_other.getSize(); // expected-remark {{pset => { name__1' }}} + name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}} + (void)sv.getSize(); // expected-warning {{use of invalid pointer 'sv'}} + // expected-remark@-1 {{pset => { invalid }}} + (void)sv_other.getSize(); // expected-warning {{use of invalid pointer 'sv_other'}} + // expected-remark@-1 {{pset => { invalid }}} + sv = name; + (void)sv.getSize(); // expected-remark {{pset => { name__2' }}} +} + +void sv2() { + StringView sv; + String name = "abcdefghijklmnop"; + sv = name; + char read0 = sv[0]; // expected-remark {{pset => { name__1' }}} + name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}} + char read1 = sv[0]; // expected-warning {{use of invalid pointer 'sv'}} + // expected-remark@-1 {{pset => { invalid }}} + sv = name; + char read2 = sv[0]; // expected-remark {{pset => { name__2' }}} + char read3 = name[1]; // expected-note {{invalidated by non-const use of owner type}} + char read4 = sv[1]; // expected-warning {{use of invalid pointer 'sv'}} + // expected-remark@-1 {{pset => { invalid }}} +} + +class Stream { + public: + Stream& operator<<(char); + Stream& operator<<(const StringView &); + // FIXME: conservative for now, but do not invalidate const Owners? 
+ Stream& operator<<(const String &); +}; + +void sv3() { + Stream cout; + StringView sv; + String name = "abcdefghijklmnop"; + sv = name; + cout << sv; // expected-remark {{pset => { name__1' }}} + name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}} + cout << sv[2]; // expected-warning {{use of invalid pointer 'sv'}} + sv = name; // expected-remark@-1 {{pset => { invalid }}} + cout << sv; // expected-remark {{pset => { name__2' }}} + cout << name; // expected-note {{invalidated by non-const use of owner type}} + cout << sv; // expected-warning {{passing invalid pointer 'sv'}} + // expected-remark@-1 {{pset => { invalid }}} +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-check.cpp b/clang/test/CIR/Transforms/lifetime-check.cpp new file mode 100644 index 000000000000..017de9f6495d --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-check.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +int *p0() { + int *p = nullptr; + { + int x = 0; + p = &x; + *p = 42; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + return p; +} + +int *p1(bool b = true) { + int *p = nullptr; // expected-note {{invalidated here}} + if (b) { + int x = 0; + p = &x; + *p = 42; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + return p; +} + +void p2() { + int *p = nullptr; // expected-note {{invalidated here}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} + +void p3() { + int *p; + p = nullptr; // expected-note {{invalidated here}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} + +void p4() { + int *p; // expected-note {{uninitialized here}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} + +void p5() { + int *p = nullptr; + { + int a[10]; + p = &a[0]; + } // expected-note {{pointee 'a' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} diff --git a/clang/test/CIR/Transforms/lifetime-fn-args.cpp b/clang/test/CIR/Transforms/lifetime-fn-args.cpp new file mode 100644 index 000000000000..6c1b297f1b32 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-fn-args.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -clangir-disable-emit-cxx-default -fclangir-lifetime-check="history=all;remarks=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +struct A { + void* ctx; + void setInfo(void** ctxPtr); +}; + +void A::setInfo(void** ctxPtr) { + if (ctxPtr != nullptr) { + *ctxPtr = ctx; // expected-remark {{pset => { fn_arg:1 }}} + } +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-invalid-option.cpp b/clang/test/CIR/Transforms/lifetime-invalid-option.cpp new file mode 100644 index 000000000000..64486b6aa166 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-invalid-option.cpp @@ -0,0 +1,3 @@ +// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="yolo=invalid,null" -emit-cir %s -o - 2>&1 | FileCheck %s + +// CHECK: clangir pass option 'yolo=invalid,null' not recognized \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-loop-valid.cpp b/clang/test/CIR/Transforms/lifetime-loop-valid.cpp new file mode 100644 index 
000000000000..e7ee7aca7cf3 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-loop-valid.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null;remarks=pset-always" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +// Loops that do not change psets + +// p1179r1: 2.4.9.1 +// No diagnostic needed, pset(p) = {a} before and after the loop +void valid0(bool b, int j) { + int a[10]; + int *p = &a[0]; + while (j) { + if (b) { + p = &a[j]; + } + j = j - 1; + } + *p = 12; // expected-remark {{pset => { a }}} +} + +// p1179r1: 2.4.9.2 +void valid1(bool b, int j) { + int a[4], c[5]; + int *p = &a[0]; + while (j) { + if (b) { + p = &c[j]; + } + j = j - 1; + } + *p = 0; // expected-remark {{pset => { a, c }}} + + while (j) { + if (b) { + p = &c[j]; + } + j = j - 1; + } + *p = 0; // expected-remark {{pset => { a, c }}} +} diff --git a/clang/test/CIR/Transforms/lifetime-loop.cpp b/clang/test/CIR/Transforms/lifetime-loop.cpp new file mode 100644 index 000000000000..cf58ddf48f73 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-loop.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null;remarks=pset-invalid" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +void loop_basic_for() { + int *p = nullptr; // expected-note {{invalidated here}} + for (int i = 0; i < 10; i = i + 1) { + int x = 0; + p = &x; + *p = 42; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { nullptr, invalid }}} +} + +void loop_basic_while() { + int *p = nullptr; // expected-note {{invalidated here}} + int i = 0; + while (i < 10) { + int x = 0; + p = &x; + *p = 42; + i = i + 1; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { nullptr, invalid }}} +} + +void loop_basic_dowhile() { + int *p = nullptr; // expected-note {{invalidated here}} + int i = 0; + do { + int x = 0; + p = &x; + *p = 42; + i = i + 1; + } while (i < 10); // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { nullptr, invalid }}} +} + +// p1179r1: 2.4.9.3 +void loop0(bool b, int j) { + int a[4], c[4]; + int *p = &a[0]; + while (j) { + // This access is invalidated after the first iteration + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { c, nullptr }}} + p = nullptr; // expected-note {{invalidated here}} + if (b) { + p = &c[j]; + } + j = j - 1; + } + *p = 0; // expected-warning {{use of invalid pointer 'p'}} + // expected-remark@-1 {{pset => { a, c, nullptr }}} +} diff --git a/clang/test/CIR/Transforms/lifetime-null-passing.cpp b/clang/test/CIR/Transforms/lifetime-null-passing.cpp new file mode 100644 index 000000000000..e26210b56234 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-null-passing.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -clangir-disable-emit-cxx-default -fclangir-lifetime-check="history=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +class _j {}; +typedef _j* jobj; + +typedef enum SType { + INFO_ENUM_0 = 9, + INFO_ENUM_1 = 2020, +} SType; + +typedef SType ( *FnPtr2)(unsigned session, jobj* surface); + 
+struct X { + struct entries { + FnPtr2 wildfn = nullptr; + }; + static entries e; +}; + +void nullpassing() { + jobj o = nullptr; + X::e.wildfn(0, &o); +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/lifetime-switch.cpp b/clang/test/CIR/Transforms/lifetime-switch.cpp new file mode 100644 index 000000000000..ca56b95f71a0 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-switch.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null" -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +void s0(int b) { + int *p = nullptr; + switch (b) { + default: { + int x = 0; + p = &x; + *p = 42; + } // expected-note {{pointee 'x' invalidated at end of scope}} + } + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} + +void s1(int b) { + int *p = nullptr; + switch (b) { + default: + int x = 0; + p = &x; + *p = 42; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} + +void s2(int b) { + int *p = nullptr; + switch (int x = 0; b) { + default: + p = &x; + *p = 42; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} + +void s3(int b) { + int *p = nullptr; // expected-note {{invalidated here}} + switch (int x = 0; b) { + case 1: + p = &x; + case 2: + *p = 42; // expected-warning {{use of invalid pointer 'p'}} + break; + } // expected-note {{pointee 'x' invalidated at end of scope}} + *p = 42; // expected-warning {{use of invalid pointer 'p'}} +} diff --git a/clang/test/CIR/Transforms/lifetime-this.cpp b/clang/test/CIR/Transforms/lifetime-this.cpp new file mode 100644 index 000000000000..8e18af8a9e16 --- /dev/null +++ b/clang/test/CIR/Transforms/lifetime-this.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -mconstructor-aliases -fclangir -clangir-disable-emit-cxx-default -fclangir-lifetime-check="history=all;remarks=all" -fclangir-skip-system-headers -clangir-verify-diagnostics -emit-cir %s -o %t.cir + +#include "std-cxx.h" + +struct S { + S(int, int, const S* s); + void f(int a, int b); +}; + +void S::f(int a, int b) { + std::shared_ptr l = std::make_shared(a, b, this); // expected-remark {{pset => { this }}} +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/loop.cir b/clang/test/CIR/Transforms/loop.cir new file mode 100644 index 000000000000..8204216b6f52 --- /dev/null +++ b/clang/test/CIR/Transforms/loop.cir @@ -0,0 +1,122 @@ +// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s + +!s32i = !cir.int + +module { + + cir.func @testFor(%arg0 : !cir.bool) { + cir.for : cond { + cir.condition(%arg0) + } body { + cir.yield + } step { + cir.yield + } + cir.return + } +// CHECK: cir.func @testFor(%arg0: !cir.bool) { +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#STEP:]] +// CHECK: ^bb[[#STEP]]: +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + // Test while cir.loop operation lowering. 
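+  // Roughly the CIR emitted for C source along the lines of:
+  //   while (b) { }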
+ cir.func @testWhile(%arg0 : !cir.bool) { + cir.while { + cir.condition(%arg0) + } do { + cir.yield + } + cir.return + } +// CHECK: cir.func @testWhile(%arg0: !cir.bool) { +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + // Test do-while cir.loop operation lowering. + cir.func @testDoWhile(%arg0 : !cir.bool) { + cir.do { + cir.yield + } while { + cir.condition(%arg0) + } + cir.return + } +// CHECK: cir.func @testDoWhile(%arg0: !cir.bool) { +// CHECK: cir.br ^bb[[#BODY:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + // test corner case + // while (1) { + // break; + // } + cir.func @testWhileWithBreakTerminatedBody(%arg0 : !cir.bool) { + cir.while { + cir.condition(%arg0) + } do { + cir.break + } + cir.return + } +// CHECK: cir.func @testWhileWithBreakTerminatedBody(%arg0: !cir.bool) { +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#EXIT]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + // test C only corner case - no fails during the lowering + // for (;;) { + // break; + // } + cir.func @forWithBreakTerminatedScopeInBody(%arg0 : !cir.bool) { + cir.for : cond { + cir.condition(%arg0) + } body { + cir.scope { // FIXME(cir): Redundant scope emitted during C codegen. + cir.break + } + cir.yield + } step { + cir.yield + } + cir.return + } +// CHECK: cir.func @forWithBreakTerminatedScopeInBody(%arg0: !cir.bool) { +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#COND]]: +// CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] +// CHECK: ^bb[[#BODY]]: +// CHECK: cir.br ^bb[[#EX_SCOPE_IN:]] +// CHECK: ^bb[[#EX_SCOPE_IN]]: +// CHECK: cir.br ^bb[[#EXIT:]] +// CHECK: ^bb[[#EX_SCOPE_EXIT:]]: +// CHECK: cir.br ^bb[[#STEP:]] +// CHECK: ^bb[[#STEP]]: +// CHECK: cir.br ^bb[[#COND:]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + +} \ No newline at end of file diff --git a/clang/test/CIR/Transforms/mem2reg.c b/clang/test/CIR/Transforms/mem2reg.c new file mode 100644 index 000000000000..83c975fd6d13 --- /dev/null +++ b/clang/test/CIR/Transforms/mem2reg.c @@ -0,0 +1,195 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=BEFORE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fclangir-mem2reg %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=MEM2REG + +int return_42() { + int y = 42; + return y; +} + +// BEFORE: cir.func {{.*@return_42}} +// BEFORE: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// BEFORE: %1 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} +// BEFORE: %2 = cir.const #cir.int<42> : !s32i +// BEFORE: cir.store %2, %1 : !s32i, !cir.ptr +// BEFORE: %3 = cir.load %1 : !cir.ptr, !s32i +// BEFORE: cir.store %3, %0 : !s32i, !cir.ptr +// BEFORE: %4 = cir.load %0 : !cir.ptr, !s32i +// BEFORE: cir.return %4 : !s32i + +// MEM2REG: cir.func {{.*@return_42()}} +// MEM2REG: %0 = cir.const #cir.int<42> : !s32i +// MEM2REG: cir.return %0 : !s32i + +void alloca_in_loop(int* ar, int n) { + for (int i = 0; i < n; ++i) { + int a = 4; + ar[i] = a; + } +} + +// 
BEFORE: cir.func {{.*@alloca_in_loop}} +// BEFORE: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["ar", init] {alignment = 8 : i64} +// BEFORE: %1 = cir.alloca !s32i, !cir.ptr, ["n", init] {alignment = 4 : i64} +// BEFORE: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> +// BEFORE: cir.store %arg1, %1 : !s32i, !cir.ptr +// BEFORE: cir.scope { +// BEFORE: %2 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} +// BEFORE: %3 = cir.const #cir.int<0> : !s32i +// BEFORE: cir.store %3, %2 : !s32i, !cir.ptr +// BEFORE: cir.for : cond { +// BEFORE: %4 = cir.load %2 : !cir.ptr, !s32i +// BEFORE: %5 = cir.load %1 : !cir.ptr, !s32i +// BEFORE: %6 = cir.cmp(lt, %4, %5) : !s32i, !s32i +// BEFORE: %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool +// BEFORE: cir.condition(%7) +// BEFORE: } body { +// BEFORE: cir.scope { +// BEFORE: %4 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// BEFORE: %5 = cir.const #cir.int<4> : !s32i +// BEFORE: cir.store %5, %4 : !s32i, !cir.ptr +// BEFORE: %6 = cir.load %4 : !cir.ptr, !s32i +// BEFORE: %7 = cir.load %0 : !cir.ptr>, !cir.ptr +// BEFORE: %8 = cir.load %2 : !cir.ptr, !s32i +// BEFORE: %9 = cir.ptr_stride(%7 : !cir.ptr, %8 : !s32i), !cir.ptr +// BEFORE: cir.store %6, %9 : !s32i, !cir.ptr +// BEFORE: } +// BEFORE: cir.yield +// BEFORE: } step { +// BEFORE: %4 = cir.load %2 : !cir.ptr, !s32i +// BEFORE: %5 = cir.unary(inc, %4) : !s32i, !s32i +// BEFORE: cir.store %5, %2 : !s32i, !cir.ptr +// BEFORE: cir.yield +// BEFORE: } +// BEFORE: } +// BEFORE: cir.return + +// MEM2REG: cir.func {{.*@alloca_in_loop}} +// MEM2REG: cir.br ^bb1 +// MEM2REG: ^bb1: // pred: ^bb0 +// MEM2REG: %0 = cir.const #cir.int<0> : !s32i +// MEM2REG: cir.br ^bb2(%0 : !s32i) +// MEM2REG: ^bb2(%1: !s32i{{.*}}): // 2 preds: ^bb1, ^bb6 +// MEM2REG: %2 = cir.cmp(lt, %1, %arg1) : !s32i, !s32i +// MEM2REG: %3 = cir.cast(int_to_bool, %2 : !s32i), !cir.bool +// MEM2REG: cir.brcond %3 ^bb3, ^bb7 +// MEM2REG: ^bb3: // pred: ^bb2 +// MEM2REG: cir.br ^bb4 +// MEM2REG: ^bb4: // pred: ^bb3 +// MEM2REG: %4 = cir.const #cir.int<4> : !s32i +// MEM2REG: %5 = cir.ptr_stride(%arg0 : !cir.ptr, %1 : !s32i), !cir.ptr +// MEM2REG: cir.store %4, %5 : !s32i, !cir.ptr +// MEM2REG: cir.br ^bb5 +// MEM2REG: ^bb5: // pred: ^bb4 +// MEM2REG: cir.br ^bb6 +// MEM2REG: ^bb6: // pred: ^bb5 +// MEM2REG: %6 = cir.unary(inc, %1) : !s32i, !s32i +// MEM2REG: cir.br ^bb2(%6 : !s32i) +// MEM2REG: ^bb7: // pred: ^bb2 +// MEM2REG: cir.br ^bb8 +// MEM2REG: ^bb8: // pred: ^bb7 +// MEM2REG: cir.return + + +int alloca_in_ifelse(int x) { + int y = 0; + if (x > 42) { + int z = 2; + y = x * z; + } else { + int z = 3; + y = x * z; + } + + y = y + 1; + return y; +} + +// BEFORE: cir.func {{.*@alloca_in_ifelse}} +// BEFORE: %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} +// BEFORE: %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// BEFORE: %2 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} +// BEFORE: cir.store %arg0, %0 : !s32i, !cir.ptr +// BEFORE: %3 = cir.const #cir.int<0> : !s32i +// BEFORE: cir.store %3, %2 : !s32i, !cir.ptr +// BEFORE: cir.scope { +// BEFORE: %9 = cir.load %0 : !cir.ptr, !s32i +// BEFORE: %10 = cir.const #cir.int<42> : !s32i +// BEFORE: %11 = cir.cmp(gt, %9, %10) : !s32i, !s32i +// BEFORE: %12 = cir.cast(int_to_bool, %11 : !s32i), !cir.bool +// BEFORE: cir.if %12 { +// BEFORE: %13 = cir.alloca !s32i, !cir.ptr, ["z", init] {alignment = 4 : i64} +// BEFORE: %14 = cir.const #cir.int<2> : !s32i +// BEFORE: cir.store %14, %13 : !s32i, !cir.ptr +// BEFORE: %15 
= cir.load %0 : !cir.ptr, !s32i +// BEFORE: %16 = cir.load %13 : !cir.ptr, !s32i +// BEFORE: %17 = cir.binop(mul, %15, %16) nsw : !s32i +// BEFORE: cir.store %17, %2 : !s32i, !cir.ptr +// BEFORE: } else { +// BEFORE: %13 = cir.alloca !s32i, !cir.ptr, ["z", init] {alignment = 4 : i64} +// BEFORE: %14 = cir.const #cir.int<3> : !s32i +// BEFORE: cir.store %14, %13 : !s32i, !cir.ptr +// BEFORE: %15 = cir.load %0 : !cir.ptr, !s32i +// BEFORE: %16 = cir.load %13 : !cir.ptr, !s32i +// BEFORE: %17 = cir.binop(mul, %15, %16) nsw : !s32i +// BEFORE: cir.store %17, %2 : !s32i, !cir.ptr +// BEFORE: } +// BEFORE: } +// BEFORE: %4 = cir.load %2 : !cir.ptr, !s32i +// BEFORE: %5 = cir.const #cir.int<1> : !s32i +// BEFORE: %6 = cir.binop(add, %4, %5) nsw : !s32i +// BEFORE: cir.store %6, %2 : !s32i, !cir.ptr +// BEFORE: %7 = cir.load %2 : !cir.ptr, !s32i +// BEFORE: cir.store %7, %1 : !s32i, !cir.ptr +// BEFORE: %8 = cir.load %1 : !cir.ptr, !s32i +// BEFORE: cir.return %8 : !s32i + +// MEM2REG: cir.func {{.*@alloca_in_ifelse}} +// MEM2REG: %0 = cir.const #cir.int<0> : !s32i +// MEM2REG: cir.br ^bb1 +// MEM2REG: ^bb1: // pred: ^bb0 +// MEM2REG: %1 = cir.const #cir.int<42> : !s32i +// MEM2REG: %2 = cir.cmp(gt, %arg0, %1) : !s32i, !s32i +// MEM2REG: %3 = cir.cast(int_to_bool, %2 : !s32i), !cir.bool +// MEM2REG: cir.brcond %3 ^bb3, ^bb2 +// MEM2REG: ^bb2: // pred: ^bb1 +// MEM2REG: %4 = cir.const #cir.int<3> : !s32i +// MEM2REG: %5 = cir.binop(mul, %arg0, %4) nsw : !s32i +// MEM2REG: cir.br ^bb4(%5 : !s32i) +// MEM2REG: ^bb3: // pred: ^bb1 +// MEM2REG: %6 = cir.const #cir.int<2> : !s32i +// MEM2REG: %7 = cir.binop(mul, %arg0, %6) nsw : !s32i +// MEM2REG: cir.br ^bb4(%7 : !s32i) +// MEM2REG: ^bb4(%8: !s32i{{.*}}): // 2 preds: ^bb2, ^bb3 +// MEM2REG: cir.br ^bb5 +// MEM2REG: ^bb5: // pred: ^bb4 +// MEM2REG: %9 = cir.const #cir.int<1> : !s32i +// MEM2REG: %10 = cir.binop(add, %8, %9) nsw : !s32i +// MEM2REG: cir.return %10 : !s32i +// MEM2REG: } + + + + +typedef __SIZE_TYPE__ size_t; +void *alloca(size_t size); + +void test_bitcast(size_t n) { + int *c1 = alloca(n); +} + +// BEFORE: cir.func {{.*@test_bitcast}} +// BEFORE: %0 = cir.alloca !u64i, !cir.ptr, ["n", init] {alignment = 8 : i64} +// BEFORE: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["c1", init] {alignment = 8 : i64} +// BEFORE: cir.store %arg0, %0 : !u64i, !cir.ptr +// BEFORE: %2 = cir.load %0 : !cir.ptr, !u64i +// BEFORE: %3 = cir.alloca !u8i, !cir.ptr, %2 : !u64i, ["bi_alloca"] {alignment = 16 : i64} +// BEFORE: %4 = cir.cast(bitcast, %3 : !cir.ptr), !cir.ptr +// BEFORE: %5 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr +// BEFORE: cir.store %5, %1 : !cir.ptr, !cir.ptr> +// BEFORE: cir.return + +// MEM2REG: cir.func {{.*@test_bitcast}} +// MEM2REG: cir.return +// MEM2REG: } \ No newline at end of file diff --git a/clang/test/CIR/Transforms/mem2reg.cir b/clang/test/CIR/Transforms/mem2reg.cir new file mode 100644 index 000000000000..dca55d3c3068 --- /dev/null +++ b/clang/test/CIR/Transforms/mem2reg.cir @@ -0,0 +1,31 @@ +// RUN: cir-opt %s -cir-flatten-cfg -mem2reg -o - | FileCheck %s + +!s32i = !cir.int +!u64i = !cir.int +!u8i = !cir.int +!void = !cir.void + +module { + + // ==== Simple case + // C code + // int return_42() { + // int y = 42; + // return y; + // } + cir.func @return_42() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %2 = cir.const #cir.int<42> : !s32i + cir.store %2, %1 : !s32i, !cir.ptr + %3 = cir.load %1 : !cir.ptr, !s32i + cir.store %3, 
%0 : !s32i, !cir.ptr + %4 = cir.load %0 : !cir.ptr, !s32i + cir.return %4 : !s32i + } + // CHECK: cir.func @return_42() -> !s32i { + // CHECK: %0 = cir.const #cir.int<42> : !s32i + // CHECK: cir.return %0 : !s32i + // CHECK: } + +} //module \ No newline at end of file diff --git a/clang/test/CIR/Transforms/merge-cleanups.cir b/clang/test/CIR/Transforms/merge-cleanups.cir new file mode 100644 index 000000000000..c9d927b7cae7 --- /dev/null +++ b/clang/test/CIR/Transforms/merge-cleanups.cir @@ -0,0 +1,138 @@ +// RUN: cir-opt %s -cir-canonicalize -o %t.out.cir +// RUN: FileCheck --input-file=%t.out.cir %s + +#false = #cir.bool : !cir.bool +#true = #cir.bool : !cir.bool +!s32i = !cir.int +module { + cir.func @sw1(%arg0: !s32i, %arg1: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["c", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + cir.scope { + %2 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} + %3 = cir.const #cir.int<1> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr + %4 = cir.load %0 : !cir.ptr, !s32i + cir.switch (%4 : !s32i) [ + case (equal, 0 : !s32i) { + %5 = cir.load %2 : !cir.ptr, !s32i + %6 = cir.const #cir.int<1> : !s32i + %7 = cir.binop(add, %5, %6) : !s32i + cir.store %7, %2 : !s32i, !cir.ptr + cir.br ^bb1 + ^bb1: // pred: ^bb0 + cir.return + }, + case (equal, 1 : !s32i) { + cir.scope { + cir.scope { + %5 = cir.load %1 : !cir.ptr, !s32i + %6 = cir.const #cir.int<3> : !s32i + %7 = cir.cmp(eq, %5, %6) : !s32i, !cir.bool + cir.if %7 { + cir.br ^bb1 + ^bb1: // pred: ^bb0 + cir.return + } + } + cir.break + } + cir.yield + }, + case (equal, 2 : !s32i) { + cir.scope { + %5 = cir.alloca !s32i, !cir.ptr, ["yolo", init] {alignment = 4 : i64} + %6 = cir.load %2 : !cir.ptr, !s32i + %7 = cir.const #cir.int<1> : !s32i + %8 = cir.binop(add, %6, %7) : !s32i + cir.store %8, %2 : !s32i, !cir.ptr + %9 = cir.const #cir.int<100> : !s32i + cir.store %9, %5 : !s32i, !cir.ptr + cir.br ^bb1 + ^bb1: // pred: ^bb0 + cir.return + } + cir.yield + } + ] + } + cir.return + } + +// CHECK: cir.switch (%4 : !s32i) [ +// CHECK-NEXT: case (equal, 0) { +// CHECK-NEXT: %5 = cir.load %2 : !cir.ptr, !s32i +// CHECK-NEXT: %6 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %7 = cir.binop(add, %5, %6) : !s32i +// CHECK-NEXT: cir.store %7, %2 : !s32i, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 1) { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %5 = cir.load %1 : !cir.ptr, !s32i +// CHECK-NEXT: %6 = cir.const #cir.int<3> : !s32i +// CHECK-NEXT: %7 = cir.cmp(eq, %5, %6) : !s32i, !cir.bool +// CHECK-NEXT: cir.if %7 { +// CHECK-NEXT: cir.return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: cir.break +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: }, +// CHECK-NEXT: case (equal, 2) { +// CHECK-NEXT: cir.scope { +// CHECK-NEXT: %5 = cir.alloca !s32i, !cir.ptr, ["yolo", init] {alignment = 4 : i64} +// CHECK-NEXT: %6 = cir.load %2 : !cir.ptr, !s32i +// CHECK-NEXT: %7 = cir.const #cir.int<1> : !s32i +// CHECK-NEXT: %8 = cir.binop(add, %6, %7) : !s32i +// CHECK-NEXT: cir.store %8, %2 : !s32i, !cir.ptr +// CHECK-NEXT: %9 = cir.const #cir.int<100> : !s32i +// CHECK-NEXT: cir.store %9, %5 : !s32i, !cir.ptr +// CHECK-NEXT: cir.return +// CHECK-NEXT: } +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } +// CHECK-NEXT: ] + + // Should remove empty scopes. 
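+  // An empty compound statement (e.g. "{ }") typically emits a cir.scope with
+  // no operations; canonicalization is expected to fold it away entirely.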
+ cir.func @removeEmptyScope() { + cir.scope { + } + cir.return + } + // CHECK: cir.func @removeEmptyScope + // CHECK-NEXT: cir.return + + // Should remove empty switch-case statements. + cir.func @removeEmptySwitch(%arg0: !s32i) { + // CHECK: cir.func @removeEmptySwitch + cir.switch (%arg0 : !s32i) [ + ] + // CHECK-NOT: cir.switch + cir.return + // CHECK: cir.return + } + + // Should remove redundant bitcasts. + // CHECK-LABEL: @ptrbitcastfold + // CHECK: %[[ARG0:.+]]: !cir.ptr + // CHECK: cir.return %[[ARG0]] : !cir.ptr + cir.func @ptrbitcastfold(%arg0: !cir.ptr) -> !cir.ptr { + %0 = cir.cast(bitcast, %arg0: !cir.ptr), !cir.ptr + cir.return %0 : !cir.ptr + } + + // Should remove redundant address space casts. + // CHECK-LABEL: @addrspacecastfold + // CHECK: %[[ARG0:.+]]: !cir.ptr)> + // CHECK: cir.return %[[ARG0]] : !cir.ptr)> + cir.func @addrspacecastfold(%arg0: !cir.ptr)>) -> !cir.ptr)> { + %0 = cir.cast(address_space, %arg0: !cir.ptr)>), !cir.ptr)> + cir.return %0 : !cir.ptr)> + } + +} diff --git a/clang/test/CIR/Transforms/scf-prepare.cir b/clang/test/CIR/Transforms/scf-prepare.cir new file mode 100644 index 000000000000..063420b1c516 --- /dev/null +++ b/clang/test/CIR/Transforms/scf-prepare.cir @@ -0,0 +1,211 @@ +// RUN: cir-opt %s -cir-mlir-scf-prepare -o - | FileCheck %s + +!s32i = !cir.int + +module { + cir.global "private" external @a : !cir.array + + // for (int i = l; u > i; ++i) + // a[i] = 3; + // + // Check that the loop boundary been hoisted out of loop and the comparison + // been transferred from gt to lt. + cir.func @variableLoopBound(%arg0: !s32i, %arg1: !s32i) { + // CHECK: %[[BOUND:.*]] = cir.load %[[BOUND_ADDR:.*]] : !cir.ptr, !s32i + // CHECK: cir.for : cond { + // CHECK: %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr, !s32i + // CHECK: %[[COND:.*]] = cir.cmp(lt, %[[IV]], %4) : !s32i, !s32i + + %0 = cir.alloca !s32i, !cir.ptr, ["l", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["u", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + cir.scope { + %2 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %3 = cir.load %0 : !cir.ptr, !s32i + cir.store %3, %2 : !s32i, !cir.ptr + cir.for : cond { + %4 = cir.load %1 : !cir.ptr, !s32i + %5 = cir.load %2 : !cir.ptr, !s32i + %6 = cir.cmp(gt, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.condition(%7) + } body { + %4 = cir.const #cir.int<3> : !s32i + %5 = cir.get_global @a : !cir.ptr> + %6 = cir.load %2 : !cir.ptr, !s32i + %7 = cir.cast(array_to_ptrdecay, %5 : !cir.ptr>), !cir.ptr + %8 = cir.ptr_stride(%7 : !cir.ptr, %6 : !s32i), !cir.ptr + cir.store %4, %8 : !s32i, !cir.ptr + cir.yield + } step { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.unary(inc, %4) : !s32i, !s32i + cir.store %5, %2 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // for (int i = 0; 50 >= i; ++i) + // a[i] = 3; + // + // Check that the loop boundary been hoisted out of loop and the comparison + // been transferred from ge to le. 
+ cir.func @constantLoopBound() { + // CHECK: %[[BOUND:.*]] = cir.const #cir.int<50> : !s32i + // CHECK: cir.for : cond { + // CHECK: %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr, !s32i + // CHECK: %[[COND:.*]] = cir.cmp(le, %[[IV]], %[[BOUND]]) : !s32i, !s32i + + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.const #cir.int<50> : !s32i + %3 = cir.load %0 : !cir.ptr, !s32i + %4 = cir.cmp(ge, %2, %3) : !s32i, !s32i + %5 = cir.cast(int_to_bool, %4 : !s32i), !cir.bool + cir.condition(%5) + } body { + %2 = cir.const #cir.int<3> : !s32i + %3 = cir.get_global @a : !cir.ptr> + %4 = cir.load %0 : !cir.ptr, !s32i + %5 = cir.cast(array_to_ptrdecay, %3 : !cir.ptr>), !cir.ptr + %6 = cir.ptr_stride(%5 : !cir.ptr, %4 : !s32i), !cir.ptr + cir.store %2, %6 : !s32i, !cir.ptr + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // for (int i = l; u > i; ++i) { + // --u; + // a[i] = 3; + // } + // + // Check that the loop boundary not been hoisted because it's not loop + // invariant and the loop comparison been transferred from gt to lt. + cir.func @variableLoopBoundNotLoopInvariant(%arg0: !s32i, %arg1: !s32i) { + // CHECK: cir.store %[[IV_INIT:.*]], %[[IV_ADDR:.*]] : !s32i, !cir.ptr + // CHECK: cir.for : cond { + // CHECK: %[[BOUND:.*]] = cir.load %[[BOUND_ADDR:.*]] : !cir.ptr, !s32i + // CHECK: %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr, !s32i + // CHECK: %[[COND:.*]] = cir.cmp(lt, %[[IV]], %[[BOUND]]) : !s32i, !s32i + + %0 = cir.alloca !s32i, !cir.ptr, ["l", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["u", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + cir.scope { + %2 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %3 = cir.load %0 : !cir.ptr, !s32i + cir.store %3, %2 : !s32i, !cir.ptr + cir.for : cond { + %4 = cir.load %1 : !cir.ptr, !s32i + %5 = cir.load %2 : !cir.ptr, !s32i + %6 = cir.cmp(gt, %4, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.condition(%7) + } body { + cir.scope { + %4 = cir.load %1 : !cir.ptr, !s32i + %5 = cir.unary(dec, %4) : !s32i, !s32i + cir.store %5, %1 : !s32i, !cir.ptr + %6 = cir.const #cir.int<3> : !s32i + %7 = cir.get_global @a : !cir.ptr> + %8 = cir.load %2 : !cir.ptr, !s32i + %9 = cir.cast(array_to_ptrdecay, %7 : !cir.ptr>), !cir.ptr + %10 = cir.ptr_stride(%9 : !cir.ptr, %8 : !s32i), !cir.ptr + cir.store %6, %10 : !s32i, !cir.ptr + } + cir.yield + } step { + %4 = cir.load %2 : !cir.ptr, !s32i + %5 = cir.unary(inc, %4) : !s32i, !s32i + cir.store %5, %2 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // for (int i = 0; i < 100 - 1; ++i) {} + // + // Check that the loop upper bound operations(100 - 1) will be hoisted out + // of loop. 
+ cir.func @loopInvariantBinOp() { + // CHECK: %[[C100:.*]] = cir.const #cir.int<100> : !s32i + // CHECK: %[[C1:.*]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[UPPER_BOUND:.*]] = cir.binop(sub, %[[C100]], %[[C1]]) nsw : !s32i + // CHECK: cir.for : cond { + + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<100> : !s32i + %4 = cir.const #cir.int<1> : !s32i + %5 = cir.binop(sub, %3, %4) nsw : !s32i + %6 = cir.cmp(lt, %2, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.condition(%7) + } body { + cir.scope { + } + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } + + // It's a hand-writing test case to check that the operation has block + // argument as operand won't be hoisted out of loop. + // Note that the current codegen will store the argument first and then + // load the value to user. This test case is manually created to check + // that the hoisting pass won't break when encounter block argument. + cir.func @loopInvariantBinOp_blockArg(%arg0: !s32i) { + // CHECK: cir.for : cond { + // CHECK: %[[C100:.*]] = cir.const #cir.int<100> : !s32i + // CHECK: %[[UPPER_BOUND:.*]] = cir.binop(sub, %[[C100]], %arg0) nsw : !s32i + + cir.scope { + %0 = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr + cir.for : cond { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<100> : !s32i + %5 = cir.binop(sub, %3, %arg0) nsw : !s32i + %6 = cir.cmp(lt, %2, %5) : !s32i, !s32i + %7 = cir.cast(int_to_bool, %6 : !s32i), !cir.bool + cir.condition(%7) + } body { + cir.scope { + } + cir.yield + } step { + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.unary(inc, %2) : !s32i, !s32i + cir.store %3, %0 : !s32i, !cir.ptr + cir.yield + } + } + cir.return + } +} diff --git a/clang/test/CIR/Transforms/scope.cir b/clang/test/CIR/Transforms/scope.cir new file mode 100644 index 000000000000..2d14784c33f8 --- /dev/null +++ b/clang/test/CIR/Transforms/scope.cir @@ -0,0 +1,60 @@ +// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s + +!u32i = !cir.int + +module { + cir.func @foo() { + cir.scope { + %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + %1 = cir.const #cir.int<4> : !u32i + cir.store %1, %0 : !u32i, !cir.ptr + } + cir.return + } +// CHECK: cir.func @foo() { +// CHECK: cir.br ^bb1 +// CHECK: ^bb1: // pred: ^bb0 +// CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK: %1 = cir.const #cir.int<4> : !u32i +// CHECK: cir.store %1, %0 : !u32i, !cir.ptr +// CHECK: cir.br ^bb2 +// CHECK: ^bb2: // pred: ^bb1 +// CHECK: cir.return +// CHECK: } + + // Should drop empty scopes. 
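+  // Flattening erases the empty cir.scope outright instead of materializing
+  // an empty entry/exit block pair for it.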
+ cir.func @empty_scope() { + cir.scope { + } + cir.return + } +// CHECK: cir.func @empty_scope() { +// CHECK: cir.return +// CHECK: } + + cir.func @scope_with_return() -> !u32i { + %0 = cir.alloca !u32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.scope { + %2 = cir.const #cir.int<0> : !u32i + cir.store %2, %0 : !u32i, !cir.ptr + %3 = cir.load %0 : !cir.ptr, !u32i + cir.return %3 : !u32i + } + %1 = cir.load %0 : !cir.ptr, !u32i + cir.return %1 : !u32i + } + +// CHECK: cir.func @scope_with_return() -> !u32i { +// CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: cir.br ^bb1 +// CHECK: ^bb1: // pred: ^bb0 +// CHECK: %1 = cir.const #cir.int<0> : !u32i +// CHECK: cir.store %1, %0 : !u32i, !cir.ptr +// CHECK: %2 = cir.load %0 : !cir.ptr, !u32i +// CHECK: cir.return %2 : !u32i +// CHECK: ^bb2: // no predecessors +// CHECK: %3 = cir.load %0 : !cir.ptr, !u32i +// CHECK: cir.return %3 : !u32i +// CHECK: } + +} diff --git a/clang/test/CIR/Transforms/select.cir b/clang/test/CIR/Transforms/select.cir new file mode 100644 index 000000000000..29a5d1ed1dde --- /dev/null +++ b/clang/test/CIR/Transforms/select.cir @@ -0,0 +1,60 @@ +// RUN: cir-opt -cir-canonicalize -cir-simplify -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int + +module { + cir.func @fold_true(%arg0 : !s32i, %arg1 : !s32i) -> !s32i { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.select if %0 then %arg0 else %arg1 : (!cir.bool, !s32i, !s32i) -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func @fold_true(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i { + // CHECK-NEXT: cir.return %[[ARG0]] : !s32i + // CHECK-NEXT: } + + cir.func @fold_false(%arg0 : !s32i, %arg1 : !s32i) -> !s32i { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.select if %0 then %arg0 else %arg1 : (!cir.bool, !s32i, !s32i) -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func @fold_false(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i { + // CHECK-NEXT: cir.return %[[ARG1]] : !s32i + // CHECK-NEXT: } + + cir.func @fold_to_const(%arg0 : !cir.bool) -> !s32i { + %0 = cir.const #cir.int<42> : !s32i + %1 = cir.select if %arg0 then %0 else %0 : (!cir.bool, !s32i, !s32i) -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func @fold_to_const(%{{.+}}: !cir.bool) -> !s32i { + // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<42> : !s32i + // CHECK-NEXT: cir.return %[[#A]] : !s32i + // CHECK-NEXT: } + + cir.func @simplify_1(%arg0 : !cir.bool) -> !cir.bool { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.const #cir.bool : !cir.bool + %2 = cir.select if %arg0 then %0 else %1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + cir.return %2 : !cir.bool + } + + // CHECK: cir.func @simplify_1(%[[ARG0:.+]]: !cir.bool) -> !cir.bool { + // CHECK-NEXT: cir.return %[[ARG0]] : !cir.bool + // CHECK-NEXT: } + + cir.func @simplify_2(%arg0 : !cir.bool) -> !cir.bool { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.const #cir.bool : !cir.bool + %2 = cir.select if %arg0 then %0 else %1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool + cir.return %2 : !cir.bool + } + + // CHECK: cir.func @simplify_2(%[[ARG0:.+]]: !cir.bool) -> !cir.bool { + // CHECK-NEXT: %[[#A:]] = cir.unary(not, %[[ARG0]]) : !cir.bool, !cir.bool + // CHECK-NEXT: cir.return %[[#A]] : !cir.bool + // CHECK-NEXT: } +} diff --git a/clang/test/CIR/Transforms/simpl.c b/clang/test/CIR/Transforms/simpl.c new file mode 100644 index 000000000000..dda9f495ca4c --- /dev/null +++ b/clang/test/CIR/Transforms/simpl.c @@ -0,0 +1,38 @@ +// RUN: 
%clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize %s -o %t1.cir 2>&1 | FileCheck -check-prefix=BEFORE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize %s -o %t2.cir 2>&1 | FileCheck -check-prefix=AFTER %s + + +#define CHECK_PTR(ptr) \ + do { \ + if (__builtin_expect((!!((ptr) == 0)), 0))\ + return -42; \ + } while(0) + +int foo(int* ptr) { + CHECK_PTR(ptr); + + (*ptr)++; + return 0; +} + +// BEFORE: cir.func {{.*@foo}} +// BEFORE: [[X0:%.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// BEFORE: [[X1:%.*]] = cir.const #cir.ptr : !cir.ptr +// BEFORE: [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr, !s32i +// BEFORE: [[X3:%.*]] = cir.cast(int_to_bool, [[X2]] : !s32i), !cir.bool +// BEFORE: [[X4:%.*]] = cir.unary(not, [[X3]]) : !cir.bool, !cir.bool +// BEFORE: [[X5:%.*]] = cir.cast(bool_to_int, [[X4]] : !cir.bool), !s32i +// BEFORE: [[X6:%.*]] = cir.cast(int_to_bool, [[X5]] : !s32i), !cir.bool +// BEFORE: [[X7:%.*]] = cir.unary(not, [[X6]]) : !cir.bool, !cir.bool +// BEFORE: [[X8:%.*]] = cir.cast(bool_to_int, [[X7]] : !cir.bool), !s32i +// BEFORE: [[X9:%.*]] = cir.cast(integral, [[X8]] : !s32i), !s64i +// BEFORE: [[X10:%.*]] = cir.const #cir.int<0> : !s32i +// BEFORE: [[X11:%.*]] = cir.cast(integral, [[X10]] : !s32i), !s64i +// BEFORE: [[X12:%.*]] = cir.cast(int_to_bool, [[X9]] : !s64i), !cir.bool +// BEFORE: cir.if [[X12]] + +// AFTER: [[X0:%.*]] = cir.load {{.*}} : !cir.ptr>, !cir.ptr +// AFTER: [[X1:%.*]] = cir.const #cir.ptr : !cir.ptr +// AFTER: [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr, !s32i +// AFTER: [[X3:%.*]] = cir.cast(int_to_bool, [[X2]] : !s32i), !cir.bool +// AFTER: cir.if [[X3]] \ No newline at end of file diff --git a/clang/test/CIR/Transforms/simpl.cir b/clang/test/CIR/Transforms/simpl.cir new file mode 100644 index 000000000000..3d5d5ec75ad6 --- /dev/null +++ b/clang/test/CIR/Transforms/simpl.cir @@ -0,0 +1,50 @@ +// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s + +!s32i = !cir.int +!s64i = !cir.int +module { + cir.func @unary_not(%arg0: !cir.bool) -> !cir.bool { + %0 = cir.unary(not, %arg0) : !cir.bool, !cir.bool + %1 = cir.unary(not, %0) : !cir.bool, !cir.bool + cir.return %1 : !cir.bool + } + // CHECK: cir.func @unary_not(%arg0: !cir.bool) -> !cir.bool + // CHECK: cir.return %arg0 : !cir.bool + + cir.func @cast1(%arg0: !cir.bool) -> !cir.bool { + %0 = cir.cast(bool_to_int, %arg0 : !cir.bool), !s32i + %1 = cir.cast(int_to_bool, %0 : !s32i), !cir.bool + cir.return %1 : !cir.bool + } + // CHECK: cir.func @cast1(%arg0: !cir.bool) -> !cir.bool + // CHECK: cir.return %arg0 : !cir.bool + + cir.func @cast2(%arg0: !s32i) -> !cir.bool { + %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + %1 = cir.cast(bool_to_int, %0 : !cir.bool), !s32i + %2 = cir.cast(integral, %1 : !s32i), !s64i + %3 = cir.cast(int_to_bool, %2 : !s64i), !cir.bool + cir.return %3 : !cir.bool + } + // CHECK: cir.func @cast2(%arg0: !s32i) -> !cir.bool + // CHECK: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + // CHECK: cir.return %0 : !cir.bool + + cir.func @no_cast(%arg0: !s32i) -> !s64i { + %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + %1 = cir.cast(bool_to_int, %0 : !cir.bool), !s32i + %2 = cir.cast(integral, %1 : !s32i), !s64i + cir.return %2 : !s64i + } + // CHECK: cir.func @no_cast(%arg0: !s32i) -> !s64i + // CHECK: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool + // CHECK: %1 = cir.cast(bool_to_int, %0 : !cir.bool), !s32i + // 
CHECK: %2 = cir.cast(integral, %1 : !s32i), !s64i + // CHECK: cir.return %2 : !s64i + +} + + + + + diff --git a/clang/test/CIR/Transforms/switch.cir b/clang/test/CIR/Transforms/switch.cir new file mode 100644 index 000000000000..177dfc98c8af --- /dev/null +++ b/clang/test/CIR/Transforms/switch.cir @@ -0,0 +1,270 @@ +// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s + +!s8i = !cir.int +!s32i = !cir.int +!s64i = !cir.int + +module { + cir.func @shouldFlatSwitchWithDefault(%arg0: !s8i) { + cir.switch (%arg0 : !s8i) [ + case (equal, 1) { + cir.break + }, + case (default) { + cir.break + } + ] + cir.return + } +// CHECK: cir.func @shouldFlatSwitchWithDefault(%arg0: !s8i) { +// CHECK: cir.switch.flat %arg0 : !s8i, ^bb[[#DEFAULT:]] [ +// CHECK: 1: ^bb[[#CASE1:]] +// CHECK: ] +// CHECK: ^bb[[#CASE1]]: +// CHECK: cir.br ^bb3 +// CHECK: ^bb[[#DEFAULT]]: +// CHECK: cir.br ^bb[[#EXIT:]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + cir.func @shouldFlatSwitchWithoutDefault(%arg0: !s32i) { + cir.switch (%arg0 : !s32i) [ + case (equal, 1) { + cir.break + } + ] + cir.return + } +// CHECK: cir.func @shouldFlatSwitchWithoutDefault(%arg0: !s32i) { +// CHECK: cir.switch.flat %arg0 : !s32i, ^bb[[#EXIT:]] [ +// CHECK: 1: ^bb[[#CASE1:]] +// CHECK: ] +// CHECK: ^bb[[#CASE1]]: +// CHECK: cir.br ^bb[[#EXIT]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + + cir.func @shouldFlatSwitchWithImplicitFallthrough(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + case (anyof, [1, 2] : !s64i) { + cir.break + } + ] + cir.return + } +// CHECK: cir.func @shouldFlatSwitchWithImplicitFallthrough(%arg0: !s64i) { +// CHECK: cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [ +// CHECK: 1: ^bb[[#CASE1N2:]], +// CHECK: 2: ^bb[[#CASE1N2]] +// CHECK: ] +// CHECK: ^bb[[#CASE1N2]]: +// CHECK: cir.br ^bb[[#EXIT]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + + + cir.func @shouldFlatSwitchWithExplicitFallthrough(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + case (equal, 1 : !s64i) { // case 1 has its own region + cir.yield // fallthrough to case 2 + }, + case (equal, 2 : !s64i) { + cir.break + } + ] + cir.return + } +// CHECK: cir.func @shouldFlatSwitchWithExplicitFallthrough(%arg0: !s64i) { +// CHECK: cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [ +// CHECK: 1: ^bb[[#CASE1:]], +// CHECK: 2: ^bb[[#CASE2:]] +// CHECK: ] +// CHECK: ^bb[[#CASE1]]: +// CHECK: cir.br ^bb[[#CASE2]] +// CHECK: ^bb[[#CASE2]]: +// CHECK: cir.br ^bb[[#EXIT]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + cir.func @shouldFlatSwitchWithFallthroughToExit(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + case (equal, 1 : !s64i) { + cir.yield // fallthrough to exit + } + ] + cir.return + } +// CHECK: cir.func @shouldFlatSwitchWithFallthroughToExit(%arg0: !s64i) { +// CHECK: cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [ +// CHECK: 1: ^bb[[#CASE1:]] +// CHECK: ] +// CHECK: ^bb[[#CASE1]]: +// CHECK: cir.br ^bb[[#EXIT]] +// CHECK: ^bb[[#EXIT]]: +// CHECK: cir.return +// CHECK: } + + cir.func @shouldDropEmptySwitch(%arg0: !s64i) { + cir.switch (%arg0 : !s64i) [ + ] + // CHECK-NOT: llvm.switch + cir.return + } +// CHECK: cir.func @shouldDropEmptySwitch(%arg0: !s64i) +// CHECK-NOT: cir.switch.flat + + + cir.func @shouldFlatMultiBlockCase(%arg0: !s32i) { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.scope { + %1 = cir.load %0 : !cir.ptr, !s32i + cir.switch (%1 : !s32i) [ + case (equal, 3) { + cir.return + ^bb1: // no predecessors + 
cir.break + } + ] + } + cir.return + } + +// CHECK: cir.func @shouldFlatMultiBlockCase(%arg0: !s32i) { +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} +// CHECK: cir.store %arg0, %0 : !s32i, !cir.ptr +// CHECK: cir.br ^bb1 +// CHECK: ^bb1: // pred: ^bb0 +// CHECK: %1 = cir.load %0 : !cir.ptr, !s32i +// CHECK: cir.switch.flat %1 : !s32i, ^bb4 [ +// CHECK: 3: ^bb2 +// CHECK: ] +// CHECK: ^bb2: // pred: ^bb1 +// CHECK: cir.return +// CHECK: ^bb3: // no predecessors +// CHECK: cir.br ^bb4 +// CHECK: ^bb4: // 2 preds: ^bb1, ^bb3 +// CHECK: cir.br ^bb5 +// CHECK: ^bb5: // pred: ^bb4 +// CHECK: cir.return +// CHECK: } + + + cir.func @shouldFlatNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + cir.store %arg1, %1 : !s32i, !cir.ptr + cir.scope { + %5 = cir.load %0 : !cir.ptr, !s32i + cir.switch (%5 : !s32i) [ + case (equal, 0) { + cir.scope { + %6 = cir.load %1 : !cir.ptr, !s32i + %7 = cir.const #cir.int<0> : !s32i + %8 = cir.cmp(ge, %6, %7) : !s32i, !s32i + %9 = cir.cast(int_to_bool, %8 : !s32i), !cir.bool + cir.if %9 { + cir.break + } + } + cir.break + } + ] + } + %3 = cir.const #cir.int<3> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr + %4 = cir.load %2 : !cir.ptr, !s32i + cir.return %4 : !s32i + } +// CHECK: cir.func @shouldFlatNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i { +// CHECK: cir.switch.flat %3 : !s32i, ^bb7 [ +// CHECK: 0: ^bb2 +// CHECK: ] +// CHECK: ^bb2: // pred: ^bb1 +// CHECK: cir.br ^bb3 +// CHECK: ^bb3: // pred: ^bb2 +// CHECK: cir.brcond {{%.*}} ^bb4, ^bb5 +// CHECK: ^bb4: // pred: ^bb3 +// CHECK: cir.br ^bb7 +// CHECK: ^bb5: // pred: ^bb3 +// CHECK: cir.br ^bb6 +// CHECK: ^bb6: // pred: ^bb5 +// CHECK: cir.br ^bb7 +// CHECK: ^bb7: // 3 preds: ^bb1, ^bb4, ^bb6 +// CHECK: cir.br ^bb8 +// CHECK: ^bb8: // pred: ^bb7 +// CHECK: cir.return %9 : !s32i +// CHECK: } + + + cir.func @flatCaseRange(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["x", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + %2 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + %3 = cir.const #cir.int<0> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr + cir.scope { + %6 = cir.load %0 : !cir.ptr, !s32i + cir.switch (%6 : !s32i) [ + case (equal, -100) { + %7 = cir.const #cir.int<1> : !s32i + cir.store %7, %2 : !s32i, !cir.ptr + cir.break + }, + case (range, [1, 100] : !s32i) { + %7 = cir.const #cir.int<2> : !s32i + cir.store %7, %2 : !s32i, !cir.ptr + cir.break + }, + case (default) { + %7 = cir.const #cir.int<3> : !s32i + cir.store %7, %2 : !s32i, !cir.ptr + cir.break + } + ] + } + %4 = cir.load %2 : !cir.ptr, !s32i + cir.store %4, %1 : !s32i, !cir.ptr + %5 = cir.load %1 : !cir.ptr, !s32i + cir.return %5 : !s32i + } +// CHECK: cir.func @flatCaseRange(%arg0: !s32i) -> !s32i { +// CHECK: cir.switch.flat %[[X:[0-9]+]] : !s32i, ^[[JUDGE_RANGE:bb[0-9]+]] [ +// CHECK-NEXT: -100: ^[[CASE_EQUAL:bb[0-9]+]] +// CHECK-NEXT: ] +// CHECK-NEXT: ^[[CASE_EQUAL]]: +// CHECK-NEXT: cir.int<1> +// CHECK-NEXT: cir.store +// CHECK-NEXT: cir.br ^[[EPILOG:bb[0-9]+]] +// CHECK-NEXT: ^[[CASE_RANGE:bb[0-9]+]]: +// CHECK-NEXT: cir.int<2> +// CHECK-NEXT: cir.store +// CHECK-NEXT: cir.br ^[[EPILOG]] +// CHECK-NEXT: ^[[JUDGE_RANGE]]: +// CHECK-NEXT: 
%[[RANGE:[0-9]+]] = cir.const #cir.int<99> +// CHECK-NEXT: %[[LOWER_BOUND:[0-9]+]] = cir.const #cir.int<1> +// CHECK-NEXT: %[[DIFF:[0-9]+]] = cir.binop(sub, %[[X]], %[[LOWER_BOUND]]) +// CHECK-NEXT: %[[U_DIFF:[0-9]+]] = cir.cast(integral, %[[DIFF]] : !s32i), !u32i +// CHECK-NEXT: %[[U_RANGE:[0-9]+]] = cir.cast(integral, %[[RANGE]] : !s32i), !u32i +// CHECK-NEXT: %[[CMP_RESULT:[0-9]+]] = cir.cmp(le, %[[U_DIFF]], %[[U_RANGE]]) +// CHECK-NEXT: cir.brcond %[[CMP_RESULT]] ^[[CASE_RANGE]], ^[[CASE_DEFAULT:bb[0-9]+]] +// CHECK-NEXT: ^[[CASE_DEFAULT]]: +// CHECK-NEXT: cir.int<3> +// CHECK-NEXT: cir.store +// CHECK-NEXT: cir.br ^[[EPILOG]] +// CHECK-NEXT: ^[[EPILOG]]: +// CHECK-NEXT: cir.br ^[[EPILOG_END:bb[0-9]+]] +// CHECK-NEXT: ^[[EPILOG_END]]: +// CHECK: cir.return +// CHECK: } + +} diff --git a/clang/test/CIR/Transforms/ternary-fold.cir b/clang/test/CIR/Transforms/ternary-fold.cir new file mode 100644 index 000000000000..72ba4815b2db --- /dev/null +++ b/clang/test/CIR/Transforms/ternary-fold.cir @@ -0,0 +1,60 @@ +// RUN: cir-opt -cir-canonicalize -cir-simplify -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int + +module { + cir.func @fold_ternary(%arg0: !s32i, %arg1: !s32i) -> !s32i { + %0 = cir.const #cir.bool : !cir.bool + %1 = cir.ternary (%0, true { + cir.yield %arg0 : !s32i + }, false { + cir.yield %arg1 : !s32i + }) : (!cir.bool) -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func @fold_ternary(%{{.+}}: !s32i, %[[ARG:.+]]: !s32i) -> !s32i { + // CHECK-NEXT: cir.return %[[ARG]] : !s32i + // CHECK-NEXT: } + + cir.func @simplify_ternary(%arg0 : !cir.bool, %arg1 : !s32i) -> !s32i { + %0 = cir.ternary (%arg0, true { + %1 = cir.const #cir.int<42> : !s32i + cir.yield %1 : !s32i + }, false { + cir.yield %arg1 : !s32i + }) : (!cir.bool) -> !s32i + cir.return %0 : !s32i + } + + // CHECK: cir.func @simplify_ternary(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i { + // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<42> : !s32i + // CHECK-NEXT: %[[#B:]] = cir.select if %[[ARG0]] then %[[#A]] else %[[ARG1]] : (!cir.bool, !s32i, !s32i) -> !s32i + // CHECK-NEXT: cir.return %[[#B]] : !s32i + // CHECK-NEXT: } + + cir.func @non_simplifiable_ternary(%arg0 : !cir.bool) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["a", init] + %1 = cir.ternary (%arg0, true { + %2 = cir.const #cir.int<42> : !s32i + cir.yield %2 : !s32i + }, false { + %3 = cir.load %0 : !cir.ptr, !s32i + cir.yield %3 : !s32i + }) : (!cir.bool) -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func @non_simplifiable_ternary(%[[ARG0:.+]]: !cir.bool) -> !s32i { + // CHECK-NEXT: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] + // CHECK-NEXT: %[[#B:]] = cir.ternary(%[[ARG0]], true { + // CHECK-NEXT: %[[#C:]] = cir.const #cir.int<42> : !s32i + // CHECK-NEXT: cir.yield %[[#C]] : !s32i + // CHECK-NEXT: }, false { + // CHECK-NEXT: %[[#D:]] = cir.load %[[#A]] : !cir.ptr, !s32i + // CHECK-NEXT: cir.yield %[[#D]] : !s32i + // CHECK-NEXT: }) : (!cir.bool) -> !s32i + // CHECK-NEXT: cir.return %[[#B]] : !s32i + // CHECK-NEXT: } +} diff --git a/clang/test/CIR/Transforms/ternary-fold.cpp b/clang/test/CIR/Transforms/ternary-fold.cpp new file mode 100644 index 000000000000..69934da793df --- /dev/null +++ b/clang/test/CIR/Transforms/ternary-fold.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize %s -o %t1.cir 2>&1 | FileCheck -check-prefix=CIR-BEFORE %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir 
-emit-cir -mmlir --mlir-print-ir-after=cir-simplify %s -o %t2.cir 2>&1 | FileCheck -check-prefix=CIR-AFTER %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s + +int test(bool x) { + return x ? 1 : 2; +} + +// CIR-BEFORE: cir.func @_Z4testb +// CIR-BEFORE: %{{.+}} = cir.ternary(%{{.+}}, true { +// CIR-BEFORE-NEXT: %[[#A:]] = cir.const #cir.int<1> : !s32i +// CIR-BEFORE-NEXT: cir.yield %[[#A]] : !s32i +// CIR-BEFORE-NEXT: }, false { +// CIR-BEFORE-NEXT: %[[#B:]] = cir.const #cir.int<2> : !s32i +// CIR-BEFORE-NEXT: cir.yield %[[#B]] : !s32i +// CIR-BEFORE-NEXT: }) : (!cir.bool) -> !s32i +// CIR-BEFORE: } + +// CIR-AFTER: cir.func @_Z4testb +// CIR-AFTER: %[[#A:]] = cir.const #cir.int<1> : !s32i +// CIR-AFTER-NEXT: %[[#B:]] = cir.const #cir.int<2> : !s32i +// CIR-AFTER-NEXT: %{{.+}} = cir.select if %{{.+}} then %[[#A]] else %[[#B]] : (!cir.bool, !s32i, !s32i) -> !s32i +// CIR-AFTER: } + +// LLVM: @_Z4testb +// LLVM: %{{.+}} = select i1 %{{.+}}, i32 1, i32 2 +// LLVM: } + +int test2(bool cond) { + constexpr int x = 1; + constexpr int y = 2; + return cond ? x : y; +} + +// CIR-BEFORE: cir.func @_Z5test2b +// CIR-BEFORE: %[[#COND:]] = cir.load %{{.+}} : !cir.ptr, !cir.bool +// CIR-BEFORE-NEXT: %{{.+}} = cir.ternary(%[[#COND]], true { +// CIR-BEFORE-NEXT: %[[#A:]] = cir.const #cir.int<1> : !s32i +// CIR-BEFORE-NEXT: cir.yield %[[#A]] : !s32i +// CIR-BEFORE-NEXT: }, false { +// CIR-BEFORE-NEXT: %[[#B:]] = cir.const #cir.int<2> : !s32i +// CIR-BEFORE-NEXT: cir.yield %[[#B]] : !s32i +// CIR-BEFORE-NEXT: }) : (!cir.bool) -> !s32i +// CIR-BEFORE: } + +// CIR-AFTER: cir.func @_Z5test2b +// CIR-AFTER: %[[#COND:]] = cir.load %{{.+}} : !cir.ptr, !cir.bool +// CIR-AFTER-NEXT: %[[#A:]] = cir.const #cir.int<1> : !s32i +// CIR-AFTER-NEXT: %[[#B:]] = cir.const #cir.int<2> : !s32i +// CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[#COND]] then %[[#A]] else %[[#B]] : (!cir.bool, !s32i, !s32i) -> !s32i +// CIR-AFTER: } + +// LLVM: @_Z5test2b +// LLVM: %{{.+}} = select i1 %{{.+}}, i32 1, i32 2 +// LLVM: } diff --git a/clang/test/CIR/Transforms/ternary.cir b/clang/test/CIR/Transforms/ternary.cir new file mode 100644 index 000000000000..67ef7f95a6b5 --- /dev/null +++ b/clang/test/CIR/Transforms/ternary.cir @@ -0,0 +1,68 @@ +// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s + +!s32i = !cir.int + +module { + cir.func @foo(%arg0: !s32i) -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} + %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} + cir.store %arg0, %0 : !s32i, !cir.ptr + %2 = cir.load %0 : !cir.ptr, !s32i + %3 = cir.const #cir.int<0> : !s32i + %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool + %5 = cir.ternary(%4, true { + %7 = cir.const #cir.int<3> : !s32i + cir.yield %7 : !s32i + }, false { + %7 = cir.const #cir.int<5> : !s32i + cir.yield %7 : !s32i + }) : (!cir.bool) -> !s32i + cir.store %5, %1 : !s32i, !cir.ptr + %6 = cir.load %1 : !cir.ptr, !s32i + cir.return %6 : !s32i + } + +// CHECK: cir.func @foo(%arg0: !s32i) -> !s32i { +// CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} +// CHECK: %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} +// CHECK: cir.store %arg0, %0 : !s32i, !cir.ptr +// CHECK: %2 = cir.load %0 : !cir.ptr, !s32i +// CHECK: %3 = cir.const #cir.int<0> : !s32i +// CHECK: %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool +// CHECK: cir.brcond %4 ^bb1, ^bb2 +// CHECK: ^bb1: // pred: ^bb0 +// CHECK: %5 = cir.const 
#cir.int<3> : !s32i +// CHECK: cir.br ^bb3(%5 : !s32i) +// CHECK: ^bb2: // pred: ^bb0 +// CHECK: %6 = cir.const #cir.int<5> : !s32i +// CHECK: cir.br ^bb3(%6 : !s32i) +// CHECK: ^bb3(%7: !s32i): // 2 preds: ^bb1, ^bb2 +// CHECK: cir.br ^bb4 +// CHECK: ^bb4: // pred: ^bb3 +// CHECK: cir.store %7, %1 : !s32i, !cir.ptr +// CHECK: %8 = cir.load %1 : !cir.ptr, !s32i +// CHECK: cir.return %8 : !s32i +// CHECK: } + + cir.func @foo2(%arg0: !cir.bool) { + cir.ternary(%arg0, true { + cir.yield + }, false { + cir.yield + }) : (!cir.bool) -> () + cir.return + } + +// CHECK: cir.func @foo2(%arg0: !cir.bool) { +// CHECK: cir.brcond %arg0 ^bb1, ^bb2 +// CHECK: ^bb1: // pred: ^bb0 +// CHECK: cir.br ^bb3 +// CHECK: ^bb2: // pred: ^bb0 +// CHECK: cir.br ^bb3 +// CHECK: ^bb3: // 2 preds: ^bb1, ^bb2 +// CHECK: cir.br ^bb4 +// CHECK: ^bb4: // pred: ^bb3 +// CHECK: cir.return +// CHECK: } + +} diff --git a/clang/test/CIR/analysis-only.cpp b/clang/test/CIR/analysis-only.cpp new file mode 100644 index 000000000000..7dc58250b91b --- /dev/null +++ b/clang/test/CIR/analysis-only.cpp @@ -0,0 +1,2 @@ +// RUN: %clang %s -fclangir-analysis-only -### -c %s 2>&1 | FileCheck %s +// CHECK: "-fclangir-analysis-only" diff --git a/clang/test/CIR/cc1.c b/clang/test/CIR/cc1.c new file mode 100644 index 000000000000..176ea42d44de --- /dev/null +++ b/clang/test/CIR/cc1.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir %s -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm-bc %s -o %t.bc +// RUN: llvm-dis %t.bc -o %t.bc.ll +// RUN: FileCheck --input-file=%t.bc.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -S %s -o %t.s +// RUN: FileCheck --input-file=%t.s %s -check-prefix=ASM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-obj %s -o %t.o +// RUN: llvm-objdump -d %t.o | FileCheck %s -check-prefix=OBJ + +void foo() {} + +// MLIR: func.func @foo() { +// MLIR-NEXT: return +// MLIR-NEXT: } + +// LLVM: define dso_local void @foo() +// LLVM-NEXT: ret void +// LLVM-NEXT: } + +// ASM: .globl foo +// ASM-NEXT: .p2align +// ASM-NEXT: .type foo,@function +// ASM-NEXT: foo: +// ASM: retq + +// OBJ: 0: c3 retq diff --git a/clang/test/CIR/cc1.cir b/clang/test/CIR/cc1.cir new file mode 100644 index 000000000000..9ea923faff0c --- /dev/null +++ b/clang/test/CIR/cc1.cir @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=LLVM + +module { + cir.func @foo() { + cir.return + } +} + +// LLVM: define void @foo() +// LLVM-NEXT: ret void +// LLVM-NEXT: } diff --git a/clang/test/CIR/cirtool.cir b/clang/test/CIR/cirtool.cir new file mode 100644 index 000000000000..8351d5be3165 --- /dev/null +++ b/clang/test/CIR/cirtool.cir @@ -0,0 +1,20 @@ +// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o %t.mlir +// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR +// RUN: mlir-translate -mlir-to-llvmir %t.mlir -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +module { + cir.func @foo() { + cir.return + } +} + +// LLVM: define void @foo() +// LLVM-NEXT: ret void +// LLVM-NEXT: } + +// MLIR: module { +// MLIR-NEXT: llvm.func @foo() { +// MLIR-NEXT: llvm.return +// 
MLIR-NEXT: } +// MLIR-NEXT: } diff --git a/clang/test/CIR/driver.c b/clang/test/CIR/driver.c new file mode 100644 index 000000000000..fcafb71a0a4a --- /dev/null +++ b/clang/test/CIR/driver.c @@ -0,0 +1,55 @@ +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fclangir-direct-lowering -S -Xclang -emit-cir %s -o %t1.cir +// RUN: FileCheck --input-file=%t1.cir %s -check-prefix=CIR +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -S -Xclang -emit-cir %s -o %t2.cir +// RUN: FileCheck --input-file=%t2.cir %s -check-prefix=CIR +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fclangir-direct-lowering -S -emit-llvm %s -o %t1.ll +// RUN: FileCheck --input-file=%t1.ll %s -check-prefix=LLVM +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -S -emit-llvm %s -o %t2.ll +// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=CIR_STD_LLVM +// Test also the cases for both -fclangir-direct-lowering and -fno-clangir-direct-lowering, +// with -fno-clangir-direct-lowering having the preference +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fclangir-direct-lowering -fno-clangir-direct-lowering -S -emit-llvm %s -o %t2.ll +// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=CIR_STD_LLVM +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -fclangir-direct-lowering -c -emit-llvm %s -o %t1.bc +// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=CIR_STD_LLVM +// RUN: llvm-dis %t1.bc -o %t1.bc.ll +// RUN: FileCheck --input-file=%t1.bc.ll %s -check-prefix=LLVM +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -c -emit-llvm %s -o %t2.bc +// RUN: llvm-dis %t2.bc -o %t2.bc.ll +// RUN: FileCheck --input-file=%t2.bc.ll %s -check-prefix=CIR_STD_LLVM +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -c %s -o %t.o +// RUN: llvm-objdump -d %t.o | FileCheck %s -check-prefix=OBJ +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -clangir-disable-passes -S -Xclang -emit-cir %s -o %t.cir +// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -clangir-disable-verifier -S -Xclang -emit-cir %s -o %t.cir +// RUN: %clang -target arm64-apple-macosx12.0.0 -fclangir -S -Xclang -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR_MACOS +// RUN: %clang -target arm64-apple-macosx12.0.0 -fclangir -S -emit-llvm %s -o %t3.ll +// RUN: FileCheck --input-file=%t3.ll %s -check-prefix=LLVM_MACOS + +void foo(void) {} + +// CIR: module {{.*}} { +// CIR-NEXT: cir.func @foo() +// CIR-NEXT: cir.return +// CIR-NEXT: } +// CIR-NEXT: } + +// CIR_MACOS: module {{.*}} { +// CIR_MACOS-NEXT: cir.func @foo() +// CIR_MACOS-NEXT: cir.return +// CIR_MACOS-NEXT: } +// CIR_MACOS-NEXT: } + +// LLVM: define dso_local void @foo() +// LLVM-NEXT: ret void +// LLVM-NEXT: } + +// CIR_STD_LLVM: define void @foo() +// CIR_STD_LLVM-NEXT: ret void +// CIR_STD_LLVM-NEXT: } + +// LLVM_MACOS: define void @foo() +// LLVM_MACOS-NEXT: ret void +// LLVM_MACOS-NEXT: } + +// OBJ: 0: c3 retq diff --git a/clang/test/CIR/lit.local.cfg b/clang/test/CIR/lit.local.cfg new file mode 100644 index 000000000000..6afd60f47bff --- /dev/null +++ b/clang/test/CIR/lit.local.cfg @@ -0,0 +1,2 @@ +if not config.root.clang_enable_cir: + config.unsupported = True diff --git a/clang/test/CIR/mlirargs.c b/clang/test/CIR/mlirargs.c new file mode 100644 index 000000000000..cfb07197ef18 --- /dev/null +++ b/clang/test/CIR/mlirargs.c @@ -0,0 +1,12 @@ +// Clang returns 1 when 
wrong arguments are given. +// RUN: not %clang_cc1 -mmlir -mlir-disable-threadingd -mmlir -mlir-print-op-genericd 2>&1 | FileCheck %s --check-prefix=WRONG +// Test that the driver can pass mlir args to cc1. +// RUN: %clang -### -mmlir -mlir-disable-threading %s 2>&1 | FileCheck %s --check-prefix=CC1 + + +// WRONG: clang (MLIR option parsing): Unknown command line argument '-mlir-disable-threadingd'. Try: 'clang (MLIR option parsing) --help' +// WRONG: clang (MLIR option parsing): Did you mean '--mlir-disable-threading'? +// WRONG: clang (MLIR option parsing): Unknown command line argument '-mlir-print-op-genericd'. Try: 'clang (MLIR option parsing) --help' +// WRONG: clang (MLIR option parsing): Did you mean '--mlir-print-op-generic'? + +// CC1: "-mmlir" "-mlir-disable-threading" diff --git a/clang/test/CIR/mlirprint.c b/clang/test/CIR/mlirprint.c new file mode 100644 index 000000000000..3514eb895381 --- /dev/null +++ b/clang/test/CIR/mlirprint.c @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after-all %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat -mmlir --mlir-print-ir-after-all %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIRFLAT +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir -mmlir --mlir-print-ir-after-all %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIRMLIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -mmlir --mlir-print-ir-after-all -mllvm -print-after-all %s -o %t.ll 2>&1 | FileCheck %s -check-prefix=CIR -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-drop-ast %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIRPASS +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat -mmlir --mlir-print-ir-before=cir-flatten-cfg %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CFGPASS + +int foo(void) { + int i = 3; + return i; +} + + +// CIR: IR Dump After CIRCanonicalize (cir-canonicalize) +// CIR: cir.func @foo() -> !s32i +// CIR: IR Dump After LoweringPrepare (cir-lowering-prepare) +// CIR: cir.func @foo() -> !s32i +// CIR-NOT: IR Dump After FlattenCFG +// CIR-NOT: IR Dump After SCFPrepare +// CIR: IR Dump After DropAST (cir-drop-ast) +// CIR: cir.func @foo() -> !s32i +// CIRFLAT: IR Dump After CIRCanonicalize (cir-canonicalize) +// CIRFLAT: cir.func @foo() -> !s32i +// CIRFLAT: IR Dump After LoweringPrepare (cir-lowering-prepare) +// CIRFLAT: cir.func @foo() -> !s32i +// CIRFLAT: IR Dump After FlattenCFG (cir-flatten-cfg) +// CIRFLAT: IR Dump After DropAST (cir-drop-ast) +// CIRFLAT: cir.func @foo() -> !s32i +// CIRMLIR: IR Dump After CIRCanonicalize (cir-canonicalize) +// CIRMLIR: IR Dump After LoweringPrepare (cir-lowering-prepare) +// CIRMLIR: IR Dump After SCFPrepare (cir-mlir-scf-prepare +// CIRMLIR: IR Dump After DropAST (cir-drop-ast) +// LLVM: IR Dump After cir::direct::ConvertCIRToLLVMPass (cir-flat-to-llvm) +// LLVM: llvm.func @foo() -> i32 +// LLVM: IR Dump After +// LLVM: define dso_local i32 @foo() + +// CIRPASS-NOT: IR Dump After CIRCanonicalize +// CIRPASS: IR Dump After DropAST + +// CFGPASS: IR Dump Before FlattenCFG (cir-flatten-cfg) diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 5fceb1d71033..d0b7d3f1008c 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -80,7 +80,15 @@ list(APPEND CLANG_TEST_DEPS diagtool 
  hmaptool
  )
-
+
+if(CLANG_ENABLE_CIR)
+  list(APPEND CLANG_TEST_DEPS
+    cir-opt
+    cir-translate
+    mlir-translate
+    )
+endif()
+
 if(CLANG_ENABLE_STATIC_ANALYZER)
   list(APPEND CLANG_TEST_DEPS
     clang-check
diff --git a/clang/test/CodeGen/atomic.c b/clang/test/CodeGen/atomic.c
index 16c29e282ddd..48e3c3304816 100644
--- a/clang/test/CodeGen/atomic.c
+++ b/clang/test/CodeGen/atomic.c
@@ -160,4 +160,4 @@ void force_global_uses(void) {
 // X86: call void @__atomic_load(i32 noundef 16, ptr noundef @glob_longdbl, ptr noundef %atomic-temp
 // X86-NEXT: %0 = load x86_fp80, ptr %atomic-temp, align 16
 // SYSTEMZ: load atomic fp128, ptr @[[GLOB_LONGDBL]] seq_cst
-}
+}
\ No newline at end of file
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
index e5630a07424c..bcbb0f13dd69 100644
--- a/clang/test/lit.cfg.py
+++ b/clang/test/lit.cfg.py
@@ -29,6 +29,7 @@
     ".c",
     ".cpp",
     ".i",
+    ".cir",
     ".cppm",
     ".m",
     ".mm",
@@ -84,6 +85,8 @@
 tools = [
     "apinotes-test",
     "c-index-test",
+    "cir-opt",
+    "cir-translate",
     "clang-diff",
     "clang-format",
     "clang-repl",
diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index bdd8004be3e0..602ed9afd64b 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -3,6 +3,11 @@ create_subdirectory_options(CLANG TOOL)
 add_clang_subdirectory(diagtool)
 add_clang_subdirectory(driver)
 add_clang_subdirectory(apinotes-test)
+if(CLANG_ENABLE_CIR)
+  add_clang_subdirectory(cir-opt)
+  add_clang_subdirectory(cir-translate)
+  add_clang_subdirectory(cir-lsp-server)
+endif()
 add_clang_subdirectory(clang-diff)
 add_clang_subdirectory(clang-format)
 add_clang_subdirectory(clang-format-vs)
diff --git a/clang/tools/cir-lsp-server/CMakeLists.txt b/clang/tools/cir-lsp-server/CMakeLists.txt
new file mode 100644
index 000000000000..5154a08e7d47
--- /dev/null
+++ b/clang/tools/cir-lsp-server/CMakeLists.txt
@@ -0,0 +1,35 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
+
+include_directories(${LLVM_MAIN_SRC_DIR}/../mlir/include)
+include_directories(${CMAKE_BINARY_DIR}/tools/mlir/include)
+
+set(LIBS
+  ${dialect_libs}
+  ${conversion_libs}
+  ${test_libs}
+  clangCIR
+  clangCIRLoweringThroughMLIR
+  clangCIRLoweringDirectToLLVM
+  MLIRCIR
+  MLIRAffineAnalysis
+  MLIRAnalysis
+  MLIRDialect
+  MLIRLspServerLib
+  MLIRParser
+  MLIRPass
+  MLIRTransforms
+  MLIRTransformUtils
+  MLIRSupport
+  MLIRIR
+  )
+
+add_mlir_tool(cir-lsp-server
+  cir-lsp-server.cpp
+
+  DEPENDS
+  ${LIBS}
+)
+
+target_link_libraries(cir-lsp-server PRIVATE ${LIBS})
+llvm_update_compile_flags(cir-lsp-server)
diff --git a/clang/tools/cir-lsp-server/cir-lsp-server.cpp b/clang/tools/cir-lsp-server/cir-lsp-server.cpp
new file mode 100644
index 000000000000..bd823c13a42e
--- /dev/null
+++ b/clang/tools/cir-lsp-server/cir-lsp-server.cpp
@@ -0,0 +1,20 @@
+//===- cir-lsp-server.cpp - CIR Language Server ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/InitAllDialects.h"
+#include "mlir/Tools/mlir-lsp-server/MlirLspServerMain.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+
+int main(int argc, char **argv) {
+  mlir::DialectRegistry registry;
+  mlir::registerAllDialects(registry);
+  registry.insert<mlir::cir::CIRDialect>();
+  return failed(mlir::MlirLspServerMain(argc, argv, registry));
+}
diff --git a/clang/tools/cir-opt/CMakeLists.txt b/clang/tools/cir-opt/CMakeLists.txt
new file mode 100644
index 000000000000..741cdfa5950d
--- /dev/null
+++ b/clang/tools/cir-opt/CMakeLists.txt
@@ -0,0 +1,35 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
+
+include_directories(${LLVM_MAIN_SRC_DIR}/../mlir/include)
+include_directories(${CMAKE_BINARY_DIR}/tools/mlir/include)
+
+set(LIBS
+  ${dialect_libs}
+  ${conversion_libs}
+  clangCIR
+  clangCIRLoweringThroughMLIR
+  clangCIRLoweringDirectToLLVM
+  MLIRAnalysis
+  MLIRCIR
+  MLIRCIRTransforms
+  MLIRDialect
+  MLIRIR
+  MLIRMemRefDialect
+  MLIROptLib
+  MLIRParser
+  MLIRPass
+  MLIRSideEffectInterfaces
+  MLIRTransforms
+  MLIRTransformUtils
+)
+
+add_clang_tool(cir-opt
+  cir-opt.cpp
+
+  DEPENDS
+  ${LIBS}
+)
+
+target_link_libraries(cir-opt PRIVATE ${LIBS})
+llvm_update_compile_flags(cir-opt)
diff --git a/clang/tools/cir-opt/cir-opt.cpp b/clang/tools/cir-opt/cir-opt.cpp
new file mode 100644
index 000000000000..e7af0b214462
--- /dev/null
+++ b/clang/tools/cir-opt/cir-opt.cpp
@@ -0,0 +1,72 @@
+//===- cir-opt.cpp - CIR optimization and analysis driver -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Similar to MLIR/LLVM's "opt" tools but also deals with analysis and custom
+// arguments. TODO: this is basically a copy from MlirOptMain.cpp, but capable
+// of module emission as specified by the user.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/InitAllPasses.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassRegistry.h"
+#include "mlir/Tools/mlir-opt/MlirOptMain.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/Dialect/Passes.h"
+#include "clang/CIR/Passes.h"
+
+int main(int argc, char **argv) {
+  // TODO: register needed MLIR passes for CIR?
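// (Illustrative sketch, not lines from the patch: the registrations below make
// the CIR passes and the "cir-to-llvm" pipeline available through the usual
// mlir-opt style command line. Assuming a CIR-enabled build, and with
// input.cir used here as a placeholder file name, invocations would look like:
//
//   cir-opt input.cir -cir-canonicalize -o canonical.cir
//   cir-opt input.cir -cir-flatten-cfg -o flat.cir
//   cir-opt input.cir -cir-to-llvm -o lowered.mlir
//
// The pass names match the ones exercised by clang/test/CIR/mlirprint.c above.)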
+  mlir::DialectRegistry registry;
+  registry.insert<mlir::cir::CIRDialect, mlir::memref::MemRefDialect,
+                  mlir::LLVM::LLVMDialect, mlir::func::FuncDialect,
+                  mlir::arith::ArithDialect, mlir::DLTIDialect,
+                  mlir::omp::OpenMPDialect>();
+
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return cir::createConvertMLIRToLLVMPass();
+  });
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return mlir::createCIRCanonicalizePass();
+  });
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return mlir::createCIRSimplifyPass();
+  });
+
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return mlir::createSCFPreparePass();
+  });
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return cir::createConvertCIRToMLIRPass();
+  });
+
+  mlir::PassPipelineRegistration<> pipeline(
+      "cir-to-llvm", "", [](mlir::OpPassManager &pm) {
+        cir::direct::populateCIRToLLVMPasses(pm);
+      });
+
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return mlir::createFlattenCFGPass();
+  });
+
+  ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+    return mlir::createReconcileUnrealizedCastsPass();
+  });
+
+  mlir::registerTransformsPasses();
+
+  return failed(MlirOptMain(
+      argc, argv, "Clang IR analysis and optimization tool\n", registry));
+}
diff --git a/clang/tools/cir-translate/CMakeLists.txt b/clang/tools/cir-translate/CMakeLists.txt
new file mode 100644
index 000000000000..a5e22b02e505
--- /dev/null
+++ b/clang/tools/cir-translate/CMakeLists.txt
@@ -0,0 +1,36 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
+get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS)
+
+include_directories(${LLVM_MAIN_SRC_DIR}/../mlir/include)
+include_directories(${CMAKE_BINARY_DIR}/tools/mlir/include)
+
+set(LIBS
+  ${dialect_libs}
+  ${conversion_libs}
+  ${translation_libs}
+  clangCIR
+  clangCIRLoweringDirectToLLVM
+  MLIRAnalysis
+  MLIRCIR
+  MLIRCIRTransforms
+  MLIRDialect
+  MLIRIR
+  MLIROptLib
+  MLIRParser
+  MLIRPass
+  MLIRTransforms
+  MLIRTransformUtils
+  MLIRTranslateLib
+  MLIRSupport
+)
+
+add_clang_tool(cir-translate
+  cir-translate.cpp
+
+  DEPENDS
+  ${LIBS}
+)
+
+target_link_libraries(cir-translate PRIVATE ${LIBS})
+llvm_update_compile_flags(cir-translate)
diff --git a/clang/tools/cir-translate/cir-translate.cpp b/clang/tools/cir-translate/cir-translate.cpp
new file mode 100644
index 000000000000..9ff379a26588
--- /dev/null
+++ b/clang/tools/cir-translate/cir-translate.cpp
@@ -0,0 +1,57 @@
+//===- cir-translate.cpp - CIR Translate Driver ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Converts CIR directly to LLVM IR, similar to mlir-translate or LLVM llc.
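// (Illustrative sketch, not lines from the patch: the tool registers a
// "cir-to-llvmir" translation below, so in a CIR-enabled build a typical
// invocation, with placeholder file names, would be:
//
//   cir-translate input.cir --cir-to-llvmir -o out.ll
//
// mirroring how mlir-translate is normally driven.)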
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/DLTI/DLTI.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/InitAllTranslations.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Target/LLVMIR/Dialect/All.h"
+#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
+#include "mlir/Tools/mlir-translate/Translation.h"
+#include "llvm/IR/Module.h"
+
+namespace cir {
+namespace direct {
+extern void registerCIRDialectTranslation(mlir::DialectRegistry &registry);
+extern std::unique_ptr<llvm::Module>
+lowerDirectlyFromCIRToLLVMIR(mlir::ModuleOp theModule,
+                             llvm::LLVMContext &llvmCtx,
+                             bool disableVerifier = false);
+} // namespace direct
+}
+
+void registerToLLVMTranslation() {
+  mlir::TranslateFromMLIRRegistration registration(
+      "cir-to-llvmir", "Translate CIR to LLVMIR",
+      [](mlir::Operation *op, mlir::raw_ostream &output) {
+        llvm::LLVMContext llvmContext;
+        auto llvmModule = cir::direct::lowerDirectlyFromCIRToLLVMIR(
+            llvm::dyn_cast<mlir::ModuleOp>(op), llvmContext);
+        if (!llvmModule)
+          return mlir::failure();
+        llvmModule->print(output, nullptr);
+        return mlir::success();
+      },
+      [](mlir::DialectRegistry &registry) {
+        registry.insert<mlir::DLTIDialect, mlir::func::FuncDialect>();
+        mlir::registerAllToLLVMIRTranslations(registry);
+        cir::direct::registerCIRDialectTranslation(registry);
+      });
+}
+
+int main(int argc, char **argv) {
+  registerToLLVMTranslation();
+  return failed(
+      mlir::mlirTranslateMain(argc, argv, "CIR Translation Tool"));
+}
diff --git a/clang/utils/TableGen/CIRLoweringEmitter.cpp b/clang/utils/TableGen/CIRLoweringEmitter.cpp
new file mode 100644
index 000000000000..1468f95a7074
--- /dev/null
+++ b/clang/utils/TableGen/CIRLoweringEmitter.cpp
@@ -0,0 +1,75 @@
+//===- CIRBuiltinsEmitter.cpp - Generate lowering of builtins --=-*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TableGenBackends.h"
+#include "llvm/TableGen/TableGenBackend.h"
+
+using namespace llvm;
+
+namespace {
+std::string ClassDefinitions;
+std::string ClassList;
+
+void GenerateLowering(const Record *Operation) {
+  using namespace std::string_literals;
+  std::string Name = Operation->getName().str();
+  std::string LLVMOp = Operation->getValueAsString("llvmOp").str();
+  ClassDefinitions +=
+      "class CIR" + Name +
+      "Lowering : public mlir::OpConversionPattern<mlir::cir::" + Name +
+      R"C++(> {
+public:
+  using OpConversionPattern<mlir::cir::)C++" + Name +
+      R"C++(>::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(mlir::cir::)C++" +
+      Name +
+      " op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) "
+      "const "
+      "override {";
+
+  auto ResultCount = Operation->getValueAsDag("results")->getNumArgs();
+  if (ResultCount > 0)
+    ClassDefinitions += R"C++(
+    auto resTy = this->getTypeConverter()->convertType(op.getType());)C++";
+
+  ClassDefinitions += R"C++(
+    rewriter.replaceOpWithNewOp<mlir::LLVM::)C++" +
+                      LLVMOp + ">(op";
+
+  if (ResultCount > 0)
+    ClassDefinitions += ", resTy";
+
+  auto ArgCount = Operation->getValueAsDag("arguments")->getNumArgs();
+  for (size_t i = 0; i != ArgCount; ++i)
+    ClassDefinitions += ", adaptor.getOperands()[" + std::to_string(i) + ']';
+
+  ClassDefinitions += R"C++();
+    return mlir::success();
+  }
+};
+)C++";
+
+  ClassList += ", CIR" + Name + "Lowering\n";
+}
+} // namespace
+
+void clang::EmitCIRBuiltinsLowering(RecordKeeper &Records, raw_ostream &OS) {
+  emitSourceFileHeader("Lowering of ClangIR builtins to LLVM IR builtins", OS);
+  for (const auto *Builtin :
+       Records.getAllDerivedDefinitions("LLVMLoweringInfo")) {
+    if (!Builtin->getValueAsString("llvmOp").empty())
+      GenerateLowering(Builtin);
+  }
+
+  OS << "#ifdef GET_BUILTIN_LOWERING_CLASSES\n"
+     << ClassDefinitions << "\n#undef GET_BUILTIN_LOWERING_CLASSES\n#endif\n";
+  OS << "#ifdef GET_BUILTIN_LOWERING_LIST\n"
+     << ClassList << "\n#undef GET_BUILTIN_LOWERING_LIST\n#endif\n";
+}
diff --git a/clang/utils/TableGen/CMakeLists.txt b/clang/utils/TableGen/CMakeLists.txt
index 5b072a1ac196..df5d8c03f5a5 100644
--- a/clang/utils/TableGen/CMakeLists.txt
+++ b/clang/utils/TableGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_tablegen(clang-tblgen CLANG
   DESTINATION "${CLANG_TOOLS_INSTALL_DIR}"
   EXPORT Clang
   ASTTableGen.cpp
+  CIRLoweringEmitter.cpp
   ClangASTNodesEmitter.cpp
   ClangASTPropertiesEmitter.cpp
   ClangAttrEmitter.cpp
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 42cc704543f1..5d1c5e39d82c 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -26,6 +26,7 @@ using namespace clang;
 enum ActionType {
   PrintRecords,
   DumpJSON,
+  GenCIRBuiltinsLowering,
   GenClangAttrClasses,
   GenClangAttrParserStringSwitches,
   GenClangAttrSubjectMatchRulesParserStringSwitches,
@@ -119,6 +120,9 @@ cl::opt<ActionType> Action(
                    "Print all records to stdout (default)"),
         clEnumValN(DumpJSON, "dump-json",
                    "Dump all records as machine-readable JSON"),
+        clEnumValN(GenCIRBuiltinsLowering, "gen-cir-builtins-lowering",
+                   "Generate lowering of ClangIR builtins to equivalent LLVM "
+                   "IR builtins"),
         clEnumValN(GenClangAttrClasses, "gen-clang-attr-classes",
                    "Generate clang attribute clases"),
         clEnumValN(GenClangAttrParserStringSwitches,
@@ -320,6 +324,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case DumpJSON:
     EmitJSON(Records, OS);
     break;
+  case GenCIRBuiltinsLowering:
+    EmitCIRBuiltinsLowering(Records, OS);
+    break;
   case GenClangAttrClasses:
     EmitClangAttrClass(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 5f2dd257cb90..aea983d8c0a1 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -24,6 +24,7 @@ class RecordKeeper;
 
 namespace clang {
 
+void EmitCIRBuiltinsLowering(llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
 void EmitClangDeclContext(llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
 /**
   @param PriorizeIfSubclassOf These classes should be prioritized in the output.
diff --git a/mlir/docs/Passes.md b/mlir/docs/Passes.md
index 6a18e06593e8..242b11a824c1 100644
--- a/mlir/docs/Passes.md
+++ b/mlir/docs/Passes.md
@@ -123,3 +123,7 @@ This document describes the available MLIR passes and their contracts.
 ## XeGPU Dialect Passes
 
 [include "XeGPUPasses.md"]
+
+## CIR Dialect Passes
+
+[include "CIRPasses.md"]
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index d1280aceeb7b..13011939b0eb 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -970,9 +970,9 @@ void CallOp::build(OpBuilder &builder, OperationState &state, TypeRange results,
 
 void CallOp::build(OpBuilder &builder, OperationState &state, TypeRange results,
                    FlatSymbolRefAttr callee, ValueRange args) {
-  assert(callee && "expected non-null callee in direct call builder");
+  auto fargs = callee ? args : args.drop_front();
   build(builder, state, results,
-        TypeAttr::get(getLLVMFuncType(builder.getContext(), results, args)),
+        TypeAttr::get(getLLVMFuncType(builder.getContext(), results, fargs)),
         callee, args, /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr,
         /*CConv=*/nullptr, /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index 24f485116959..b49f479f57fb 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -50,6 +50,8 @@ function(runtime_register_component name)
 endfunction()
 
 find_package(LLVM PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
+# TODO(CIR): Once we guard CIR including clang builds guard this with the same flag
+find_package(MLIR PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
 find_package(Clang PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
 
 set(LLVM_THIRD_PARTY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../third-party")
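Illustrative configuration sketch (not part of the patch; the generator and
directory names are placeholders): the cir-opt, cir-translate and cir-lsp-server
tools, the clang-tblgen backend and the CIR tests added above are only built when
clang and mlir are enabled together and CLANG_ENABLE_CIR is turned on, e.g.:

  cmake -G Ninja -S llvm -B build \
        -DLLVM_ENABLE_PROJECTS="clang;mlir" \
        -DCLANG_ENABLE_CIR=ON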