diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 00000000000..c6f1bef64aa
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,31 @@
+name: build
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on:
+      - ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [clang, gcc]
+        check_type: [normal, debug]
+    env:
+      LLVM_VER: 10
+      COMPILER: ${{ matrix.compiler }}
+      CHECK_TYPE: ${{ matrix.check_type }}
+    steps:
+      - name: Checkout code into workspace directory
+        uses: actions/checkout@v2
+      - name: Setup prerequisites
+        run: bash ./ci/prerequisites.sh
+      - name: Build
+        run: bash ./ci/build.sh
+      - name: Check
+        run: bash ./ci/check.sh
+      - name: Check output
+        run: bash ./ci/check_output.sh
+        if: ${{ success() || failure() }}
diff --git a/ci/build.sh b/ci/build.sh
new file mode 100644
index 00000000000..f541929e69c
--- /dev/null
+++ b/ci/build.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -eu
+
+if [ $COMPILER = "clang" ]; then
+	export CC=clang-$LLVM_VER
+else
+	export CC=gcc
+fi
+
+# configure & build
+if [ $CHECK_TYPE = "debug" ]; then
+	CFLAGS="-O0" ./configure --enable-debug --enable-cassert --enable-tap-tests --with-icu
+else
+	./configure --disable-debug --disable-cassert --enable-tap-tests --with-icu
+fi
+
+make -sj4
+cd contrib
+make -sj4
+cd ..
diff --git a/ci/check.sh b/ci/check.sh
new file mode 100644
index 00000000000..faa8c25e84a
--- /dev/null
+++ b/ci/check.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+set -eu
+
+# remove the soft limit on core dump size
+ulimit -S -c unlimited
+# set the core dump file name pattern
+mkdir -p "/tmp/cores-$GITHUB_SHA-$TIMESTAMP"
+sudo sh -c "echo \"/tmp/cores-$GITHUB_SHA-$TIMESTAMP/%t_%p_%s.core\" > /proc/sys/kernel/core_pattern"
+
+make check-world -j4
diff --git a/ci/check_output.sh b/ci/check_output.sh
new file mode 100644
index 00000000000..ae26cf63d68
--- /dev/null
+++ b/ci/check_output.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -eu
+
+status=0
+
+# show regression diffs, if any exist
+for f in $(find . -name regression.diffs); do
+	echo "========= Contents of $f"
+	cat "$f"
+	status=1
+done
+
+# check for core dumps, if any
+cores=$(find "/tmp/cores-$GITHUB_SHA-$TIMESTAMP/" -name '*.core' 2>/dev/null || true)
+
+if [ -n "$cores" ]; then
+	for corefile in $cores ; do
+		if [[ $corefile != *_3.core ]]; then	# skip cores from SIGQUIT (signal 3)
+			binary=$(gdb -quiet -core $corefile -batch -ex 'info auxv' | grep AT_EXECFN | perl -pe "s/^.*\"(.*)\"\$/\$1/g")
+			echo dumping $corefile for $binary
+			gdb --batch --quiet -ex "thread apply all bt full" -ex "quit" $binary $corefile
+			status=1
+		fi
+	done
+fi
+
+rm -rf "/tmp/cores-$GITHUB_SHA-$TIMESTAMP"
+
+exit $status
diff --git a/ci/prerequisites.sh b/ci/prerequisites.sh
new file mode 100644
index 00000000000..b26251b711c
--- /dev/null
+++ b/ci/prerequisites.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -eu
+
+# print the hostname so the runner can be identified from the logs
+echo "HOSTNAME=`hostname`"
+TIMESTAMP=$(date +%s)
+echo "TIMESTAMP=$TIMESTAMP" >> $GITHUB_ENV
+echo "TIMESTAMP=$TIMESTAMP"
+
+sudo apt-get -y install -qq wget ca-certificates
+
+sudo apt-get update -qq
+
+apt_packages="build-essential flex bison pkg-config libreadline-dev make gdb libipc-run-perl libicu-dev python3 python3-dev python3-pip python3-setuptools python3-testresources"
+
+if [ $COMPILER = "clang" ]; then
+	apt_packages="$apt_packages llvm-$LLVM_VER clang-$LLVM_VER clang-tools-$LLVM_VER"
+fi
+
+# install required packages
+sudo apt-get -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" -y install -qq $apt_packages
diff --git a/configure b/configure
index 71155f46e0d..f66a81e682d 100755
--- a/configure
+++ b/configure
@@ -628,6 +628,7 @@ ac_includes_default="\
 ac_subst_vars='LTLIBOBJS
 vpath_build
 PG_SYSROOT
+ORIOLEDB_PATCHSET_VERSION
 PG_VERSION_NUM
 LDFLAGS_EX_BE
 PROVE
@@ -6657,6 +6658,99 @@ fi
   if test -n "$NOT_THE_CFLAGS"; then
     CFLAGS="$CFLAGS -Wno-cast-function-type-strict"
   fi
+  if test x"$host_cpu" == x"aarch64"; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -moutline-atomics, for CFLAGS" >&5
+$as_echo_n "checking whether ${CC} supports -moutline-atomics, for CFLAGS... " >&6; }
+if ${pgac_cv_prog_CC_cflags__moutline_atomics+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+pgac_save_CC=$CC
+CC=${CC}
+CFLAGS="${CFLAGS} -moutline-atomics"
+ac_save_c_werror_flag=$ac_c_werror_flag
+ac_c_werror_flag=yes
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_prog_CC_cflags__moutline_atomics=yes
+else
+  pgac_cv_prog_CC_cflags__moutline_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_c_werror_flag=$ac_save_c_werror_flag
+CFLAGS="$pgac_save_CFLAGS"
+CC="$pgac_save_CC"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__moutline_atomics" >&5
+$as_echo "$pgac_cv_prog_CC_cflags__moutline_atomics" >&6; }
+if test x"$pgac_cv_prog_CC_cflags__moutline_atomics" = x"yes"; then
+  CFLAGS="${CFLAGS} -moutline-atomics"
+fi
+
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS" >&5
+$as_echo_n "checking whether ${CXX} supports -moutline-atomics, for CXXFLAGS... " >&6; }
+if ${pgac_cv_prog_CXX_cxxflags__moutline_atomics+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CXXFLAGS=$CXXFLAGS
+pgac_save_CXX=$CXX
+CXX=${CXX}
+CXXFLAGS="${CXXFLAGS} -moutline-atomics"
+ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ac_cxx_werror_flag=yes
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  pgac_cv_prog_CXX_cxxflags__moutline_atomics=yes
+else
+  pgac_cv_prog_CXX_cxxflags__moutline_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+CXXFLAGS="$pgac_save_CXXFLAGS"
+CXX="$pgac_save_CXX"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&5
+$as_echo "$pgac_cv_prog_CXX_cxxflags__moutline_atomics" >&6; }
+if test x"$pgac_cv_prog_CXX_cxxflags__moutline_atomics" = x"yes"; then
+  CXXFLAGS="${CXXFLAGS} -moutline-atomics"
+fi
+
+
+  fi
 elif test "$ICC" = yes; then
   # Intel's compiler has a bug/misoptimization in checking for
   # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -15706,7 +15800,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
+for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -19386,6 +19480,10 @@ _ACEOF
 
 
 
+# Needed so the OrioleDB extension build can check the PostgreSQL patchset git tag
+ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`
+
+
 # If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not
 # literally, so that it's possible to override it at build time using
 # a command like "make ... PG_SYSROOT=path".  This has to be done after
diff --git a/configure.ac b/configure.ac
index bf0cfdf2ba9..e4ebb267341 100644
--- a/configure.ac
+++ b/configure.ac
@@ -579,6 +579,10 @@ if test "$GCC" = yes -a "$ICC" = no; then
   if test -n "$NOT_THE_CFLAGS"; then
     CFLAGS="$CFLAGS -Wno-cast-function-type-strict"
   fi
+  if test x"$host_cpu" == x"aarch64"; then
+    PGAC_PROG_CC_CFLAGS_OPT([-moutline-atomics])
+    PGAC_PROG_CXX_CFLAGS_OPT([-moutline-atomics])
+  fi
 elif test "$ICC" = yes; then
   # Intel's compiler has a bug/misoptimization in checking for
   # division by NAN (NaN == 0), -mp1 fixes it, so add it to the CFLAGS.
@@ -1804,7 +1808,6 @@ AC_CHECK_FUNCS(m4_normalize([
 	memset_s
 	posix_fallocate
 	ppoll
-	pthread_is_threaded_np
 	setproctitle
 	setproctitle_fast
 	strchrnul
@@ -2414,6 +2417,10 @@ $AWK '{printf "%d%04d", $1, $2}'`"]
 AC_DEFINE_UNQUOTED(PG_VERSION_NUM, $PG_VERSION_NUM, [PostgreSQL version as a number])
 AC_SUBST(PG_VERSION_NUM)
 
+# Needed so the OrioleDB extension build can check the PostgreSQL patchset git tag
+[ORIOLEDB_PATCHSET_VERSION=`git describe --tags | cut -d'_' -f2`]
+AC_SUBST(ORIOLEDB_PATCHSET_VERSION)
+
 # If we are inserting PG_SYSROOT into CPPFLAGS, do so symbolically not
 # literally, so that it's possible to override it at build time using
 # a command like "make ... PG_SYSROOT=path".  This has to be done after
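[Editor's note] The -moutline-atomics probe added above matters on aarch64: with the flag, the compiler emits calls to out-of-line libgcc helpers that select LSE atomic instructions at run time when the CPU supports them, instead of always using load/store-exclusive loops. A minimal standalone sketch of the kind of code affected (hypothetical example, not part of the patch):

#include <stdatomic.h>

/*
 * On aarch64, building this with -moutline-atomics makes the compiler call
 * an outlined helper (e.g. __aarch64_ldadd8_acq_rel) that dispatches to LSE
 * instructions when the CPU advertises them, falling back to LL/SC loops
 * otherwise.  The C source stays unchanged.
 */
long
counter_add(_Atomic long *counter, long delta)
{
	return atomic_fetch_add(counter, delta);
}
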
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index b90145148d4..99aed8f9948 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -172,7 +172,7 @@ blbuildempty(Relation index)
  */
 bool
 blinsert(Relation index, Datum *values, bool *isnull,
-		 ItemPointer ht_ctid, Relation heapRel,
+		 Datum tupleid, Relation heapRel,
 		 IndexUniqueCheck checkUnique,
 		 bool indexUnchanged,
 		 IndexInfo *indexInfo)
@@ -189,6 +189,7 @@ blinsert(Relation index, Datum *values, bool *isnull,
 	BlockNumber blkno = InvalidBlockNumber;
 	OffsetNumber nStart;
 	GenericXLogState *state;
+	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
 
 	insertCtx = AllocSetContextCreate(CurrentMemoryContext,
 									  "Bloom insert temporary context",
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
index 330811ec608..15ef1b9aee2 100644
--- a/contrib/bloom/bloom.h
+++ b/contrib/bloom/bloom.h
@@ -189,7 +189,7 @@ extern bool blvalidate(Oid opclassoid);
 
 /* index access method interface functions */
 extern bool blinsert(Relation index, Datum *values, bool *isnull,
-					 ItemPointer ht_ctid, Relation heapRel,
+					 Datum tupleid, Relation heapRel,
 					 IndexUniqueCheck checkUnique,
 					 bool indexUnchanged,
 					 struct IndexInfo *indexInfo);
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index f23fbb1d9e0..d92858a3433 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -130,7 +130,8 @@ blhandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = blbuild;
 	amroutine->ambuildempty = blbuildempty;
-	amroutine->aminsert = blinsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = blinsert;
 	amroutine->ambulkdelete = blbulkdelete;
 	amroutine->amvacuumcleanup = blvacuumcleanup;
 	amroutine->amcanreturn = NULL;
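[Editor's note] Every in-core index AM in this patch follows the conversion shown here for bloom: aminsert is set to NULL and the implementation moves to aminsertextended, which receives the row identity as a Datum instead of an ItemPointer. A minimal sketch of the new callback for a hypothetical out-of-tree index AM (names are illustrative):

/* sketch of an index AM adapting to the aminsertextended interface */
static bool
myaminsert(Relation index, Datum *values, bool *isnull,
		   Datum tupleid, Relation heapRel,
		   IndexUniqueCheck checkUnique,
		   bool indexUnchanged,
		   IndexInfo *indexInfo)
{
	/* TID-based AMs recover the heap pointer from the Datum */
	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);

	/* ... build and insert the index tuple for ht_ctid ... */
	return false;
}

Datum
myamhandler(PG_FUNCTION_ARGS)
{
	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

	amroutine->aminsert = NULL;	/* legacy TID-only path unused */
	amroutine->aminsertextended = myaminsert;
	/* ... other callbacks ... */
	PG_RETURN_POINTER(amroutine);
}
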
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 0f0252558c5..d1ac2fd85ee 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -364,6 +364,7 @@ tuple_data_split_internal(Oid relid, char *tupdata,
 				 */
 				if (VARATT_IS_EXTERNAL(tupdata + off) &&
 					!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
+					!VARATT_IS_EXTERNAL_ORIOLEDB(tupdata + off) &&
 					!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
 					ereport(ERROR,
 							(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c
index 12d1d0505d7..dedc4be074f 100644
--- a/contrib/test_decoding/test_decoding.c
+++ b/contrib/test_decoding/test_decoding.c
@@ -578,7 +578,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_
 		/* print data */
 		if (isnull)
 			appendStringInfoString(s, "null");
-		else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval))
+		else if (typisvarlena && (VARATT_IS_EXTERNAL_ONDISK(origval) || VARATT_IS_EXTERNAL_ORIOLEDB(origval)))
 			appendStringInfoString(s, "unchanged-toast-datum");
 		else if (!typisvarlena)
 			print_literal(s, typid,
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 30eda37afa8..cee79776683 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -139,6 +139,7 @@ typedef struct IndexAmRoutine
     ambuild_function ambuild;
     ambuildempty_function ambuildempty;
     aminsert_function aminsert;
+    aminsert_extended_function aminsertextended;
     ambulkdelete_function ambulkdelete;
     amvacuumcleanup_function amvacuumcleanup;
     amcanreturn_function amcanreturn;   /* can be NULL */
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 2de747ec37f..01d20462e33 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -284,6 +284,16 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-e <replaceable class="parameter">path</replaceable></option></term>
+      <term><option>--extension=<replaceable class="parameter">path</replaceable></option></term>
+      <listitem>
+       <para>
+        Load a shared library that performs a custom rewind for a postgres extension. The <replaceable class="parameter">path</replaceable> may be absolute or relative to <literal>PKGLIBDIR</literal>. The file extension is optional. Multiple libraries can be loaded by giving multiple <option>-e</option> switches.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/meson.build b/meson.build
index 56454cc3395..4158f96ad41 100644
--- a/meson.build
+++ b/meson.build
@@ -153,6 +153,8 @@ cdata.set('PG_VERSION_NUM', pg_version_num)
 # PG_VERSION_STR is built later, it depends on compiler test results
 cdata.set_quoted('CONFIGURE_ARGS', '')
 
+git_describe_tags = run_command('git', 'describe', '--tags', check: true)
+orioledb_patchset_version = git_describe_tags.stdout().strip().split('_')[1]
 
 
 ###############################################################
@@ -2539,7 +2541,6 @@ func_checks = [
   ['posix_fallocate'],
   ['ppoll'],
   ['pthread_barrier_wait', {'dependencies': [thread_dep]}],
-  ['pthread_is_threaded_np', {'dependencies': [thread_dep]}],
   ['sem_init', {'dependencies': [rt_dep, thread_dep], 'skip': sema_kind != 'unnamed_posix', 'define': false}],
   ['setproctitle', {'dependencies': [util_dep]}],
   ['setproctitle_fast'],
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index cc4dc6de91e..ccae8c39d87 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -42,6 +42,9 @@ VERSION_NUM = @PG_VERSION_NUM@
 
 PACKAGE_URL = @PACKAGE_URL@
 
+# OrioleDB patchset git tag number
+ORIOLEDB_PATCHSET_VERSION = @ORIOLEDB_PATCHSET_VERSION@
+
 # Set top_srcdir, srcdir, and VPATH.
 ifdef PGXS
 top_srcdir = $(top_builddir)
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index a257903991d..38469a5a554 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -116,7 +116,8 @@ brinhandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = brinbuild;
 	amroutine->ambuildempty = brinbuildempty;
-	amroutine->aminsert = brininsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = brininsert;
 	amroutine->ambulkdelete = brinbulkdelete;
 	amroutine->amvacuumcleanup = brinvacuumcleanup;
 	amroutine->amcanreturn = NULL;
@@ -154,7 +155,7 @@ brinhandler(PG_FUNCTION_ARGS)
  */
 bool
 brininsert(Relation idxRel, Datum *values, bool *nulls,
-		   ItemPointer heaptid, Relation heapRel,
+		   Datum tupleid, Relation heapRel,
 		   IndexUniqueCheck checkUnique,
 		   bool indexUnchanged,
 		   IndexInfo *indexInfo)
@@ -168,6 +169,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
 	MemoryContext tupcxt = NULL;
 	MemoryContext oldcxt = CurrentMemoryContext;
 	bool		autosummarize = BrinGetAutoSummarize(idxRel);
+	ItemPointer heaptid = DatumGetItemPointer(tupleid);
 
 	revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
 
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 108e0126a14..f54dcb03517 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -26,9 +26,10 @@ static struct varlena *toast_fetch_datum(struct varlena *attr);
 static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
 											   int32 sliceoffset,
 											   int32 slicelength);
-static struct varlena *toast_decompress_datum(struct varlena *attr);
 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
 
+static ToastFunc o_detoast_func = NULL;
+
 /* ----------
  * detoast_external_attr -
  *
@@ -46,7 +47,7 @@ detoast_external_attr(struct varlena *attr)
 {
 	struct varlena *result;
 
-	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
 	{
 		/*
 		 * This is an external stored plain value
@@ -115,7 +116,7 @@ detoast_external_attr(struct varlena *attr)
 struct varlena *
 detoast_attr(struct varlena *attr)
 {
-	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
 	{
 		/*
 		 * This is an externally stored datum --- fetch it back from there
@@ -223,7 +224,14 @@ detoast_attr_slice(struct varlena *attr,
 	else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
 		slicelength = slicelimit = -1;
 
-	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+	{
+		Assert(o_detoast_func != NULL);
+		preslice = o_detoast_func(attr);
+		if (preslice == NULL)
+			elog(ERROR, "unexpected NULL detoast result");
+	}
+	else if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		struct varatt_external toast_pointer;
 
@@ -332,6 +340,18 @@ detoast_attr_slice(struct varlena *attr,
 	return result;
 }
 
+void
+register_o_detoast_func(ToastFunc func)
+{
+	o_detoast_func = func;
+}
+
+void
+deregister_o_detoast_func(void)
+{
+	o_detoast_func = NULL;
+}
+
 /* ----------
  * toast_fetch_datum -
  *
@@ -347,6 +367,17 @@ toast_fetch_datum(struct varlena *attr)
 	struct varatt_external toast_pointer;
 	int32		attrsize;
 
+	if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+	{
+		if (o_detoast_func != NULL)
+		{
+			result = o_detoast_func(attr);
+			if (result == NULL)
+				elog(ERROR, "unexpected NULL detoast result");
+			return result;
+		}
+	}
+
 	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
 		elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
 
@@ -467,7 +498,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
  *
  * Decompress a compressed version of a varlena datum
  */
-static struct varlena *
+struct varlena *
 toast_decompress_datum(struct varlena *attr)
 {
 	ToastCompressionId cmid;
@@ -547,11 +578,17 @@ toast_raw_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+	{
+		OToastExternal *toasted = (OToastExternal *) VARDATA_EXTERNAL(attr);
+		result = toasted->raw_size + VARHDRSZ;
+	}
+	else if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
-		/* va_rawsize is the size of the original datum -- including header */
 		struct varatt_external toast_pointer;
 
+		/* va_rawsize is the size of the original datum -- including header */
+
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_rawsize;
 	}
@@ -603,7 +640,12 @@ toast_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+	{
+		OToastExternal *toasted = (OToastExternal *) VARDATA_EXTERNAL(attr);
+		result = toasted->toasted_size - VARHDRSZ;
+	}
+	else if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		/*
 		 * Attribute is stored externally - return the extsize whether
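[Editor's note] The new register_o_detoast_func()/deregister_o_detoast_func() pair lets a table AM extension plug its own detoasting into the common detoast paths for VARATT_IS_EXTERNAL_ORIOLEDB datums. A hedged sketch of how an extension might use it, assuming ToastFunc is a callback taking and returning struct varlena * (as the call sites above suggest) and reconstruct_value is a hypothetical helper:

/* sketch: wiring a custom detoast callback from an extension's _PG_init() */
extern struct varlena *reconstruct_value(struct varlena *attr);	/* hypothetical */

static struct varlena *
my_detoast(struct varlena *attr)
{
	/*
	 * Fetch and reassemble the value referenced by the custom TOAST
	 * pointer.  Must not return NULL -- callers treat that as an error.
	 */
	return reconstruct_value(attr);
}

void
_PG_init(void)
{
	/* deregister_o_detoast_func() would undo this on unload */
	register_o_detoast_func(my_detoast);
}
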
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 6bedbdf07ff..75d9c272177 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -756,6 +756,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
 		case TableOidAttributeNumber:
 			result = ObjectIdGetDatum(tup->t_tableOid);
 			break;
+		case RowIdAttributeNumber:
+			*isnull = true;
+			result = 0;
+			break;
 		default:
 			elog(ERROR, "invalid attnum: %d", attnum);
 			result = 0;			/* keep compiler quiet */
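[Editor's note] For the heap, the new RowIdAttributeNumber system attribute simply reads as NULL; table AMs with non-TID row identities can return a real value. A small sketch of reading it through the generic slot interface (hypothetical call site):

/* sketch: reading the row-id system attribute from a slot */
Datum		rowid;
bool		isnull;

rowid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
if (isnull)
{
	/* heap tuples have no separate row id; fall back to the TID */
}
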
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 469de9bb49f..a19499af976 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -24,6 +24,7 @@
 #include "access/nbtree.h"
 #include "access/reloptions.h"
 #include "access/spgist_private.h"
+#include "access/tableam.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
 #include "commands/tablespace.h"
@@ -1379,7 +1380,7 @@ untransformRelOptions(Datum options)
  */
 bytea *
 extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
-				  amoptions_function amoptions)
+				  const TableAmRoutine *tableam, amoptions_function amoptions)
 {
 	bytea	   *options;
 	bool		isnull;
@@ -1401,7 +1402,8 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
 		case RELKIND_RELATION:
 		case RELKIND_TOASTVALUE:
 		case RELKIND_MATVIEW:
-			options = heap_reloptions(classForm->relkind, datum, false);
+			options = tableam_reloptions(tableam, classForm->relkind,
+										 datum, false);
 			break;
 		case RELKIND_PARTITIONED_TABLE:
 			options = partitioned_table_reloptions(datum, false);
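[Editor's note] extractRelOptions() now routes table relkinds through tableam_reloptions(), so each table AM can parse its own storage options instead of always getting heap_reloptions(). A hedged sketch of what the dispatch presumably looks like, assuming the patch adds a reloptions callback to TableAmRoutine (the callback itself is not shown in this hunk):

/* sketch: per-AM reloptions dispatch, assuming a reloptions callback */
bytea *
tableam_reloptions(const TableAmRoutine *tableam, char relkind,
				   Datum reloptions, bool validate)
{
	/* a custom AM interprets the options itself ... */
	if (tableam != NULL && tableam->reloptions != NULL)
		return tableam->reloptions(relkind, reloptions, validate);

	/* ... otherwise fall back to the heap parsing */
	return heap_reloptions(relkind, reloptions, validate);
}
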
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 4cf956a759c..4b281ed438d 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -262,7 +262,12 @@ toast_get_compression_id(struct varlena *attr)
 	 * the external toast pointer.  If compressed inline, fetch it from the
 	 * toast compression header.
 	 */
-	if (VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (VARATT_IS_EXTERNAL_ORIOLEDB(attr))
+	{
+		OToastExternal *toasted = (OToastExternal *) VARDATA_EXTERNAL(attr);
+		cmid = toasted->formatFlags >> ORIOLEDB_EXT_FORMAT_FLAGS_BITS;
+	}
+	else if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		struct varatt_external toast_pointer;
 
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 588825ed85d..9b6a5d9091c 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -240,7 +240,7 @@ toast_save_datum(Relation rel, Datum value,
 		{
 			struct varatt_external old_toast_pointer;
 
-			Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
+			Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal) || VARATT_IS_EXTERNAL_ORIOLEDB(oldexternal));
 			/* Must copy to access aligned fields */
 			VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
 			if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
@@ -396,7 +396,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative)
 	int			validIndex;
 	SnapshotData SnapshotToast;
 
-	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+	if (!VARATT_IS_EXTERNAL_ONDISK(attr) && !VARATT_IS_EXTERNAL_ORIOLEDB(attr))
 		return;
 
 	/* Must copy to access aligned fields */
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 56968b95acf..36815547151 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -484,7 +484,7 @@ ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
 
 bool
 gininsert(Relation index, Datum *values, bool *isnull,
-		  ItemPointer ht_ctid, Relation heapRel,
+		  Datum tupleid, Relation heapRel,
 		  IndexUniqueCheck checkUnique,
 		  bool indexUnchanged,
 		  IndexInfo *indexInfo)
@@ -493,6 +493,7 @@ gininsert(Relation index, Datum *values, bool *isnull,
 	MemoryContext oldCtx;
 	MemoryContext insertCtx;
 	int			i;
+	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
 
 	/* Initialize GinState cache if first call in this statement */
 	if (ginstate == NULL)
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 7a4cd93f301..52d9a725fc4 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -63,7 +63,8 @@ ginhandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = ginbuild;
 	amroutine->ambuildempty = ginbuildempty;
-	amroutine->aminsert = gininsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = gininsert;
 	amroutine->ambulkdelete = ginbulkdelete;
 	amroutine->amvacuumcleanup = ginvacuumcleanup;
 	amroutine->amcanreturn = NULL;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 8ef5fa03290..73193f0970d 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -85,7 +85,8 @@ gisthandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = gistbuild;
 	amroutine->ambuildempty = gistbuildempty;
-	amroutine->aminsert = gistinsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = gistinsert;
 	amroutine->ambulkdelete = gistbulkdelete;
 	amroutine->amvacuumcleanup = gistvacuumcleanup;
 	amroutine->amcanreturn = gistcanreturn;
@@ -156,7 +157,7 @@ gistbuildempty(Relation index)
  */
 bool
 gistinsert(Relation r, Datum *values, bool *isnull,
-		   ItemPointer ht_ctid, Relation heapRel,
+		   Datum tupleid, Relation heapRel,
 		   IndexUniqueCheck checkUnique,
 		   bool indexUnchanged,
 		   IndexInfo *indexInfo)
@@ -164,6 +165,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
 	GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
 	IndexTuple	itup;
 	MemoryContext oldCxt;
+	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
 
 	/* Initialize GISTSTATE cache if first call in this statement */
 	if (giststate == NULL)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index fc5d97f606e..ffddf7b900c 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -82,7 +82,8 @@ hashhandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = hashbuild;
 	amroutine->ambuildempty = hashbuildempty;
-	amroutine->aminsert = hashinsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = hashinsert;
 	amroutine->ambulkdelete = hashbulkdelete;
 	amroutine->amvacuumcleanup = hashvacuumcleanup;
 	amroutine->amcanreturn = NULL;
@@ -247,7 +248,7 @@ hashbuildCallback(Relation index,
  */
 bool
 hashinsert(Relation rel, Datum *values, bool *isnull,
-		   ItemPointer ht_ctid, Relation heapRel,
+		   Datum tupleid, Relation heapRel,
 		   IndexUniqueCheck checkUnique,
 		   bool indexUnchanged,
 		   IndexInfo *indexInfo)
@@ -255,6 +256,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
 	Datum		index_values[1];
 	bool		index_isnull[1];
 	IndexTuple	itup;
+	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
 
 	/* convert data to a hash key; on failure, do not insert anything */
 	if (!_hash_convert_tuple(rel,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 64f84a2e4bd..43d2bbcf84b 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2512,10 +2512,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
 }
 
 /*
- *	heap_delete - delete a tuple
+ *	heap_delete - delete a tuple, optionally fetching it into a slot
  *
  * See table_tuple_delete() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot.  Also, this routine
+ * doesn't place a lock on the tuple; it just fetches the existing version.
  *
  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
  * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2524,8 +2525,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
  */
 TM_Result
 heap_delete(Relation relation, ItemPointer tid,
-			CommandId cid, Snapshot crosscheck, bool wait,
-			TM_FailureData *tmfd, bool changingPart)
+			CommandId cid, Snapshot crosscheck, int options,
+			TM_FailureData *tmfd, bool changingPart,
+			TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -2603,7 +2605,7 @@ heap_delete(Relation relation, ItemPointer tid,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("attempted to delete invisible tuple")));
 	}
-	else if (result == TM_BeingModified && wait)
+	else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
 	{
 		TransactionId xwait;
 		uint16		infomask;
@@ -2744,7 +2746,30 @@ heap_delete(Relation relation, ItemPointer tid,
 			tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
 		else
 			tmfd->cmax = InvalidCommandId;
-		UnlockReleaseBuffer(buffer);
+
+		/*
+		 * If we're asked to lock the updated tuple, we just fetch the
+		 * existing tuple.  That lets the caller save some resources when
+		 * placing the lock.
+		 */
+		if (result == TM_Updated &&
+			(options & TABLE_MODIFY_LOCK_UPDATED))
+		{
+			BufferHeapTupleTableSlot *bslot;
+
+			Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+			bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+			bslot->base.tupdata = tp;
+			ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+										   oldSlot,
+										   buffer);
+		}
+		else
+		{
+			UnlockReleaseBuffer(buffer);
+		}
 		if (have_tuple_lock)
 			UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
 		if (vmbuffer != InvalidBuffer)
@@ -2918,8 +2943,24 @@ heap_delete(Relation relation, ItemPointer tid,
 	 */
 	CacheInvalidateHeapTuple(relation, &tp, NULL);
 
-	/* Now we can release the buffer */
-	ReleaseBuffer(buffer);
+	/* Fetch the old tuple version if we're asked for that. */
+	if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+	{
+		BufferHeapTupleTableSlot *bslot;
+
+		Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+		bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+		bslot->base.tupdata = tp;
+		ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+									   oldSlot,
+									   buffer);
+	}
+	else
+	{
+		/* Now we can release the buffer */
+		ReleaseBuffer(buffer);
+	}
 
 	/*
 	 * Release the lmgr tuple lock, if we had it.
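[Editor's note] With the wait boolean replaced by an options bitmask, callers of heap_delete() can now also ask for the deleted tuple back. A minimal sketch of a caller using the new flags (hypothetical snippet; the TABLE_MODIFY_* values come from tableam.h in this patchset):

/* sketch: delete a row while fetching its old version into a slot */
TM_FailureData tmfd;
TM_Result	result;
TupleTableSlot *oldSlot = table_slot_create(rel, NULL);

result = heap_delete(rel, tid,
					 GetCurrentCommandId(true), InvalidSnapshot,
					 TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE,
					 &tmfd, false /* changingPart */ ,
					 oldSlot);
/* on TM_Ok, oldSlot holds the just-deleted tuple version, still pinned */
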
@@ -2951,8 +2992,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 
 	result = heap_delete(relation, tid,
 						 GetCurrentCommandId(true), InvalidSnapshot,
-						 true /* wait for commit */ ,
-						 &tmfd, false /* changingPart */ );
+						 TABLE_MODIFY_WAIT /* wait for commit */ ,
+						 &tmfd, false /* changingPart */ , NULL);
 	switch (result)
 	{
 		case TM_SelfModified:
@@ -2979,10 +3020,11 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 }
 
 /*
- *	heap_update - replace a tuple
+ *	heap_update - replace a tuple, optionally fetching it into a slot
  *
  * See table_tuple_update() for an explanation of the parameters, except that
- * this routine directly takes a tuple rather than a slot.
+ * this routine directly takes a tuple rather than a slot.  Also, this routine
+ * doesn't place a lock on the tuple; it just fetches the existing version.
  *
  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
  * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@@ -2991,9 +3033,9 @@ simple_heap_delete(Relation relation, ItemPointer tid)
  */
 TM_Result
 heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
-			CommandId cid, Snapshot crosscheck, bool wait,
+			CommandId cid, Snapshot crosscheck, int options,
 			TM_FailureData *tmfd, LockTupleMode *lockmode,
-			TU_UpdateIndexes *update_indexes)
+			TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -3170,7 +3212,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
 
 	/* see below about the "no wait" case */
-	Assert(result != TM_BeingModified || wait);
+	Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT));
 
 	if (result == TM_Invisible)
 	{
@@ -3179,7 +3221,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("attempted to update invisible tuple")));
 	}
-	else if (result == TM_BeingModified && wait)
+	else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
 	{
 		TransactionId xwait;
 		uint16		infomask;
@@ -3383,7 +3425,30 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 			tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
 		else
 			tmfd->cmax = InvalidCommandId;
-		UnlockReleaseBuffer(buffer);
+
+		/*
+		 * If we're asked to lock the updated tuple, we just fetch the
+		 * existing tuple.  That lets the caller save some resources when
+		 * placing the lock.
+		 */
+		if (result == TM_Updated &&
+			(options & TABLE_MODIFY_LOCK_UPDATED))
+		{
+			BufferHeapTupleTableSlot *bslot;
+
+			Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+			bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+			bslot->base.tupdata = oldtup;
+			ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+										   oldSlot,
+										   buffer);
+		}
+		else
+		{
+			UnlockReleaseBuffer(buffer);
+		}
 		if (have_tuple_lock)
 			UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
 		if (vmbuffer != InvalidBuffer)
@@ -3862,7 +3927,26 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	/* Now we can release the buffer(s) */
 	if (newbuf != buffer)
 		ReleaseBuffer(newbuf);
-	ReleaseBuffer(buffer);
+
+	/* Fetch the old tuple version if we're asked for that. */
+	if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
+	{
+		BufferHeapTupleTableSlot *bslot;
+
+		Assert(TTS_IS_BUFFERTUPLE(oldSlot));
+		bslot = (BufferHeapTupleTableSlot *) oldSlot;
+
+		bslot->base.tupdata = oldtup;
+		ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
+									   oldSlot,
+									   buffer);
+	}
+	else
+	{
+		/* Now we can release the buffer */
+		ReleaseBuffer(buffer);
+	}
+
 	if (BufferIsValid(vmbuffer_new))
 		ReleaseBuffer(vmbuffer_new);
 	if (BufferIsValid(vmbuffer))
@@ -4070,8 +4154,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
 
 	result = heap_update(relation, otid, tup,
 						 GetCurrentCommandId(true), InvalidSnapshot,
-						 true /* wait for commit */ ,
-						 &tmfd, &lockmode, update_indexes);
+						 TABLE_MODIFY_WAIT /* wait for commit */ ,
+						 &tmfd, &lockmode, update_indexes, NULL);
 	switch (result)
 	{
 		case TM_SelfModified:
@@ -4134,12 +4218,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  *		tuples.
  *
  * Output parameters:
- *	*tuple: all fields filled in
- *	*buffer: set to buffer holding tuple (pinned but not locked at exit)
+ *	*slot: BufferHeapTupleTableSlot filled with tuple
  *	*tmfd: filled in failure cases (see below)
  *
  * Function results are the same as the ones for table_tuple_lock().
  *
+ * If *slot already contains the target tuple, this function takes advantage
+ * of that by skipping the ReadBuffer() call.
+ *
  * In the failure cases other than TM_Invisible, the routine fills
  * *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
  * if necessary), and t_cmax (the last only for TM_SelfModified,
@@ -4150,15 +4236,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  * See README.tuplock for a thorough explanation of this mechanism.
  */
 TM_Result
-heap_lock_tuple(Relation relation, HeapTuple tuple,
+heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
 				CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
-				bool follow_updates,
-				Buffer *buffer, TM_FailureData *tmfd)
+				bool follow_updates, TM_FailureData *tmfd)
 {
 	TM_Result	result;
-	ItemPointer tid = &(tuple->t_self);
 	ItemId		lp;
 	Page		page;
+	Buffer		buffer;
 	Buffer		vmbuffer = InvalidBuffer;
 	BlockNumber block;
 	TransactionId xid,
@@ -4170,8 +4255,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	bool		skip_tuple_lock = false;
 	bool		have_tuple_lock = false;
 	bool		cleared_all_frozen = false;
+	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+	HeapTuple	tuple = &bslot->base.tupdata;
+
+	Assert(TTS_IS_BUFFERTUPLE(slot));
 
-	*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+	/* Take advantage if the slot already contains the relevant tuple */
+	if (!TTS_EMPTY(slot) &&
+		slot->tts_tableOid == relation->rd_id &&
+		ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
+		BufferIsValid(bslot->buffer))
+	{
+		buffer = bslot->buffer;
+		IncrBufferRefCount(buffer);
+	}
+	else
+	{
+		buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+	}
 	block = ItemPointerGetBlockNumber(tid);
 
 	/*
@@ -4180,21 +4281,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	 * in the middle of changing this, so we'll need to recheck after we have
 	 * the lock.
 	 */
-	if (PageIsAllVisible(BufferGetPage(*buffer)))
+	if (PageIsAllVisible(BufferGetPage(buffer)))
 		visibilitymap_pin(relation, block, &vmbuffer);
 
-	LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
-	page = BufferGetPage(*buffer);
+	page = BufferGetPage(buffer);
 	lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
 	Assert(ItemIdIsNormal(lp));
 
+	tuple->t_self = *tid;
 	tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
 	tuple->t_len = ItemIdGetLength(lp);
 	tuple->t_tableOid = RelationGetRelid(relation);
 
 l3:
-	result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
+	result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
 
 	if (result == TM_Invisible)
 	{
@@ -4223,7 +4325,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 		infomask2 = tuple->t_data->t_infomask2;
 		ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
 
-		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 		/*
 		 * If any subtransaction of the current top transaction already holds
@@ -4375,12 +4477,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 					{
 						result = res;
 						/* recovery code expects to have buffer lock held */
-						LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+						LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 						goto failed;
 					}
 				}
 
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 				/*
 				 * Make sure it's still an appropriate lock, else start over.
@@ -4415,7 +4517,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 			if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
 				!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
 			{
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 				/*
 				 * Make sure it's still an appropriate lock, else start over.
@@ -4443,7 +4545,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 					 * No conflict, but if the xmax changed under us in the
 					 * meantime, start over.
 					 */
-					LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+					LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 					if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
 						!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
 											 xwait))
@@ -4455,7 +4557,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 			}
 			else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
 			{
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 				/* if the xmax changed in the meantime, start over */
 				if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
@@ -4483,7 +4585,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 			TransactionIdIsCurrentTransactionId(xwait))
 		{
 			/* ... but if the xmax changed in the meantime, start over */
-			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 			if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
 				!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
 									 xwait))
@@ -4505,7 +4607,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 		 */
 		if (require_sleep && (result == TM_Updated || result == TM_Deleted))
 		{
-			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 			goto failed;
 		}
 		else if (require_sleep)
@@ -4530,7 +4632,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 				 */
 				result = TM_WouldBlock;
 				/* recovery code expects to have buffer lock held */
-				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 				goto failed;
 			}
 
@@ -4556,7 +4658,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 						{
 							result = TM_WouldBlock;
 							/* recovery code expects to have buffer lock held */
-							LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+							LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 							goto failed;
 						}
 						break;
@@ -4596,7 +4698,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 						{
 							result = TM_WouldBlock;
 							/* recovery code expects to have buffer lock held */
-							LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+							LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 							goto failed;
 						}
 						break;
@@ -4622,12 +4724,12 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 				{
 					result = res;
 					/* recovery code expects to have buffer lock held */
-					LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+					LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 					goto failed;
 				}
 			}
 
-			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 			/*
 			 * xwait is done, but if xwait had just locked the tuple then some
@@ -4649,7 +4751,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 				 * don't check for this in the multixact case, because some
 				 * locker transactions might still be running.
 				 */
-				UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+				UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
 			}
 		}
 
@@ -4708,9 +4810,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	 */
 	if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
 	{
-		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		visibilitymap_pin(relation, block, &vmbuffer);
-		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 		goto l3;
 	}
 
@@ -4773,7 +4875,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 		cleared_all_frozen = true;
 
 
-	MarkBufferDirty(*buffer);
+	MarkBufferDirty(buffer);
 
 	/*
 	 * XLOG stuff.  You might think that we don't need an XLOG record because
@@ -4793,7 +4895,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 		XLogRecPtr	recptr;
 
 		XLogBeginInsert();
-		XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
 		xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
 		xlrec.xmax = xid;
@@ -4814,7 +4916,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	result = TM_Ok;
 
 out_locked:
-	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 out_unlocked:
 	if (BufferIsValid(vmbuffer))
@@ -4832,6 +4934,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	if (have_tuple_lock)
 		UnlockTupleTuplock(relation, tid, mode);
 
+	/* Put the target tuple into the slot */
+	ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
+
 	return result;
 }
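[Editor's note] heap_lock_tuple() now works against a caller-supplied slot instead of a raw HeapTuple plus an output Buffer, and it reuses the slot's existing pin when the slot already holds the target row -- the common case after a heap_update()/heap_delete() with TABLE_MODIFY_LOCK_UPDATED has just fetched it. A hedged sketch of the new calling convention (hypothetical call site):

/* sketch: locking a row through the slot-based heap_lock_tuple() */
TM_FailureData tmfd;
TM_Result	result;
TupleTableSlot *slot = table_slot_create(rel, NULL);

result = heap_lock_tuple(rel, tid, slot,
						 GetCurrentCommandId(true),
						 LockTupleExclusive, LockWaitBlock,
						 true /* follow_updates */ ,
						 &tmfd);
/* on return, slot holds the locked tuple version; no separate Buffer out-param */
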
 
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 5a17112c91e..a32fc3b69fb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -23,6 +23,7 @@
 #include "access/heapam.h"
 #include "access/heaptoast.h"
 #include "access/multixact.h"
+#include "access/reloptions.h"
 #include "access/rewriteheap.h"
 #include "access/syncscan.h"
 #include "access/tableam.h"
@@ -45,6 +46,12 @@
 #include "utils/builtins.h"
 #include "utils/rel.h"
 
+static TM_Result heapam_tuple_lock(Relation relation, Datum tid,
+								   Snapshot snapshot, TupleTableSlot *slot,
+								   CommandId cid, LockTupleMode mode,
+								   LockWaitPolicy wait_policy, uint8 flags,
+								   TM_FailureData *tmfd);
+
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
 									 Datum *values, bool *isnull, RewriteState rwstate);
@@ -69,6 +76,20 @@ heapam_slot_callbacks(Relation relation)
 	return &TTSOpsBufferHeapTuple;
 }
 
+static RowRefType
+heapam_get_row_ref_type(Relation rel)
+{
+	return ROW_REF_TID;
+}
+
+static void
+heapam_free_rd_amcache(Relation rel)
+{
+	if (rel->rd_amcache)
+		pfree(rel->rd_amcache);
+	rel->rd_amcache = NULL;
+}
+
 
 /* ------------------------------------------------------------------------
  * Index Scan Callbacks for heap AM
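[Editor's note] Two new table AM callbacks appear here: heapam reports that its rows are addressed by TID (ROW_REF_TID) and exposes an explicit hook for freeing the relation's AM cache. Presumably an AM like OrioleDB would advertise a rowid-based identity instead; a hedged sketch (the enum value below is an assumption -- only ROW_REF_TID is visible in this diff):

/* sketch: a non-TID table AM advertising its row-identity kind */
static RowRefType
myam_get_row_ref_type(Relation rel)
{
	/* assumed counterpart to ROW_REF_TID for rowid-addressed tables */
	return ROW_REF_ROWID;
}
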
@@ -110,7 +131,7 @@ heapam_index_fetch_end(IndexFetchTableData *scan)
 
 static bool
 heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
-						 ItemPointer tid,
+						 Datum tupleid,
 						 Snapshot snapshot,
 						 TupleTableSlot *slot,
 						 bool *call_again, bool *all_dead)
@@ -118,6 +139,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 	IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	bool		got_heap_tuple;
+	ItemPointer tid = DatumGetItemPointer(tupleid);
 
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
@@ -178,7 +200,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 
 static bool
 heapam_fetch_row_version(Relation relation,
-						 ItemPointer tid,
+						 Datum tupleid,
 						 Snapshot snapshot,
 						 TupleTableSlot *slot)
 {
@@ -187,7 +209,7 @@ heapam_fetch_row_version(Relation relation,
 
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
-	bslot->base.tupdata.t_self = *tid;
+	bslot->base.tupdata.t_self = *DatumGetItemPointer(tupleid);
 	if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
 	{
 		/* store in slot, transferring existing pin */
@@ -237,7 +259,7 @@ heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
  * ----------------------------------------------------------------------------
  */
 
-static void
+static TupleTableSlot *
 heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
 					int options, BulkInsertState bistate)
 {
@@ -254,6 +276,8 @@ heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
 
 	if (shouldFree)
 		pfree(tuple);
+
+	return slot;
 }
 
 static void
@@ -296,36 +320,341 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
 		pfree(tuple);
 }
 
+/*
+ * ExecCheckTupleVisible -- verify tuple is visible
+ *
+ * It would not be consistent with guarantees of the higher isolation levels to
+ * proceed with avoiding insertion (taking speculative insertion's alternative
+ * path) on the basis of another tuple that is not visible to MVCC snapshot.
+ * Check for the need to raise a serialization failure, and do so as necessary.
+ */
+static void
+ExecCheckTupleVisible(EState *estate,
+					  Relation rel,
+					  TupleTableSlot *slot)
+{
+	if (!IsolationUsesXactSnapshot())
+		return;
+
+	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
+	{
+		Datum		xminDatum;
+		TransactionId xmin;
+		bool		isnull;
+
+		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+		Assert(!isnull);
+		xmin = DatumGetTransactionId(xminDatum);
+
+		/*
+		 * We should not raise a serialization failure if the conflict is
+		 * against a tuple inserted by our own transaction, even if it's not
+		 * visible to our snapshot.  (This would happen, for example, if
+		 * conflicting keys are proposed for insertion in a single command.)
+		 */
+		if (!TransactionIdIsCurrentTransactionId(xmin))
+			ereport(ERROR,
+					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+					 errmsg("could not serialize access due to concurrent update")));
+	}
+}
+
+/*
+ * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
+ */
+static void
+ExecCheckTIDVisible(EState *estate,
+					Relation rel,
+					ItemPointer tid,
+					TupleTableSlot *tempSlot)
+{
+	/* Redundantly check isolation level */
+	if (!IsolationUsesXactSnapshot())
+		return;
+
+	if (!table_tuple_fetch_row_version(rel, PointerGetDatum(tid),
+									   SnapshotAny, tempSlot))
+		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
+	ExecCheckTupleVisible(estate, rel, tempSlot);
+	ExecClearTuple(tempSlot);
+}
+
+static inline TupleTableSlot *
+heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo,
+								 TupleTableSlot *slot,
+								 CommandId cid, int options,
+								 struct BulkInsertStateData *bistate,
+								 List *arbiterIndexes,
+								 EState *estate,
+								 LockTupleMode lockmode,
+								 TupleTableSlot *lockedSlot,
+								 TupleTableSlot *tempSlot)
+{
+	Relation	rel = resultRelInfo->ri_RelationDesc;
+	uint32		specToken;
+	ItemPointerData conflictTid;
+	bool		specConflict;
+	List	   *recheckIndexes = NIL;
+
+	while (true)
+	{
+		specConflict = false;
+		if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate, &conflictTid,
+									   arbiterIndexes))
+		{
+			if (lockedSlot)
+			{
+				TM_Result	test;
+				TM_FailureData tmfd;
+				Datum		xminDatum;
+				TransactionId xmin;
+				bool		isnull;
+
+				/* Determine lock mode to use */
+				lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+
+				/*
+				 * Lock tuple for update.  Don't follow updates when tuple cannot be
+				 * locked without doing so.  A row locking conflict here means our
+				 * previous conclusion that the tuple is conclusively committed is not
+				 * true anymore.
+				 */
+				test = table_tuple_lock(rel, PointerGetDatum(&conflictTid),
+										estate->es_snapshot,
+										lockedSlot, estate->es_output_cid,
+										lockmode, LockWaitBlock, 0,
+										&tmfd);
+				switch (test)
+				{
+					case TM_Ok:
+						/* success! */
+						break;
+
+					case TM_Invisible:
+
+						/*
+						 * This can occur when a just inserted tuple is updated again in
+						 * the same command. E.g. because multiple rows with the same
+						 * conflicting key values are inserted.
+						 *
+						 * This is somewhat similar to the ExecUpdate() TM_SelfModified
+						 * case.  We do not want to proceed because it would lead to the
+						 * same row being updated a second time in some unspecified order,
+						 * and in contrast to plain UPDATEs there's no historical behavior
+						 * to break.
+						 *
+						 * It is the user's responsibility to prevent this situation from
+						 * occurring.  These problems are why the SQL standard similarly
+						 * specifies that for SQL MERGE, an exception must be raised in
+						 * the event of an attempt to update the same row twice.
+						 */
+						xminDatum = slot_getsysattr(lockedSlot,
+													MinTransactionIdAttributeNumber,
+													&isnull);
+						Assert(!isnull);
+						xmin = DatumGetTransactionId(xminDatum);
+
+						if (TransactionIdIsCurrentTransactionId(xmin))
+							ereport(ERROR,
+									(errcode(ERRCODE_CARDINALITY_VIOLATION),
+							/* translator: %s is a SQL command name */
+									errmsg("%s command cannot affect row a second time",
+											"ON CONFLICT DO UPDATE"),
+									errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
+
+						/* This shouldn't happen */
+						elog(ERROR, "attempted to lock invisible tuple");
+						break;
+
+					case TM_SelfModified:
+
+						/*
+						 * This state should never be reached. As a dirty snapshot is used
+						 * to find conflicting tuples, speculative insertion wouldn't have
+						 * seen this row to conflict with.
+						 */
+						elog(ERROR, "unexpected self-updated tuple");
+						break;
+
+					case TM_Updated:
+						if (IsolationUsesXactSnapshot())
+							ereport(ERROR,
+									(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+									errmsg("could not serialize access due to concurrent update")));
+
+						/*
+						 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
+						 * a partitioned table, we shouldn't reach a case where the tuple
+						 * to be locked has been moved to another partition due to a
+						 * concurrent update of the partition key.
+						 */
+						Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
+
+						/*
+						 * Tell caller to try again from the very start.
+						 *
+						 * It does not make sense to use the usual EvalPlanQual() style
+						 * loop here, as the new version of the row might not conflict
+						 * anymore, or the conflicting tuple has actually been deleted.
+						 */
+						ExecClearTuple(lockedSlot);
+						return NULL;
+
+					case TM_Deleted:
+						if (IsolationUsesXactSnapshot())
+							ereport(ERROR,
+									(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+									errmsg("could not serialize access due to concurrent delete")));
+
+						/* see TM_Updated case */
+						Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
+						ExecClearTuple(lockedSlot);
+						return NULL;
+
+					default:
+						elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+				}
+
+				/* Success, the tuple is locked. */
+
+				/*
+				 * Verify that the tuple is visible to our MVCC snapshot if the current
+				 * isolation level mandates that.
+				 *
+				 * It's not sufficient to rely on the check within ExecUpdate() as e.g.
+				 * CONFLICT ... WHERE clause may prevent us from reaching that.
+				 *
+				 * This means we only ever continue when a new command in the current
+				 * transaction could see the row, even though in READ COMMITTED mode the
+				 * tuple will not be visible according to the current statement's
+				 * snapshot.  This is in line with the way UPDATE deals with newer tuple
+				 * versions.
+				 */
+				ExecCheckTupleVisible(estate, rel, lockedSlot);
+				return NULL;
+			}
+			else
+			{
+				ExecCheckTIDVisible(estate, rel, &conflictTid, tempSlot);
+				return NULL;
+			}
+		}
+
+		/*
+		 * Before we start insertion proper, acquire our "speculative
+		 * insertion lock".  Others can use that to wait for us to decide
+		 * if we're going to go ahead with the insertion, instead of
+		 * waiting for the whole transaction to complete.
+		 */
+		specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+
+		/* insert the tuple, with the speculative token */
+		heapam_tuple_insert_speculative(rel, slot,
+										estate->es_output_cid,
+										0,
+										NULL,
+										specToken);
+
+		/* insert index entries for tuple */
+		recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+											   slot, estate, false, true,
+											   &specConflict,
+											   arbiterIndexes,
+											   false);
+
+		/* adjust the tuple's state accordingly */
+		heapam_tuple_complete_speculative(rel, slot,
+										  specToken, !specConflict);
+
+		/*
+		 * Wake up anyone waiting for our decision.  They will re-check
+		 * the tuple, see that it's no longer speculative, and wait on our
+		 * XID as if this was a regularly inserted tuple all along.  Or if
+		 * we killed the tuple, they will see it's dead, and proceed as if
+		 * the tuple never existed.
+		 */
+		SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+		/*
+		 * If there was a conflict, start from the beginning.  We'll do
+		 * the pre-check again, which will now find the conflicting tuple
+		 * (unless it aborts before we get there).
+		 */
+		if (specConflict)
+		{
+			list_free(recheckIndexes);
+			CHECK_FOR_INTERRUPTS();
+			continue;
+		}
+
+		return slot;
+	}
+}
+
 static TM_Result
-heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
-					Snapshot snapshot, Snapshot crosscheck, bool wait,
-					TM_FailureData *tmfd, bool changingPart)
+heapam_tuple_delete(Relation relation, Datum tupleid, CommandId cid,
+					Snapshot snapshot, Snapshot crosscheck, int options,
+					TM_FailureData *tmfd, bool changingPart,
+					TupleTableSlot *oldSlot)
 {
+	TM_Result	result;
+	ItemPointer tid = DatumGetItemPointer(tupleid);
+
 	/*
 	 * Currently Deleting of index tuples are handled at vacuum, in case if
 	 * the storage itself is cleaning the dead tuples by itself, it is the
 	 * time to call the index tuple deletion also.
 	 */
-	return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
+	result = heap_delete(relation, tid, cid, crosscheck, options,
+						 tmfd, changingPart, oldSlot);
+
+	/*
+	 * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for it by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
+	 * delete should succeed even if there are more concurrent update
+	 * attempts.
+	 */
+	if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+	{
+		/*
+		 * heapam_tuple_lock() will take advantage of the tuple already
+		 * loaded into oldSlot by heap_delete().
+		 */
+		result = heapam_tuple_lock(relation, tupleid, snapshot,
+								   oldSlot, cid, LockTupleExclusive,
+								   (options & TABLE_MODIFY_WAIT) ?
+								   LockWaitBlock :
+								   LockWaitSkip,
+								   TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+								   tmfd);
+
+		if (result == TM_Ok)
+			return TM_Updated;
+	}
+
+	return result;
 }
 
 
 static TM_Result
-heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+heapam_tuple_update(Relation relation, Datum tupleid, TupleTableSlot *slot,
 					CommandId cid, Snapshot snapshot, Snapshot crosscheck,
-					bool wait, TM_FailureData *tmfd,
-					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
+					int options, TM_FailureData *tmfd,
+					LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
+					TupleTableSlot *oldSlot)
 {
 	bool		shouldFree = true;
 	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
 	TM_Result	result;
+	ItemPointer otid = DatumGetItemPointer(tupleid);
 
 	/* Update the tuple with table oid */
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
-						 tmfd, lockmode, update_indexes);
+	result = heap_update(relation, otid, tuple, cid, crosscheck, options,
+						 tmfd, lockmode, update_indexes, oldSlot);
 	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
 
 	/*
@@ -352,19 +681,44 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 	if (shouldFree)
 		pfree(tuple);
 
+	/*
+	 * If the tuple has been concurrently updated, then get the lock on it.
+	 * (Do this only if the caller asked for it by setting the
+	 * TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
+	 * update should succeed even if there are more concurrent update
+	 * attempts.
+	 */
+	if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
+	{
+		/*
+		 * heapam_tuple_lock() will take advantage of the tuple already
+		 * loaded into oldSlot by heap_update().
+		 */
+		result = heapam_tuple_lock(relation, tupleid, snapshot,
+								   oldSlot, cid, *lockmode,
+								   (options & TABLE_MODIFY_WAIT) ?
+								   LockWaitBlock :
+								   LockWaitSkip,
+								   TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+								   tmfd);
+
+		if (result == TM_Ok)
+			return TM_Updated;
+	}
+
 	return result;
 }
 
 static TM_Result
-heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
+heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot,
 				  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				  LockWaitPolicy wait_policy, uint8 flags,
 				  TM_FailureData *tmfd)
 {
 	BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
 	TM_Result	result;
-	Buffer		buffer;
 	HeapTuple	tuple = &bslot->base.tupdata;
+	ItemPointer tid = DatumGetItemPointer(tupleid);
 	bool		follow_updates;
 
 	follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
@@ -373,9 +727,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
 tuple_lock_retry:
-	tuple->t_self = *tid;
-	result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
-							 follow_updates, &buffer, tmfd);
+	result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
+							 follow_updates, tmfd);
 
 	if (result == TM_Updated &&
 		(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
@@ -383,8 +736,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 		/* Should not encounter speculative tuple on recheck */
 		Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
 
-		ReleaseBuffer(buffer);
-
 		if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
 		{
 			SnapshotData SnapshotDirty;
@@ -406,6 +757,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 			InitDirtySnapshot(SnapshotDirty);
 			for (;;)
 			{
+				Buffer		buffer = InvalidBuffer;
+
 				if (ItemPointerIndicatesMovedPartitions(tid))
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@@ -500,7 +853,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 					/*
 					 * This is a live tuple, so try to lock it again.
 					 */
-					ReleaseBuffer(buffer);
+					ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
 					goto tuple_lock_retry;
 				}
 
@@ -511,7 +864,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 				 */
 				if (tuple->t_data == NULL)
 				{
-					Assert(!BufferIsValid(buffer));
+					ReleaseBuffer(buffer);
 					return TM_Deleted;
 				}
 
@@ -564,9 +917,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	/* store in slot, transferring existing pin */
-	ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
-
 	return result;
 }
 
@@ -2536,6 +2886,29 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 	}
 }
 
+static bool
+heapam_tuple_is_current(Relation rel, TupleTableSlot *slot)
+{
+	Datum		xminDatum;
+	TransactionId xmin;
+	bool		isnull;
+
+	xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+	Assert(!isnull);
+	xmin = DatumGetTransactionId(xminDatum);
+	return TransactionIdIsCurrentTransactionId(xmin);
+}
+
+static bytea *
+heapam_reloptions(char relkind, Datum reloptions, bool validate)
+{
+	if (relkind == RELKIND_RELATION ||
+		relkind == RELKIND_TOASTVALUE ||
+		relkind == RELKIND_MATVIEW)
+		return heap_reloptions(relkind, reloptions, validate);
+
+	return NULL;
+}
 
 /* ------------------------------------------------------------------------
  * Definition of the heap table access method.
@@ -2546,6 +2919,8 @@ static const TableAmRoutine heapam_methods = {
 	.type = T_TableAmRoutine,
 
 	.slot_callbacks = heapam_slot_callbacks,
+	.get_row_ref_type = heapam_get_row_ref_type,
+	.free_rd_amcache = heapam_free_rd_amcache,
 
 	.scan_begin = heap_beginscan,
 	.scan_end = heap_endscan,
@@ -2565,8 +2940,7 @@ static const TableAmRoutine heapam_methods = {
 	.index_fetch_tuple = heapam_index_fetch_tuple,
 
 	.tuple_insert = heapam_tuple_insert,
-	.tuple_insert_speculative = heapam_tuple_insert_speculative,
-	.tuple_complete_speculative = heapam_tuple_complete_speculative,
+	.tuple_insert_with_arbiter = heapam_tuple_insert_with_arbiter,
 	.multi_insert = heap_multi_insert,
 	.tuple_delete = heapam_tuple_delete,
 	.tuple_update = heapam_tuple_update,
@@ -2598,7 +2972,11 @@ static const TableAmRoutine heapam_methods = {
 	.scan_bitmap_next_block = heapam_scan_bitmap_next_block,
 	.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
 	.scan_sample_next_block = heapam_scan_sample_next_block,
-	.scan_sample_next_tuple = heapam_scan_sample_next_tuple
+	.scan_sample_next_tuple = heapam_scan_sample_next_tuple,
+
+	.tuple_is_current = heapam_tuple_is_current,
+
+	.reloptions = heapam_reloptions
 };
 
 
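The heapam_tuple_delete()/heapam_tuple_update() hunks above change the heap AM's modify entry points in one consistent way: tuple identity travels as a Datum, the old bool "wait" argument becomes an options bitmask, and the previous tuple version can be handed back through oldSlot. Below is a caller-side sketch of the resulting contract; the function name is hypothetical, but the table_tuple_delete() signature and the TABLE_MODIFY_* flags are taken from this patch (compare simple_table_tuple_delete() further down).

#include "postgres.h"
#include "access/tableam.h"
#include "access/xact.h"

/*
 * Hypothetical caller: delete a row, locking it first if it was
 * concurrently updated, and capture the old version in oldSlot.
 */
static TM_Result
demo_delete_locking_updated(Relation rel, ItemPointer tid,
							Snapshot snapshot, TupleTableSlot *oldSlot)
{
	TM_FailureData tmfd;
	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */

	options |= TABLE_MODIFY_FETCH_OLD_TUPLE;	/* fill oldSlot */
	options |= TABLE_MODIFY_LOCK_UPDATED;	/* lock concurrently updated rows */

	return table_tuple_delete(rel, PointerGetDatum(tid),
							  GetCurrentCommandId(true),
							  snapshot, InvalidSnapshot,
							  options, &tmfd,
							  false /* changingPart */ ,
							  oldSlot);
}
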
diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c
index 8b02cdbe825..ed2b9fc9e68 100644
--- a/src/backend/access/index/amapi.c
+++ b/src/backend/access/index/amapi.c
@@ -16,25 +16,27 @@
 #include "access/amapi.h"
 #include "access/htup_details.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_index.h"
 #include "catalog/pg_opclass.h"
 #include "utils/builtins.h"
 #include "utils/syscache.h"
 
+IndexAMRoutineHookType IndexAMRoutineHook = NULL;
 
-/*
- * GetIndexAmRoutine - call the specified access method handler routine to get
- * its IndexAmRoutine struct, which will be palloc'd in the caller's context.
- *
- * Note that if the amhandler function is built-in, this will not involve
- * any catalog access.  It's therefore safe to use this while bootstrapping
- * indexes for the system catalogs.  relcache.c relies on that.
- */
 IndexAmRoutine *
-GetIndexAmRoutine(Oid amhandler)
+GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler)
 {
 	Datum		datum;
 	IndexAmRoutine *routine;
 
+	if (IndexAMRoutineHook != NULL)
+	{
+		routine = IndexAMRoutineHook(tamoid, amhandler);
+		if (routine)
+			return routine;
+	}
+
 	datum = OidFunctionCall0(amhandler);
 	routine = (IndexAmRoutine *) DatumGetPointer(datum);
 
@@ -45,6 +47,52 @@ GetIndexAmRoutine(Oid amhandler)
 	return routine;
 }
 
+/*
+ * GetIndexAmRoutine - call the specified access method handler routine to get
+ * its IndexAmRoutine struct, which will be palloc'd in the caller's context.
+ *
+ * Note that if the amhandler function is built-in, this will not involve
+ * any catalog access.  It's therefore safe to use this while bootstrapping
+ * indexes for the system catalogs.  relcache.c relies on that.
+ */
+IndexAmRoutine *
+GetIndexAmRoutine(Oid amhandler)
+{
+	return GetIndexAmRoutineExtended(InvalidOid, amhandler);
+}
+
+IndexAmRoutine *
+GetIndexAmRoutineExtended(Oid indoid, Oid amhandler)
+{
+	HeapTuple	ht_idx;
+	HeapTuple	ht_tblrel;
+	Form_pg_index idxrec;
+	Form_pg_class tblrelrec;
+	Oid			indrelid;
+	Oid			tamoid;
+
+	if (!OidIsValid(indoid) || indoid < FirstNormalObjectId)
+		return GetIndexAmRoutineWithTableAM(HEAP_TABLE_AM_OID, amhandler);
+
+	ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indoid));
+	if (!HeapTupleIsValid(ht_idx))
+		elog(ERROR, "cache lookup failed for index %u", indoid);
+	idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
+	Assert(indoid == idxrec->indexrelid);
+	indrelid = idxrec->indrelid;
+
+	ht_tblrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indrelid));
+	if (!HeapTupleIsValid(ht_tblrel))
+		elog(ERROR, "cache lookup failed for relation %u", indrelid);
+	tblrelrec = (Form_pg_class) GETSTRUCT(ht_tblrel);
+	tamoid = tblrelrec->relam;
+
+	ReleaseSysCache(ht_tblrel);
+	ReleaseSysCache(ht_idx);
+
+	return GetIndexAmRoutineWithTableAM(tamoid, amhandler);
+}
+
 /*
  * GetIndexAmRoutineByAmId - look up the handler of the index access method
  * with the given OID, and get its IndexAmRoutine struct.
@@ -53,7 +101,7 @@ GetIndexAmRoutine(Oid amhandler)
  * noerror is true, else throws error.
  */
 IndexAmRoutine *
-GetIndexAmRoutineByAmId(Oid amoid, bool noerror)
+GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror)
 {
 	HeapTuple	tuple;
 	Form_pg_am	amform;
@@ -103,7 +151,7 @@ GetIndexAmRoutineByAmId(Oid amoid, bool noerror)
 	ReleaseSysCache(tuple);
 
 	/* And finally, call the handler function to get the API struct. */
-	return GetIndexAmRoutine(amhandler);
+	return GetIndexAmRoutineExtended(indoid, amhandler);
 }
 
 
@@ -129,7 +177,7 @@ amvalidate(PG_FUNCTION_ARGS)
 
 	ReleaseSysCache(classtup);
 
-	amroutine = GetIndexAmRoutineByAmId(amoid, false);
+	amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
 
 	if (amroutine->amvalidate == NULL)
 		elog(ERROR, "function amvalidate is not defined for index access method %u",
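
GetIndexAmRoutineWithTableAM() above consults the new IndexAMRoutineHook before falling back to calling the amhandler, and GetIndexAmRoutineExtended() resolves the owning table's AM OID so the hook can key off it. A sketch of how an extension might chain into the hook, assuming the hook variable is exported to extensions; MY_TABLE_AM_OID and make_my_index_routine() are hypothetical, and the hook signature is inferred from the call site above.

#include "postgres.h"
#include "access/amapi.h"

#define MY_TABLE_AM_OID 16384	/* hypothetical table AM OID */

extern IndexAmRoutine *make_my_index_routine(Oid amhandler);	/* hypothetical */

static IndexAMRoutineHookType prev_IndexAMRoutineHook = NULL;

static IndexAmRoutine *
my_index_am_routine_hook(Oid tamoid, Oid amhandler)
{
	/* take over index AM construction only for our own table AM */
	if (tamoid == MY_TABLE_AM_OID)
		return make_my_index_routine(amhandler);

	if (prev_IndexAMRoutineHook)
		return prev_IndexAMRoutineHook(tamoid, amhandler);

	return NULL;				/* NULL falls through to OidFunctionCall0() */
}

void
_PG_init(void)
{
	prev_IndexAMRoutineHook = IndexAMRoutineHook;
	IndexAMRoutineHook = my_index_am_routine_hook;
}
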
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 709b2641021..e0535503145 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -104,6 +104,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 		scan->orderByData = NULL;
 
 	scan->xs_want_itup = false; /* may be set later */
+	scan->xs_want_rowid = false; /* may be set later */
 
 	/*
 	 * During recovery we ignore killed tuples and don't bother to kill them
@@ -125,6 +126,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 	scan->xs_itupdesc = NULL;
 	scan->xs_hitup = NULL;
 	scan->xs_hitupdesc = NULL;
+	scan->xs_rowid.isnull = true;
 
 	return scan;
 }
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 715e91e25f0..94bdec63666 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -218,24 +218,99 @@ bool
 index_insert(Relation indexRelation,
 			 Datum *values,
 			 bool *isnull,
-			 ItemPointer heap_t_ctid,
+			 ItemPointer tupleid,
 			 Relation heapRelation,
 			 IndexUniqueCheck checkUnique,
 			 bool indexUnchanged,
 			 IndexInfo *indexInfo)
 {
 	RELATION_CHECKS;
-	CHECK_REL_PROCEDURE(aminsert);
+
+	if (indexRelation->rd_indam->aminsertextended == NULL &&
+		indexRelation->rd_indam->aminsert == NULL)
+		elog(ERROR, "at least one of aminsert or aminsertextended must be defined for index \"%s\"",
+			 RelationGetRelationName(indexRelation));
 
 	if (!(indexRelation->rd_indam->ampredlocks))
 		CheckForSerializableConflictIn(indexRelation,
 									   (ItemPointer) NULL,
 									   InvalidBlockNumber);
 
-	return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
-											 heap_t_ctid, heapRelation,
+	if (indexRelation->rd_indam->aminsert)
+	{
+		/* compatibility path for extension AMs that don't implement aminsertextended */
+		return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
+												 tupleid, heapRelation,
+												 checkUnique, indexUnchanged,
+												 indexInfo);
+	}
+	else
+	{
+		/*
+		 * Insert path for in-core AMs, and table AMs such as OrioleDB, that
+		 * implement aminsertextended.
+		 */
+		return indexRelation->rd_indam->aminsertextended(indexRelation, values, isnull,
+											 ItemPointerGetDatum(tupleid), heapRelation,
 											 checkUnique, indexUnchanged,
 											 indexInfo);
+	}
+}
+
+/* ----------------
+ *		index_update - update an index tuple in a relation
+ * ----------------
+ */
+bool
+index_update(Relation indexRelation,
+			 bool new_valid,
+			 bool old_valid,
+			 Datum *values,
+			 bool *isnull,
+			 Datum tupleid,
+			 Datum *valuesOld,
+			 bool *isnullOld,
+			 Datum oldTupleid,
+			 Relation heapRelation,
+			 IndexUniqueCheck checkUnique,
+			 IndexInfo *indexInfo)
+{
+	RELATION_CHECKS;
+	CHECK_REL_PROCEDURE(amupdate);
+
+	if (!(indexRelation->rd_indam->ampredlocks))
+		CheckForSerializableConflictIn(indexRelation,
+									   (ItemPointer) NULL,
+									   InvalidBlockNumber);
+
+	return indexRelation->rd_indam->amupdate(indexRelation,
+											 new_valid, old_valid,
+											 values, isnull, tupleid,
+											 valuesOld, isnullOld, oldTupleid,
+											 heapRelation,
+											 checkUnique,
+											 indexInfo);
+}
+
+/* ----------------
+ *		index_delete - delete an index tuple from a relation
+ * ----------------
+ */
+bool
+index_delete(Relation indexRelation,
+			 Datum *values, bool *isnull, Datum tupleid,
+			 Relation heapRelation,
+			 IndexInfo *indexInfo)
+{
+	RELATION_CHECKS;
+	CHECK_REL_PROCEDURE(amdelete);
+
+	if (!(indexRelation->rd_indam->ampredlocks))
+		CheckForSerializableConflictIn(indexRelation,
+									   (ItemPointer) NULL,
+									   InvalidBlockNumber);
+
+	return indexRelation->rd_indam->amdelete(indexRelation,
+											 values, isnull, tupleid,
+											 heapRelation,
+											 indexInfo);
 }
 
 /*
@@ -603,6 +678,55 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
 	return &scan->xs_heaptid;
 }
 
+/* ----------------
+ * index_getnext_rowid - get the next ROWID from a scan
+ *
+ * The result is the next ROWID satisfying the scan keys, or a null
+ * NullableDatum if no more matching tuples exist.
+ * ----------------
+ */
+NullableDatum
+index_getnext_rowid(IndexScanDesc scan, ScanDirection direction)
+{
+	NullableDatum result;
+	bool		found;
+
+	SCAN_CHECKS;
+	CHECK_SCAN_PROCEDURE(amgettuple);
+
+	/* XXX: we should assert that a snapshot is pushed or registered */
+	Assert(TransactionIdIsValid(RecentXmin));
+
+	/*
+	 * The AM's amgettuple proc finds the next index entry matching the scan
+	 * keys, and puts the ROWID into scan->xs_rowid.  It should also set
+	 * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we
+	 * pay no attention to those fields here.
+	 */
+	found = scan->indexRelation->rd_indam->amgettuple(scan, direction);
+
+	/* Reset kill flag immediately for safety */
+	scan->kill_prior_tuple = false;
+	scan->xs_heap_continue = false;
+
+	/* If we're out of index entries, we're done */
+	if (!found)
+	{
+		/* release resources (like buffer pins) from table accesses */
+		if (scan->xs_heapfetch)
+			table_index_fetch_reset(scan->xs_heapfetch);
+
+		result.value = (Datum) 0;
+		result.isnull = true;
+		return result;
+	}
+	/* Assert(RowidIsValid(&scan->xs_rowid)); */
+
+	pgstat_count_index_tuples(scan->indexRelation, 1);
+
+	/* Return the ROWID of the tuple we found. */
+	return scan->xs_rowid;
+}
+
 /* ----------------
  *		index_fetch_heap - get the scan's next heap tuple
  *
@@ -626,8 +750,17 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
 {
 	bool		all_dead = false;
 	bool		found;
+	Datum		tupleid;
+
+	if (scan->xs_want_rowid)
+	{
+		Assert(!scan->xs_rowid.isnull);
+		tupleid = scan->xs_rowid.value;
+	}
+	else
+		tupleid = PointerGetDatum(&scan->xs_heaptid);
 
-	found = table_index_fetch_tuple(scan->xs_heapfetch, &scan->xs_heaptid,
+	found = table_index_fetch_tuple(scan->xs_heapfetch, tupleid,
 									scan->xs_snapshot, slot,
 									&scan->xs_heap_continue, &all_dead);
 
@@ -669,16 +802,30 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *
 	{
 		if (!scan->xs_heap_continue)
 		{
-			ItemPointer tid;
+			if (scan->xs_want_rowid)
+			{
+				NullableDatum rowid;
+
+				/* Time to fetch the next ROWID from the index */
+				rowid = index_getnext_rowid(scan, direction);
 
-			/* Time to fetch the next TID from the index */
-			tid = index_getnext_tid(scan, direction);
+				/* If we're out of index entries, we're done */
+				if (rowid.isnull)
+					break;
 
-			/* If we're out of index entries, we're done */
-			if (tid == NULL)
-				break;
+				/* Assert(RowidEquals(rowid, &scan->xs_rowid)); */
+			}
+			else
+			{
+				ItemPointer tid;
+
+				/* Time to fetch the next TID from the index */
+				tid = index_getnext_tid(scan, direction);
 
-			Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
+				/* If we're out of index entries, we're done */
+				if (tid == NULL)
+					break;
+
+				Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
+			}
 		}
 
 		/*
@@ -686,7 +833,8 @@ index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *
 		 * If we don't find anything, loop around and grab the next TID from
 		 * the index.
 		 */
-		Assert(ItemPointerIsValid(&scan->xs_heaptid));
+		if (!scan->xs_want_rowid)
+			Assert(ItemPointerIsValid(&scan->xs_heaptid));
 		if (index_fetch_heap(scan, slot))
 			return true;
 	}
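
index_getnext_rowid() and the xs_want_rowid branch in index_getnext_slot() let a scan be driven by an AM-defined ROWID Datum instead of a heap TID. A minimal consumer sketch, assuming the scan is already opened and positioned via index_rescan(); process_rowid() is a hypothetical callback.

#include "postgres.h"
#include "access/genam.h"

extern void process_rowid(Datum rowid);	/* hypothetical consumer */

static void
demo_rowid_scan(IndexScanDesc scan)
{
	scan->xs_want_rowid = true;

	for (;;)
	{
		NullableDatum rowid = index_getnext_rowid(scan, ForwardScanDirection);

		if (rowid.isnull)
			break;				/* no more matching index entries */

		/* rowid.value identifies the row in the table AM's own format */
		process_rowid(rowid.value);
	}
}
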
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 6c5b5c69ce5..44daed95baf 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -121,7 +121,8 @@ bthandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = btbuild;
 	amroutine->ambuildempty = btbuildempty;
-	amroutine->aminsert = btinsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = btinsert;
 	amroutine->ambulkdelete = btbulkdelete;
 	amroutine->amvacuumcleanup = btvacuumcleanup;
 	amroutine->amcanreturn = btcanreturn;
@@ -188,13 +189,14 @@ btbuildempty(Relation index)
  */
 bool
 btinsert(Relation rel, Datum *values, bool *isnull,
-		 ItemPointer ht_ctid, Relation heapRel,
+		 Datum tupleid, Relation heapRel,
 		 IndexUniqueCheck checkUnique,
 		 bool indexUnchanged,
 		 IndexInfo *indexInfo)
 {
 	bool		result;
 	IndexTuple	itup;
+	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
 
 	/* generate an index tuple */
 	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 4443f1918df..1f5c9a930d2 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -198,7 +198,7 @@ spgbuildempty(Relation index)
  */
 bool
 spginsert(Relation index, Datum *values, bool *isnull,
-		  ItemPointer ht_ctid, Relation heapRel,
+		  Datum tupleid, Relation heapRel,
 		  IndexUniqueCheck checkUnique,
 		  bool indexUnchanged,
 		  IndexInfo *indexInfo)
@@ -206,6 +206,7 @@ spginsert(Relation index, Datum *values, bool *isnull,
 	SpGistState spgstate;
 	MemoryContext oldCtx;
 	MemoryContext insertCtx;
+	ItemPointer ht_ctid = DatumGetItemPointer(tupleid);
 
 	insertCtx = AllocSetContextCreate(CurrentMemoryContext,
 									  "SP-GiST insert temporary context",
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 5fa9e230c08..127ff3922d1 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -69,7 +69,8 @@ spghandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = spgbuild;
 	amroutine->ambuildempty = spgbuildempty;
-	amroutine->aminsert = spginsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = spginsert;
 	amroutine->ambulkdelete = spgbulkdelete;
 	amroutine->amvacuumcleanup = spgvacuumcleanup;
 	amroutine->amcanreturn = spgcanreturn;
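
nbtree and SP-GiST now register their insert callbacks as aminsertextended and leave aminsert NULL; per the compatibility branch in index_insert() above, an out-of-tree AM that still works in TIDs needs no change. A sketch of the legacy registration; myinsert_tid() is hypothetical and keeps the pre-patch aminsert signature.

#include "postgres.h"
#include "fmgr.h"
#include "access/amapi.h"

extern bool myinsert_tid(Relation rel, Datum *values, bool *isnull,
						 ItemPointer ht_ctid, Relation heapRel,
						 IndexUniqueCheck checkUnique, bool indexUnchanged,
						 IndexInfo *indexInfo);	/* hypothetical */

Datum
myhandler(PG_FUNCTION_ARGS)
{
	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

	/*
	 * Legacy, TID-based callback: index_insert() passes an ItemPointer.  A
	 * converted AM would instead set aminsert = NULL and provide
	 * aminsertextended, receiving the tuple identifier as a Datum.
	 */
	amroutine->aminsert = myinsert_tid;
	amroutine->aminsertextended = NULL;

	/* ... remaining callbacks filled in as before ... */

	PG_RETURN_POINTER(amroutine);
}
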
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 771438c8cec..3f64d70666e 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -227,7 +227,7 @@ table_index_fetch_tuple_check(Relation rel,
 
 	slot = table_slot_create(rel, NULL);
 	scan = table_index_fetch_begin(rel);
-	found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again,
+	found = table_index_fetch_tuple(scan, PointerGetDatum(tid), snapshot, slot, &call_again,
 									all_dead);
 	table_index_fetch_end(scan);
 	ExecDropSingleTupleTableSlot(slot);
@@ -297,16 +297,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
  * via ereport().
  */
 void
-simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
+simple_table_tuple_delete(Relation rel, Datum tupleid, Snapshot snapshot,
+						  TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
+	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */
 
-	result = table_tuple_delete(rel, tid,
+	/* Fetch the old tuple if the caller provided a slot for it */
+	if (oldSlot)
+		options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+	result = table_tuple_delete(rel, tupleid,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
-								true /* wait for commit */ ,
-								&tmfd, false /* changingPart */ );
+								options,
+								&tmfd, false /* changingPart */ ,
+								oldSlot);
 
 	switch (result)
 	{
@@ -342,20 +349,27 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
  * via ereport().
  */
 void
-simple_table_tuple_update(Relation rel, ItemPointer otid,
+simple_table_tuple_update(Relation rel, Datum tupleid,
 						  TupleTableSlot *slot,
 						  Snapshot snapshot,
-						  TU_UpdateIndexes *update_indexes)
+						  TU_UpdateIndexes *update_indexes,
+						  TupleTableSlot *oldSlot)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
 	LockTupleMode lockmode;
+	int			options = TABLE_MODIFY_WAIT;	/* wait for commit */
+
+	/* Fetch the old tuple if the caller provided a slot for it */
+	if (oldSlot)
+		options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
 
-	result = table_tuple_update(rel, otid, slot,
+	result = table_tuple_update(rel, tupleid, slot,
 								GetCurrentCommandId(true),
 								snapshot, InvalidSnapshot,
-								true /* wait for commit */ ,
-								&tmfd, &lockmode, update_indexes);
+								options,
+								&tmfd, &lockmode, update_indexes,
+								oldSlot);
 
 	switch (result)
 	{
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
index d7798b6afb6..26aca18dc50 100644
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -75,8 +75,7 @@ GetTableAmRoutine(Oid amhandler)
 	 * Could be made optional, but would require throwing error during
 	 * parse-analysis.
 	 */
-	Assert(routine->tuple_insert_speculative != NULL);
-	Assert(routine->tuple_complete_speculative != NULL);
+	Assert(routine->tuple_insert_with_arbiter != NULL);
 
 	Assert(routine->multi_insert != NULL);
 	Assert(routine->tuple_delete != NULL);
@@ -104,9 +103,29 @@ GetTableAmRoutine(Oid amhandler)
 	Assert(routine->scan_sample_next_block != NULL);
 	Assert(routine->scan_sample_next_tuple != NULL);
 
+	Assert(routine->tuple_is_current != NULL);
+
 	return routine;
 }
 
+const TableAmRoutine *
+GetTableAmRoutineByAmOid(Oid amoid)
+{
+	HeapTuple	ht_am;
+	Form_pg_am	amrec;
+	const TableAmRoutine *tableam;
+
+	ht_am = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid));
+	if (!HeapTupleIsValid(ht_am))
+		elog(ERROR, "cache lookup failed for access method %u",
+			 amoid);
+	amrec = (Form_pg_am) GETSTRUCT(ht_am);
+
+	tableam = GetTableAmRoutine(amrec->amhandler);
+	ReleaseSysCache(ht_am);
+	return tableam;
+}
+
 /* check_hook: validate new default_table_access_method */
 bool
 check_default_table_access_method(char **newval, void **extra, GucSource source)
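
GetTableAmRoutineByAmOid() lets DDL code validate reloptions against the relation's actual table AM instead of hard-wiring heap_reloptions(); the DefineRelation() and ATExecSetRelOptions() hunks below use exactly this pattern. A condensed sketch; the tableam_reloptions() signature is taken from those hunks.

#include "postgres.h"
#include "access/tableam.h"

static void
demo_validate_reloptions(Oid amoid, char relkind, Datum reloptions)
{
	const TableAmRoutine *tableam = GetTableAmRoutineByAmOid(amoid);

	/* let the AM itself parse and validate its storage options */
	(void) tableam_reloptions(tableam, relkind, reloptions, true);
}
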
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index b5cfeb21aab..2afcd4830d5 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -72,10 +72,10 @@ toast_tuple_init(ToastTupleContext *ttc)
 			 * we have to delete it later.
 			 */
 			if (att->attlen == -1 && !ttc->ttc_oldisnull[i] &&
-				VARATT_IS_EXTERNAL_ONDISK(old_value))
+				(VARATT_IS_EXTERNAL_ONDISK(old_value) || VARATT_IS_EXTERNAL_ORIOLEDB(old_value)))
 			{
 				if (ttc->ttc_isnull[i] ||
-					!VARATT_IS_EXTERNAL_ONDISK(new_value) ||
+					!(VARATT_IS_EXTERNAL_ONDISK(new_value) || VARATT_IS_EXTERNAL_ORIOLEDB(new_value)) ||
 					memcmp((char *) old_value, (char *) new_value,
 						   VARSIZE_EXTERNAL(old_value)) != 0)
 				{
@@ -331,7 +331,7 @@ toast_delete_external(Relation rel, Datum *values, bool *isnull,
 
 			if (isnull[i])
 				continue;
-			else if (VARATT_IS_EXTERNAL_ONDISK(value))
+			else if (VARATT_IS_EXTERNAL_ONDISK(value) || VARATT_IS_EXTERNAL_ORIOLEDB(value))
 				toast_delete_datum(rel, value, is_speculative);
 		}
 	}
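
The toast_helper.c hunks repeat the same two-macro test in three places; if the pattern spreads further, it could be factored into a helper. A sketch, assuming VARATT_IS_EXTERNAL_ORIOLEDB is the companion macro this patch series introduces alongside VARATT_IS_EXTERNAL_ONDISK.

#include "postgres.h"

/* true if the value is stored externally, whether heap TOAST or OrioleDB */
static inline bool
varatt_is_external_storage(struct varlena *value)
{
	return VARATT_IS_EXTERNAL_ONDISK(value) ||
		VARATT_IS_EXTERNAL_ORIOLEDB(value);
}
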
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index 7629904bbf7..d118c5fd61a 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -22,6 +22,7 @@
 #include "access/clog.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
+#include "storage/proc.h"
 #include "utils/snapmgr.h"
 
 /*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4a2ea4adbaf..dab73df4b2c 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -209,6 +209,7 @@ typedef struct TransactionStateData
 	int			parallelModeLevel;	/* Enter/ExitParallelMode counter */
 	bool		chain;			/* start a new block after this one */
 	bool		topXidLogged;	/* for a subxact: is top-level XID logged? */
+	CommitSeqNo	csn;
 	struct TransactionStateData *parent;	/* back link to parent */
 } TransactionStateData;
 
@@ -242,6 +243,7 @@ static TransactionStateData TopTransactionStateData = {
 	.state = TRANS_DEFAULT,
 	.blockState = TBLOCK_DEFAULT,
 	.topXidLogged = false,
+	.csn = COMMITSEQNO_INPROGRESS
 };
 
 /*
@@ -320,6 +322,7 @@ typedef struct SubXactCallbackItem
 
 static SubXactCallbackItem *SubXact_callbacks = NULL;
 
+xact_redo_hook_type xact_redo_hook = NULL;
 
 /* local function prototypes */
 static void AssignTransactionId(TransactionState s);
@@ -2014,6 +2017,7 @@ StartTransaction(void)
 	 */
 	s->state = TRANS_START;
 	s->fullTransactionId = InvalidFullTransactionId;	/* until assigned */
+	s->csn = COMMITSEQNO_INPROGRESS;
 
 	/* Determine if statements are logged in this transaction */
 	xact_is_sampled = log_xact_sample_rate != 0 &&
@@ -2288,7 +2292,9 @@ CommitTransaction(void)
 	 * must be done _before_ releasing locks we hold and _after_
 	 * RecordTransactionCommit.
 	 */
+	MyProc->lastCommittedCSN = s->csn;
 	ProcArrayEndTransaction(MyProc, latestXid);
+	s->csn = MyProc->lastCommittedCSN;
 
 	/*
 	 * This is all post-commit cleanup.  Note that if an error is raised here,
@@ -2714,6 +2720,7 @@ AbortTransaction(void)
 	 * while cleaning up!
 	 */
 	LWLockReleaseAll();
+	CustomErrorCleanup();
 
 	/* Clear wait information and command progress indicator */
 	pgstat_report_wait_end();
@@ -5076,6 +5083,7 @@ AbortSubTransaction(void)
 	 * Buffer locks, for example?  I don't think so but I'm not sure.
 	 */
 	LWLockReleaseAll();
+	CustomErrorCleanup();
 
 	pgstat_report_wait_end();
 	pgstat_progress_end_command();
@@ -5958,6 +5966,9 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
 	TransactionId max_xid;
 	TimestampTz commit_time;
 
+	if (xact_redo_hook)
+		xact_redo_hook(xid, lsn);
+
 	Assert(TransactionIdIsValid(xid));
 
 	max_xid = TransactionIdLatest(xid, parsed->nsubxacts, parsed->subxacts);
@@ -6267,3 +6278,9 @@ xact_redo(XLogReaderState *record)
 	else
 		elog(PANIC, "xact_redo: unknown op code %u", info);
 }
+
+CommitSeqNo
+GetCurrentCSN(void)
+{
+	return TopTransactionStateData.csn;
+}
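
The xact.c changes thread a commit sequence number through TransactionStateData: it starts out as COMMITSEQNO_INPROGRESS, and CommitTransaction() publishes the final value through MyProc->lastCommittedCSN around ProcArrayEndTransaction(). A reader-side sketch of the new accessor; CommitSeqNo is assumed to be a 64-bit value, matching the pg_atomic_write_u64() calls in the xlog.c hunks below.

#include "postgres.h"
#include "access/xact.h"

static void
demo_report_csn(void)
{
	CommitSeqNo csn = GetCurrentCSN();

	if (csn == COMMITSEQNO_INPROGRESS)
		elog(DEBUG1, "current transaction has not committed yet");
	else
		elog(DEBUG1, "transaction committed at CSN " UINT64_FORMAT, csn);
}
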
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a19ba7167fd..ee0794465b1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -138,6 +138,7 @@ int			wal_retrieve_retry_interval = 5000;
 int			max_slot_wal_keep_size_mb = -1;
 int			wal_decode_buffer_size = 512 * 1024;
 bool		track_wal_io_timing = false;
+CommitSeqNo	startupCommitSeqNo = COMMITSEQNO_FIRST_NORMAL + 1;
 
 #ifdef WAL_DEBUG
 bool		XLOG_DEBUG = false;
@@ -145,6 +146,11 @@ bool		XLOG_DEBUG = false;
 
 int			wal_segment_size = DEFAULT_XLOG_SEG_SIZE;
 
+/* Hook for plugins to get control in CheckPointGuts() */
+CheckPoint_hook_type CheckPoint_hook = NULL;
+double		CheckPointProgress;
+after_checkpoint_cleanup_hook_type after_checkpoint_cleanup_hook = NULL;
+
 /*
  * Number of WAL insertion locks to use. A higher value allows more insertions
  * to happen concurrently, but adds some CPU overhead to flushing the WAL,
@@ -4710,6 +4716,7 @@ BootStrapXLOG(void)
 	ShmemVariableCache->nextXid = checkPoint.nextXid;
 	ShmemVariableCache->nextOid = checkPoint.nextOid;
 	ShmemVariableCache->oidCount = 0;
+	pg_atomic_write_u64(&ShmemVariableCache->nextCommitSeqNo, COMMITSEQNO_FIRST_NORMAL + 1);
 	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
 	AdvanceOldestClogXid(checkPoint.oldestXid);
 	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5049,6 +5056,7 @@ StartupXLOG(void)
 	XLogRecPtr	missingContrecPtr;
 	TransactionId oldestActiveXID;
 	bool		promoted = false;
+	bool		wasInRecovery;
 
 	/*
 	 * We should have an aux process resource owner to use, and we should not
@@ -5176,6 +5184,7 @@ StartupXLOG(void)
 	ShmemVariableCache->nextXid = checkPoint.nextXid;
 	ShmemVariableCache->nextOid = checkPoint.nextOid;
 	ShmemVariableCache->oidCount = 0;
+	pg_atomic_write_u64(&ShmemVariableCache->nextCommitSeqNo, startupCommitSeqNo);
 	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
 	AdvanceOldestClogXid(checkPoint.oldestXid);
 	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5664,6 +5673,8 @@ StartupXLOG(void)
 	 */
 	PreallocXlogFiles(EndOfLog, newTLI);
 
+	wasInRecovery = InRecovery;
+
 	/*
 	 * Okay, we're officially UP.
 	 */
@@ -5742,6 +5753,9 @@ StartupXLOG(void)
 	 */
 	CompleteCommitTsInitialization();
 
+	if (wasInRecovery && after_checkpoint_cleanup_hook)
+		after_checkpoint_cleanup_hook(EndOfLog, 0);
+
 	/*
 	 * All done with end-of-recovery actions.
 	 *
@@ -6866,6 +6880,9 @@ CreateCheckPoint(int flags)
 	if (!RecoveryInProgress())
 		TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
 
+	if (after_checkpoint_cleanup_hook)
+		after_checkpoint_cleanup_hook(ProcLastRecPtr, flags);
+
 	/* Real work is done; log and update stats. */
 	LogCheckpointEnd(false);
 
@@ -7040,6 +7057,9 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
 	CheckPointPredicate();
 	CheckPointBuffers(flags);
 
+	if (CheckPoint_hook)
+		CheckPoint_hook(checkPointRedo, flags);
+
 	/* Perform all queued up fsyncs */
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
 	CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
@@ -8579,6 +8599,19 @@ get_backup_status(void)
 	return sessionBackupState;
 }
 
+/*
+ * Check if there is a backup in progress.
+ *
+ * We do this check without a lock, assuming 32-bit reads are atomic.  A
+ * false result only means that there was at least one moment when no
+ * backup was in progress.
+ */
+bool
+have_backup_in_progress(void)
+{
+	return (XLogCtl->Insert.runningBackups > 0);
+}
+
 /*
  * do_pg_backup_stop
  *
@@ -8988,3 +9021,5 @@ SetWalWriterSleeping(bool sleeping)
 	XLogCtl->WalWriterSleeping = sleeping;
 	SpinLockRelease(&XLogCtl->info_lck);
 }
+
+void (*RedoShutdownHook) (void) = NULL;
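
xlog.c gains three extension points: CheckPoint_hook (invoked from CheckPointGuts() after CheckPointBuffers() and before the queued fsyncs), after_checkpoint_cleanup_hook (end of CreateCheckPoint(), plus once after recovery in StartupXLOG()), and RedoShutdownHook (see the xlogrecovery.c hunk below). A registration sketch; my_checkpoint() is hypothetical and its signature is inferred from the CheckPointGuts() call site.

#include "postgres.h"
#include "access/xlog.h"

static CheckPoint_hook_type prev_CheckPoint_hook = NULL;

static void
my_checkpoint(XLogRecPtr checkPointRedo, int flags)
{
	/* flush extension-private state alongside PostgreSQL's own buffers */

	if (prev_CheckPoint_hook)
		prev_CheckPoint_hook(checkPointRedo, flags);
}

void
_PG_init(void)
{
	prev_CheckPoint_hook = CheckPoint_hook;
	CheckPoint_hook = my_checkpoint;
}
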
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 3c7fb913e7e..8de18a3a6ee 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -1806,6 +1806,8 @@ PerformWalRecovery(void)
 					 * exit with special return code to request shutdown of
 					 * postmaster.  Log messages issued from postmaster.
 					 */
+					if (RedoShutdownHook != NULL)
+						RedoShutdownHook();
 					proc_exit(3);
 
 				case RECOVERY_TARGET_ACTION_PAUSE:
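
RedoShutdownHook fires immediately before the special proc_exit(3) that asks the postmaster to shut down once the recovery target is reached, giving an extension a last chance to close out its own replay state. A minimal sketch; my_redo_shutdown() is hypothetical.

#include "postgres.h"
#include "access/xlog.h"

static void
my_redo_shutdown(void)
{
	/* flush and close extension-side replay state before the exit */
}

void
_PG_init(void)
{
	RedoShutdownHook = my_redo_shutdown;
}
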
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index a2aad09e6a0..ac39a2c4c0a 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -1617,7 +1617,7 @@ expand_all_col_privileges(Oid table_oid, Form_pg_class classForm,
 	AttrNumber	curr_att;
 
 	Assert(classForm->relnatts - FirstLowInvalidHeapAttributeNumber < num_col_privileges);
-	for (curr_att = FirstLowInvalidHeapAttributeNumber + 1;
+	for (curr_att = FirstLowInvalidHeapAttributeNumber + 2;
 		 curr_att <= classForm->relnatts;
 		 curr_att++)
 	{
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 494738824cb..8627810dc23 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -242,6 +242,7 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
 					int flags)
 {
 	int			i;
+	bool		*depends_on_relation;
 
 	/*
 	 * Keep track of objects for event triggers, if necessary.
@@ -269,6 +270,33 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
 		}
 	}
 
+	depends_on_relation = palloc0(sizeof(bool) * targetObjects->numrefs);
+
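+	/*
+	 * Scan the targets in reverse dependency order, marking each object
+	 * whose dependee is, directly or transitively, a whole relation, so
+	 * that the deletion loop below can pass PERFORM_DELETION_OF_RELATION
+	 * for it.
+	 */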
+	for (i = targetObjects->numrefs - 1; i >= 0; i--)
+	{
+		ObjectAddressExtra *thisextra = targetObjects->extras + i;
+		int j;
+
+		if (thisextra->dependee.classId == RelationRelationId &&
+			thisextra->dependee.objectSubId == 0)
+		{
+			depends_on_relation[i] = true;
+			continue;
+		}
+
+		for (j = i + 1; j < targetObjects->numrefs; j++)
+		{
+			ObjectAddress *depobj = targetObjects->refs + j;
+			if (depobj->classId == thisextra->dependee.classId &&
+				depobj->objectId == thisextra->dependee.objectId &&
+				depobj->objectSubId == thisextra->dependee.objectSubId)
+			{
+				depends_on_relation[i] = depends_on_relation[j];
+				break;
+			}
+		}
+	}
+
 	/*
 	 * Delete all the objects in the proper order, except that if told to, we
 	 * should skip the original object(s).
@@ -277,13 +305,19 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
 	{
 		ObjectAddress *thisobj = targetObjects->refs + i;
 		ObjectAddressExtra *thisextra = targetObjects->extras + i;
+		int temp_flags = flags;
 
 		if ((flags & PERFORM_DELETION_SKIP_ORIGINAL) &&
 			(thisextra->flags & DEPFLAG_ORIGINAL))
 			continue;
 
-		deleteOneObject(thisobj, depRel, flags);
+		if (depends_on_relation[i])
+			temp_flags |= PERFORM_DELETION_OF_RELATION;
+
+		deleteOneObject(thisobj, depRel, temp_flags);
 	}
+
+	pfree(depends_on_relation);
 }
 
 /*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 6f1910a6e0f..69c6689245e 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -125,9 +125,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
 								bool immediate,
 								bool isvalid,
 								bool isready);
-static void index_update_stats(Relation rel,
-							   bool hasindex,
-							   double reltuples);
 static void IndexCheckExclusion(Relation heapRelation,
 								Relation indexRelation,
 								IndexInfo *indexInfo);
@@ -301,7 +298,7 @@ ConstructTupleDescriptor(Relation heapRelation,
 	int			i;
 
 	/* We need access to the index AM's API struct */
-	amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
+	amroutine = GetIndexAmRoutineByAmId(InvalidOid, accessMethodObjectId, false);
 
 	/* ... and to the table's tuple descriptor */
 	heapTupDesc = RelationGetDescr(heapRelation);
@@ -2681,9 +2678,6 @@ BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
 	 */
 	Assert(ii->ii_Unique);
 
-	if (index->rd_rel->relam != BTREE_AM_OID)
-		elog(ERROR, "unexpected non-btree speculative unique index");
-
 	ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
 	ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
 	ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
@@ -2807,7 +2801,7 @@ FormIndexDatum(IndexInfo *indexInfo,
  * index.  When updating an index, it's important because some index AMs
  * expect a relcache flush to occur after REINDEX.
  */
-static void
+void
 index_update_stats(Relation rel,
 				   bool hasindex,
 				   double reltuples)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index bda364552ca..cd4a16a5572 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -97,9 +97,6 @@ static void compute_index_stats(Relation onerel, double totalrows,
 								MemoryContext col_context);
 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 									   Node *index_expr);
-static int	acquire_sample_rows(Relation onerel, int elevel,
-								HeapTuple *rows, int targrows,
-								double *totalrows, double *totaldeadrows);
 static int	compare_rows(const void *a, const void *b, void *arg);
 static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
 										  HeapTuple *rows, int targrows,
@@ -201,10 +198,7 @@ analyze_rel(Oid relid, RangeVar *relation,
 	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
 		onerel->rd_rel->relkind == RELKIND_MATVIEW)
 	{
-		/* Regular table, so we'll use the regular row acquisition function */
-		acquirefunc = acquire_sample_rows;
-		/* Also get regular table's size */
-		relpages = RelationGetNumberOfBlocks(onerel);
+		table_analyze(onerel, &acquirefunc, &relpages);
 	}
 	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
 	{
@@ -1133,7 +1127,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
  * block.  The previous sampling method put too much credence in the row
  * density near the start of the table.
  */
-static int
+int
 acquire_sample_rows(Relation onerel, int elevel,
 					HeapTuple *rows, int targrows,
 					double *totalrows, double *totaldeadrows)
@@ -1460,9 +1454,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 		if (childrel->rd_rel->relkind == RELKIND_RELATION ||
 			childrel->rd_rel->relkind == RELKIND_MATVIEW)
 		{
-			/* Regular table, so use the regular row acquisition function */
-			acquirefunc = acquire_sample_rows;
-			relpages = RelationGetNumberOfBlocks(childrel);
+			table_analyze(childrel, &acquirefunc, &relpages);
 		}
 		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
 		{
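
analyze_rel() (and the inheritance path below) now ask the table AM for its sampling function through table_analyze() instead of assuming acquire_sample_rows(), which this patch exports so AMs can reuse it. A sketch of the resulting control flow; the table_analyze() prototype is inferred from these call sites, and AcquireSampleRowsFunc is the existing typedef from the FDW analyze API.

#include "postgres.h"
#include "access/tableam.h"
#include "foreign/fdwapi.h"		/* AcquireSampleRowsFunc */

static void
demo_sample_rows(Relation rel, int elevel, HeapTuple *rows, int targrows)
{
	AcquireSampleRowsFunc acquirefunc = NULL;
	BlockNumber relpages = 0;
	double		totalrows;
	double		totaldeadrows;

	/* the table AM supplies both the sampler and the page count */
	table_analyze(rel, &acquirefunc, &relpages);

	(void) acquirefunc(rel, elevel, rows, targrows,
					   &totalrows, &totaldeadrows);
}
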
diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c
index 35c4451fc06..982bae9ed42 100644
--- a/src/backend/commands/constraint.c
+++ b/src/backend/commands/constraint.c
@@ -111,7 +111,7 @@ unique_key_recheck(PG_FUNCTION_ARGS)
 		IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation);
 		bool		call_again = false;
 
-		if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot,
+		if (!table_index_fetch_tuple(scan, PointerGetDatum(&tmptid), SnapshotSelf, slot,
 									 &call_again, NULL))
 		{
 			/*
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 6c2e5c8a4f9..b3421e6e5a8 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -64,9 +64,6 @@ static void report_triggers(ResultRelInfo *rInfo, bool show_relname,
 							ExplainState *es);
 static double elapsed_time(instr_time *starttime);
 static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used);
-static void ExplainNode(PlanState *planstate, List *ancestors,
-						const char *relationship, const char *plan_name,
-						ExplainState *es);
 static void show_plan_tlist(PlanState *planstate, List *ancestors,
 							ExplainState *es);
 static void show_expression(Node *node, const char *qlabel,
@@ -75,9 +72,6 @@ static void show_expression(Node *node, const char *qlabel,
 static void show_qual(List *qual, const char *qlabel,
 					  PlanState *planstate, List *ancestors,
 					  bool useprefix, ExplainState *es);
-static void show_scan_qual(List *qual, const char *qlabel,
-						   PlanState *planstate, List *ancestors,
-						   ExplainState *es);
 static void show_upper_qual(List *qual, const char *qlabel,
 							PlanState *planstate, List *ancestors,
 							ExplainState *es);
@@ -114,8 +108,6 @@ static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 								ExplainState *es);
-static void show_instrumentation_count(const char *qlabel, int which,
-									   PlanState *planstate, ExplainState *es);
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
@@ -1174,7 +1166,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
  * to the nesting depth of logical output groups, and therefore is controlled
  * by ExplainOpenGroup/ExplainCloseGroup.
  */
-static void
+void
 ExplainNode(PlanState *planstate, List *ancestors,
 			const char *relationship, const char *plan_name,
 			ExplainState *es)
@@ -2346,7 +2338,7 @@ show_qual(List *qual, const char *qlabel,
 /*
  * Show a qualifier expression for a scan plan node
  */
-static void
+void
 show_scan_qual(List *qual, const char *qlabel,
 			   PlanState *planstate, List *ancestors,
 			   ExplainState *es)
@@ -3437,7 +3429,7 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
  *
  * "which" identifies which instrumentation counter to print
  */
-static void
+void
 show_instrumentation_count(const char *qlabel, int which,
 						   PlanState *planstate, ExplainState *es)
 {
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 21ed483b7fa..df4fffc4e37 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -69,6 +69,7 @@
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 
+GetDefaultOpClass_hook_type GetDefaultOpClass_hook = NULL;
 
 /* non-export function prototypes */
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
@@ -87,11 +88,7 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
 							  Oid ddl_userid,
 							  int ddl_sec_context,
 							  int *ddl_save_nestlevel);
-static char *ChooseIndexName(const char *tabname, Oid namespaceId,
-							 List *colnames, List *exclusionOpNames,
-							 bool primary, bool isconstraint);
 static char *ChooseIndexNameAddition(List *colnames);
-static List *ChooseIndexColumnNames(List *indexElems);
 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
 						 bool isTopLevel);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
@@ -217,7 +214,7 @@ CheckIndexCompatible(Oid oldId,
 						accessMethodName)));
 	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
 	accessMethodId = accessMethodForm->oid;
-	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+	amRoutine = GetIndexAmRoutineExtended(oldId, accessMethodForm->amhandler);
 	ReleaseSysCache(tuple);
 
 	amcanorder = amRoutine->amcanorder;
@@ -841,7 +838,7 @@ DefineIndex(Oid relationId,
 	}
 	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
 	accessMethodId = accessMethodForm->oid;
-	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
+	amRoutine = GetIndexAmRoutineWithTableAM(rel->rd_rel->relam, accessMethodForm->amhandler);
 
 	pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
 								 accessMethodId);
@@ -2284,6 +2281,9 @@ GetDefaultOpClass(Oid type_id, Oid am_id)
 	/* If it's a domain, look at the base type instead */
 	type_id = getBaseType(type_id);
 
+	if (GetDefaultOpClass_hook)
+		return GetDefaultOpClass_hook(type_id, am_id);
+
 	tcategory = TypeCategory(type_id);
 
 	/*
@@ -2499,7 +2499,7 @@ ChooseRelationName(const char *name1, const char *name2,
  *
  * The argument list is pretty ad-hoc :-(
  */
-static char *
+char *
 ChooseIndexName(const char *tabname, Oid namespaceId,
 				List *colnames, List *exclusionOpNames,
 				bool primary, bool isconstraint)
@@ -2588,7 +2588,7 @@ ChooseIndexNameAddition(List *colnames)
  *
  * Returns a List of plain strings (char *, not String nodes).
  */
-static List *
+List *
 ChooseIndexColumnNames(List *indexElems)
 {
 	List	   *result = NIL;
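
GetDefaultOpClass() now defers to GetDefaultOpClass_hook right after resolving domain types. Note that, as written, the hook replaces the built-in catalog search entirely rather than augmenting it, so an implementation that only cares about certain access methods must handle the rest itself. A sketch; MY_AM_OID and my_opclass_for_type() are hypothetical, and the header holding the hook declaration is assumed.

#include "postgres.h"
#include "commands/defrem.h"	/* assumed home of the hook declaration */

#define MY_AM_OID 16385			/* hypothetical index AM OID */

extern Oid my_opclass_for_type(Oid type_id);	/* hypothetical */

static Oid
my_default_opclass(Oid type_id, Oid am_id)
{
	if (am_id == MY_AM_OID)
		return my_opclass_for_type(type_id);

	return InvalidOid;			/* no default operator class */
}

void
_PG_init(void)
{
	GetDefaultOpClass_hook = my_default_opclass;
}
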
diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c
index 5f7ee238863..76722d506b5 100644
--- a/src/backend/commands/opclasscmds.c
+++ b/src/backend/commands/opclasscmds.c
@@ -43,6 +43,7 @@
 #include "parser/parse_func.h"
 #include "parser/parse_oper.h"
 #include "parser/parse_type.h"
+#include "postgres_ext.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
@@ -377,7 +378,7 @@ DefineOpClass(CreateOpClassStmt *stmt)
 
 	amform = (Form_pg_am) GETSTRUCT(tup);
 	amoid = amform->oid;
-	amroutine = GetIndexAmRoutineByAmId(amoid, false);
+	amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
 	ReleaseSysCache(tup);
 
 	maxOpNumber = amroutine->amstrategies;
@@ -835,7 +836,7 @@ AlterOpFamily(AlterOpFamilyStmt *stmt)
 
 	amform = (Form_pg_am) GETSTRUCT(tup);
 	amoid = amform->oid;
-	amroutine = GetIndexAmRoutineByAmId(amoid, false);
+	amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
 	ReleaseSysCache(tup);
 
 	maxOpNumber = amroutine->amstrategies;
@@ -882,7 +883,7 @@ AlterOpFamilyAdd(AlterOpFamilyStmt *stmt, Oid amoid, Oid opfamilyoid,
 				 int maxOpNumber, int maxProcNumber, int optsProcNumber,
 				 List *items)
 {
-	IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+	IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
 	List	   *operators;		/* OpFamilyMember list for operators */
 	List	   *procedures;		/* OpFamilyMember list for support procs */
 	ListCell   *l;
@@ -1165,7 +1166,7 @@ assignOperTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
 		 * the family has been created but not yet populated with the required
 		 * operators.)
 		 */
-		IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(amoid, false);
+		IndexAmRoutine *amroutine = GetIndexAmRoutineByAmId(InvalidOid, amoid, false);
 
 		if (!amroutine->amcanorderbyop)
 			ereport(ERROR,
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b4eeb2523a2..49f71638bbe 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -684,6 +684,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 	LOCKMODE	parentLockmode;
 	const char *accessMethod = NULL;
 	Oid			accessMethodId = InvalidOid;
+	const TableAmRoutine *tableam = NULL;
 
 	/*
 	 * Truncate relname to appropriate length (probably a waste of time, as
@@ -819,6 +820,26 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 	if (!OidIsValid(ownerId))
 		ownerId = GetUserId();
 
+	/*
+	 * If the statement hasn't specified an access method, but we're defining
+	 * a type of relation that needs one, use the default.
+	 */
+	if (stmt->accessMethod != NULL)
+	{
+		accessMethod = stmt->accessMethod;
+
+		if (partitioned)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("specifying a table access method is not supported on a partitioned table")));
+	}
+	else if (RELKIND_HAS_TABLE_AM(relkind))
+		accessMethod = default_table_access_method;
+
+	/* look up the access method, verify it is for a table */
+	if (accessMethod != NULL)
+		accessMethodId = get_table_am_oid(accessMethod, false);
+
 	/*
 	 * Parse and validate reloptions, if any.
 	 */
@@ -827,6 +848,12 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 
 	switch (relkind)
 	{
+		case RELKIND_RELATION:
+		case RELKIND_TOASTVALUE:
+		case RELKIND_MATVIEW:
+			tableam = GetTableAmRoutineByAmOid(accessMethodId);
+			(void) tableam_reloptions(tableam, relkind, reloptions, true);
+			break;
 		case RELKIND_VIEW:
 			(void) view_reloptions(reloptions, true);
 			break;
@@ -835,6 +862,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 			break;
 		default:
 			(void) heap_reloptions(relkind, reloptions, true);
+			break;
 	}
 
 	if (stmt->ofTypename)
@@ -938,26 +966,6 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 			attr->attstorage = GetAttributeStorage(attr->atttypid, colDef->storage_name);
 	}
 
-	/*
-	 * If the statement hasn't specified an access method, but we're defining
-	 * a type of relation that needs one, use the default.
-	 */
-	if (stmt->accessMethod != NULL)
-	{
-		accessMethod = stmt->accessMethod;
-
-		if (partitioned)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("specifying a table access method is not supported on a partitioned table")));
-	}
-	else if (RELKIND_HAS_TABLE_AM(relkind))
-		accessMethod = default_table_access_method;
-
-	/* look up the access method, verify it is for a table */
-	if (accessMethod != NULL)
-		accessMethodId = get_table_am_oid(accessMethod, false);
-
 	/*
 	 * Create the relation.  Inherited defaults and constraints are passed in
 	 * for immediate handling --- since they don't need parsing, they can be
@@ -6136,8 +6144,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 
 			/* Write the tuple out to the new relation */
 			if (newrel)
+			{
 				table_tuple_insert(newrel, insertslot, mycid,
 								   ti_options, bistate);
+			}
 
 			ResetExprContext(econtext);
 
@@ -14435,7 +14445,8 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
 		case RELKIND_RELATION:
 		case RELKIND_TOASTVALUE:
 		case RELKIND_MATVIEW:
-			(void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
+			(void) table_reloptions(rel, rel->rd_rel->relkind,
+									newOptions, true);
 			break;
 		case RELKIND_PARTITIONED_TABLE:
 			(void) partitioned_table_reloptions(newOptions, true);
@@ -18124,6 +18135,7 @@ static void
 AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 {
 	List	   *idxes;
+	List	   *buildIdxes = NIL;
 	List	   *attachRelIdxs;
 	Relation   *attachrelIdxRels;
 	IndexInfo **attachInfos;
@@ -18131,6 +18143,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 	ListCell   *cell;
 	MemoryContext cxt;
 	MemoryContext oldcxt;
+	AttrMap    *attmap;
 
 	cxt = AllocSetContextCreate(CurrentMemoryContext,
 								"AttachPartitionEnsureIndexes",
@@ -18181,6 +18194,10 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 		goto out;
 	}
 
+	attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
+								   RelationGetDescr(rel),
+								   false);
+
 	/*
 	 * For each index on the partitioned table, find a matching one in the
 	 * partition-to-be; if one is not found, create one.
@@ -18190,7 +18207,6 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 		Oid			idx = lfirst_oid(cell);
 		Relation	idxRel = index_open(idx, AccessShareLock);
 		IndexInfo  *info;
-		AttrMap    *attmap;
 		bool		found = false;
 		Oid			constraintOid;
 
@@ -18206,9 +18222,6 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 
 		/* construct an indexinfo to compare existing indexes against */
 		info = BuildIndexInfo(idxRel);
-		attmap = build_attrmap_by_name(RelationGetDescr(attachrel),
-									   RelationGetDescr(rel),
-									   false);
 		constraintOid = get_relation_idx_constraint_oid(RelationGetRelid(rel), idx);
 
 		/*
@@ -18269,19 +18282,7 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 		 * now.
 		 */
 		if (!found)
-		{
-			IndexStmt  *stmt;
-			Oid			conOid;
-
-			stmt = generateClonedIndexStmt(NULL,
-										   idxRel, attmap,
-										   &conOid);
-			DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
-						RelationGetRelid(idxRel),
-						conOid,
-						-1,
-						true, false, false, false, false);
-		}
+			buildIdxes = lappend_oid(buildIdxes, RelationGetRelid(idxRel));
 
 		index_close(idxRel, AccessShareLock);
 	}
@@ -18290,6 +18291,25 @@ AttachPartitionEnsureIndexes(Relation rel, Relation attachrel)
 	/* Clean up. */
 	for (i = 0; i < list_length(attachRelIdxs); i++)
 		index_close(attachrelIdxRels[i], AccessShareLock);
+
+	foreach(cell, buildIdxes)
+	{
+		Oid			idx = lfirst_oid(cell);
+		Relation	idxRel = index_open(idx, AccessShareLock);
+		IndexStmt  *stmt;
+		Oid			conOid;
+
+		stmt = generateClonedIndexStmt(NULL,
+									   idxRel, attmap,
+									   &conOid);
+		DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid,
+					RelationGetRelid(idxRel),
+					conOid,
+					-1,
+					true, false, false, false, false);
+		index_close(idxRel, AccessShareLock);
+	}
+
 	MemoryContextSwitchTo(oldcxt);
 	MemoryContextDelete(cxt);
 }
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 8b1d3b99fe9..b6f6ebaa624 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -83,7 +83,7 @@ static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger);
 static bool GetTupleForTrigger(EState *estate,
 							   EPQState *epqstate,
 							   ResultRelInfo *relinfo,
-							   ItemPointer tid,
+							   Datum tupleid,
 							   LockTupleMode lockmode,
 							   TupleTableSlot *oldslot,
 							   TupleTableSlot **epqslot,
@@ -2688,7 +2688,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
 bool
 ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
-					 ItemPointer tupleid,
+					 Datum tupleid,
 					 HeapTuple fdw_trigtuple,
 					 TupleTableSlot **epqslot,
 					 TM_Result *tmresult,
@@ -2702,7 +2702,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 	bool		should_free = false;
 	int			i;
 
-	Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+	Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL));
 	if (fdw_trigtuple == NULL)
 	{
 		TupleTableSlot *epqslot_candidate = NULL;
@@ -2779,8 +2779,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 void
 ExecARDeleteTriggers(EState *estate,
 					 ResultRelInfo *relinfo,
-					 ItemPointer tupleid,
 					 HeapTuple fdw_trigtuple,
+					 TupleTableSlot *slot,
 					 TransitionCaptureState *transition_capture,
 					 bool is_crosspart_update)
 {
@@ -2789,20 +2789,11 @@ ExecARDeleteTriggers(EState *estate,
 	if ((trigdesc && trigdesc->trig_delete_after_row) ||
 		(transition_capture && transition_capture->tcs_delete_old_table))
 	{
-		TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
-
-		Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
-		if (fdw_trigtuple == NULL)
-			GetTupleForTrigger(estate,
-							   NULL,
-							   relinfo,
-							   tupleid,
-							   LockTupleExclusive,
-							   slot,
-							   NULL,
-							   NULL,
-							   NULL);
-		else
+		/*
+		 * Store the FDW old tuple in the slot.  Otherwise, the caller is
+		 * expected to have already fetched the old tuple into the slot.
+		 */
+		if (fdw_trigtuple != NULL)
 			ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
 
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@@ -2939,7 +2930,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 bool
 ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 					 ResultRelInfo *relinfo,
-					 ItemPointer tupleid,
+					 Datum tupleid,
 					 HeapTuple fdw_trigtuple,
 					 TupleTableSlot *newslot,
 					 TM_Result *tmresult,
@@ -2959,7 +2950,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 	/* Determine lock mode to use */
 	lockmode = ExecUpdateLockMode(estate, relinfo);
 
-	Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
+	Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL));
 	if (fdw_trigtuple == NULL)
 	{
 		TupleTableSlot *epqslot_candidate = NULL;
@@ -3093,18 +3084,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
  * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
  * and destination partitions, respectively, of a cross-partition update of
  * the root partitioned table mentioned in the query, given by 'relinfo'.
- * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
- * partition, and 'newslot' contains the "new" tuple in the destination
- * partition.  This interface allows to support the requirements of
- * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
- * that case.
+ * 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
+ * contains the "new" tuple in the destination partition.  This interface
+ * supports the requirements of ExecCrossPartitionUpdateForeignKey();
+ * is_crosspart_update must be true in that case.
  */
 void
 ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 					 ResultRelInfo *src_partinfo,
 					 ResultRelInfo *dst_partinfo,
-					 ItemPointer tupleid,
 					 HeapTuple fdw_trigtuple,
+					 TupleTableSlot *oldslot,
 					 TupleTableSlot *newslot,
 					 List *recheckIndexes,
 					 TransitionCaptureState *transition_capture,
@@ -3123,29 +3113,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 		 * separately for DELETE and INSERT to capture transition table rows.
 		 * In such case, either old tuple or new tuple can be NULL.
 		 */
-		TupleTableSlot *oldslot;
-		ResultRelInfo *tupsrc;
-
 		Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
 			   !is_crosspart_update);
 
-		tupsrc = src_partinfo ? src_partinfo : relinfo;
-		oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
-
-		if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
-			GetTupleForTrigger(estate,
-							   NULL,
-							   tupsrc,
-							   tupleid,
-							   LockTupleExclusive,
-							   oldslot,
-							   NULL,
-							   NULL,
-							   NULL);
-		else if (fdw_trigtuple != NULL)
+		if (fdw_trigtuple != NULL)
+		{
+			Assert(oldslot);
 			ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
-		else
-			ExecClearTuple(oldslot);
+		}
 
 		AfterTriggerSaveEvent(estate, relinfo,
 							  src_partinfo, dst_partinfo,
@@ -3292,7 +3267,7 @@ static bool
 GetTupleForTrigger(EState *estate,
 				   EPQState *epqstate,
 				   ResultRelInfo *relinfo,
-				   ItemPointer tid,
+				   Datum tupleid,
 				   LockTupleMode lockmode,
 				   TupleTableSlot *oldslot,
 				   TupleTableSlot **epqslot,
@@ -3317,7 +3292,9 @@ GetTupleForTrigger(EState *estate,
 		 */
 		if (!IsolationUsesXactSnapshot())
 			lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
-		test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot,
+
+		test = table_tuple_lock(relation, tupleid,
+								estate->es_snapshot, oldslot,
 								estate->es_output_cid,
 								lockmode, LockWaitBlock,
 								lockflags,
@@ -3413,8 +3390,8 @@ GetTupleForTrigger(EState *estate,
 		 * We expect the tuple to be present, thus very simple error handling
 		 * suffices.
 		 */
-		if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny,
-										   oldslot))
+		if (!table_tuple_fetch_row_version(relation, tupleid,
+										   SnapshotAny, oldslot))
 			elog(ERROR, "failed to fetch tuple for trigger");
 	}
 
@@ -3620,18 +3597,22 @@ typedef SetConstraintStateData *SetConstraintState;
  * cycles.  So we need only ensure that ats_firing_id is zero when attaching
  * a new event to an existing AfterTriggerSharedData record.
  */
-typedef uint32 TriggerFlags;
+typedef uint64 TriggerFlags;
 
-#define AFTER_TRIGGER_OFFSET			0x07FFFFFF	/* must be low-order bits */
-#define AFTER_TRIGGER_DONE				0x80000000
-#define AFTER_TRIGGER_IN_PROGRESS		0x40000000
+#define AFTER_TRIGGER_SIZE				UINT64CONST(0xFFFF000000000)	/* must be high-order bits */
+#define AFTER_TRIGGER_SIZE_SHIFT		(36)
+#define AFTER_TRIGGER_OFFSET			UINT64CONST(0x000000FFFFFFF)	/* must be low-order bits */
+#define AFTER_TRIGGER_DONE				UINT64CONST(0x0000800000000)
+#define AFTER_TRIGGER_IN_PROGRESS		UINT64CONST(0x0000400000000)
 /* bits describing the size and tuple sources of this event */
-#define AFTER_TRIGGER_FDW_REUSE			0x00000000
-#define AFTER_TRIGGER_FDW_FETCH			0x20000000
-#define AFTER_TRIGGER_1CTID				0x10000000
-#define AFTER_TRIGGER_2CTID				0x30000000
-#define AFTER_TRIGGER_CP_UPDATE			0x08000000
-#define AFTER_TRIGGER_TUP_BITS			0x38000000
+#define AFTER_TRIGGER_FDW_REUSE			UINT64CONST(0x0000000000000)
+#define AFTER_TRIGGER_FDW_FETCH			UINT64CONST(0x0000200000000)
+#define AFTER_TRIGGER_1CTID				UINT64CONST(0x0000100000000)
+#define AFTER_TRIGGER_ROWID1			UINT64CONST(0x0000010000000)
+#define AFTER_TRIGGER_2CTID				UINT64CONST(0x0000300000000)
+#define AFTER_TRIGGER_ROWID2			UINT64CONST(0x0000020000000)
+#define AFTER_TRIGGER_CP_UPDATE			UINT64CONST(0x0000080000000)
+#define AFTER_TRIGGER_TUP_BITS			UINT64CONST(0x0000380000000)
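+
+/*
+ * Sketch of the 64-bit ate_flags layout implied by the masks above (bit 30
+ * is currently unused):
+ *
+ *	bits  0-27	AFTER_TRIGGER_OFFSET (offset to the shared record)
+ *	bit  28		AFTER_TRIGGER_ROWID1 (trailing row-id 1 present)
+ *	bit  29		AFTER_TRIGGER_ROWID2 (trailing row-id 2 present)
+ *	bits 31-33	AFTER_TRIGGER_TUP_BITS (tuple-source kind, incl. CP_UPDATE)
+ *	bit  34		AFTER_TRIGGER_IN_PROGRESS
+ *	bit  35		AFTER_TRIGGER_DONE
+ *	bits 36-51	AFTER_TRIGGER_SIZE (total event size in bytes)
+ */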
 typedef struct AfterTriggerSharedData *AfterTriggerShared;
 
 typedef struct AfterTriggerSharedData
@@ -3683,6 +3664,9 @@ typedef struct AfterTriggerEventDataZeroCtids
 }			AfterTriggerEventDataZeroCtids;
 
 #define SizeofTriggerEvent(evt) \
+	(((evt)->ate_flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT)
+
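+/*
+ * BasicSizeofTriggerEvent gives the size of the fixed part of the event, as
+ * determined by the tuple-source bits alone; SizeofTriggerEvent above returns
+ * the full size (fixed part plus any trailing row-ids) that
+ * afterTriggerAddEvent() stashes in the AFTER_TRIGGER_SIZE bits.
+ */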
+#define BasicSizeofTriggerEvent(evt) \
 	(((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
 	 sizeof(AfterTriggerEventData) : \
 	 (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
@@ -4035,14 +4019,34 @@ afterTriggerCopyBitmap(Bitmapset *src)
  */
 static void
 afterTriggerAddEvent(AfterTriggerEventList *events,
-					 AfterTriggerEvent event, AfterTriggerShared evtshared)
+					 AfterTriggerEvent event, AfterTriggerShared evtshared,
+					 bytea *rowid1, bytea *rowid2)
 {
-	Size		eventsize = SizeofTriggerEvent(event);
-	Size		needed = eventsize + sizeof(AfterTriggerSharedData);
+	Size		basiceventsize = MAXALIGN(BasicSizeofTriggerEvent(event));
+	Size		eventsize;
+	Size		needed;
 	AfterTriggerEventChunk *chunk;
 	AfterTriggerShared newshared;
 	AfterTriggerEvent newevent;
 
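+	/*
+	 * If the stored event size is still zero, compute it now: the MAXALIGNed
+	 * fixed part plus any trailing row-id varlenas, and remember it in the
+	 * AFTER_TRIGGER_SIZE bits for later calls.
+	 */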
+	if (SizeofTriggerEvent(event) == 0)
+	{
+		eventsize = basiceventsize;
+		if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+			eventsize += MAXALIGN(VARSIZE(rowid1));
+
+		if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+			eventsize += MAXALIGN(VARSIZE(rowid2));
+
+		event->ate_flags |= eventsize << AFTER_TRIGGER_SIZE_SHIFT;
+	}
+	else
+	{
+		eventsize = SizeofTriggerEvent(event);
+	}
+
+	needed = eventsize + sizeof(AfterTriggerSharedData);
+
 	/*
 	 * If empty list or not enough room in the tail chunk, make a new chunk.
 	 * We assume here that a new shared record will always be needed.
@@ -4075,7 +4079,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
 		 * sizes used should be MAXALIGN multiples, to ensure that the shared
 		 * records will be aligned safely.
 		 */
-#define MIN_CHUNK_SIZE 1024
+#define MIN_CHUNK_SIZE (1024*4)
 #define MAX_CHUNK_SIZE (1024*1024)
 
 #if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
@@ -4094,6 +4098,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
 				chunksize *= 2; /* okay, double it */
 			else
 				chunksize /= 2; /* too many shared records */
+			chunksize = Max(chunksize, MIN_CHUNK_SIZE);
 			chunksize = Min(chunksize, MAX_CHUNK_SIZE);
 		}
 		chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize);
@@ -4134,7 +4139,26 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
 
 	/* Insert the data */
 	newevent = (AfterTriggerEvent) chunk->freeptr;
-	memcpy(newevent, event, eventsize);
+	if (!rowid1 && !rowid2)
+	{
+		memcpy(newevent, event, eventsize);
+	}
+	else
+	{
+		Pointer ptr = chunk->freeptr;
+
+		memcpy(newevent, event, basiceventsize);
+		ptr += basiceventsize;
+
+		if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+		{
+			memcpy(ptr, rowid1, MAXALIGN(VARSIZE(rowid1)));
+			ptr += MAXALIGN(VARSIZE(rowid1));
+		}
+
+		if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+			memcpy(ptr, rowid2, MAXALIGN(VARSIZE(rowid2)));
+	}
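+
+	/*
+	 * Resulting layout (a sketch; e.g. a one-CTID event carrying a single
+	 * 20-byte row-id on an 8-byte-MAXALIGN build):
+	 *
+	 *	offset 0:	fixed part (here AfterTriggerEventDataOneCtid), MAXALIGNed
+	 *	then:		rowid1 varlena, padded to MAXALIGN(20) = 24 bytes
+	 *	then:		rowid2 varlena, if AFTER_TRIGGER_ROWID2 is set
+	 */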
 	/* ... and link the new event to its shared record */
 	newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
 	newevent->ate_flags |= (char *) newshared - (char *) newevent;
@@ -4294,6 +4318,7 @@ AfterTriggerExecute(EState *estate,
 	int			tgindx;
 	bool		should_free_trig = false;
 	bool		should_free_new = false;
+	Pointer		ptr;
 
 	/*
 	 * Locate trigger in trigdesc.  It might not be present, and in fact the
@@ -4329,15 +4354,17 @@ AfterTriggerExecute(EState *estate,
 			{
 				Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();
 
-				if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
-											 trig_tuple_slot1))
+				if (!tuplestore_force_gettupleslot(fdw_tuplestore, true, false,
+												   trig_tuple_slot1))
 					elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
 
 				if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
 					TRIGGER_EVENT_UPDATE &&
-					!tuplestore_gettupleslot(fdw_tuplestore, true, false,
-											 trig_tuple_slot2))
+					!tuplestore_force_gettupleslot(fdw_tuplestore, true, false,
+												   trig_tuple_slot2))
 					elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
+				trig_tuple_slot1->tts_tid = event->ate_ctid1;
+				trig_tuple_slot2->tts_tid = event->ate_ctid2;
 			}
 			/* fall through */
 		case AFTER_TRIGGER_FDW_REUSE:
@@ -4369,13 +4396,26 @@ AfterTriggerExecute(EState *estate,
 			break;
 
 		default:
-			if (ItemPointerIsValid(&(event->ate_ctid1)))
+			ptr = (Pointer) event + MAXALIGN(BasicSizeofTriggerEvent(event));
+			if (ItemPointerIsValid(&(event->ate_ctid1)) ||
+				(event->ate_flags & AFTER_TRIGGER_ROWID1))
 			{
+				Datum		tupleid;
+
 				TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
 																 src_relInfo);
 
-				if (!table_tuple_fetch_row_version(src_rel,
-												   &(event->ate_ctid1),
+				if (event->ate_flags & AFTER_TRIGGER_ROWID1)
+				{
+					tupleid = PointerGetDatum(ptr);
+					ptr += MAXALIGN(VARSIZE(ptr));
+				}
+				else
+				{
+					tupleid = PointerGetDatum(&(event->ate_ctid1));
+				}
+
+				if (!table_tuple_fetch_row_version(src_rel, tupleid,
 												   SnapshotAny,
 												   src_slot))
 					elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
@@ -4411,13 +4451,23 @@ AfterTriggerExecute(EState *estate,
 			/* don't touch ctid2 if not there */
 			if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
 				 (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
-				ItemPointerIsValid(&(event->ate_ctid2)))
+				(ItemPointerIsValid(&(event->ate_ctid2)) ||
+				 (event->ate_flags & AFTER_TRIGGER_ROWID2)))
 			{
+				Datum		tupleid;
+
 				TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
 																 dst_relInfo);
 
-				if (!table_tuple_fetch_row_version(dst_rel,
-												   &(event->ate_ctid2),
+				if (event->ate_flags & AFTER_TRIGGER_ROWID2)
+				{
+					tupleid = PointerGetDatum(ptr);
+				}
+				else
+				{
+					tupleid = PointerGetDatum(&(event->ate_ctid2));
+				}
+				if (!table_tuple_fetch_row_version(dst_rel, tupleid,
 												   SnapshotAny,
 												   dst_slot))
 					elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
@@ -4591,7 +4641,7 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
 		{
 			deferred_found = true;
 			/* add it to move_list */
-			afterTriggerAddEvent(move_list, event, evtshared);
+			afterTriggerAddEvent(move_list, event, evtshared, NULL, NULL);
 			/* mark original copy "done" so we don't do it again */
 			event->ate_flags |= AFTER_TRIGGER_DONE;
 		}
@@ -6084,6 +6135,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 	int			tgtype_level;
 	int			i;
 	Tuplestorestate *fdw_tuplestore = NULL;
+	bytea	   *rowId1 = NULL;
+	bytea	   *rowId2 = NULL;
 
 	/*
 	 * Check state.  We use a normal test not Assert because it is possible to
@@ -6177,6 +6230,21 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 	 * if so.  This preserves the behavior that statement-level triggers fire
 	 * just once per statement and fire after row-level triggers.
 	 */
+
+	/*
+	 * Determine base flags.  This must be done before the switch below,
+	 * which may OR AFTER_TRIGGER_ROWID1/AFTER_TRIGGER_ROWID2 into ate_flags.
+	 */
+	if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
+	{
+		if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+		{
+			if (relkind == RELKIND_PARTITIONED_TABLE)
+				new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+			else
+				new_event.ate_flags = AFTER_TRIGGER_2CTID;
+		}
+		else
+			new_event.ate_flags = AFTER_TRIGGER_1CTID;
+	}
+
 	switch (event)
 	{
 		case TRIGGER_EVENT_INSERT:
@@ -6187,6 +6255,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 				Assert(newslot != NULL);
 				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
 				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+				{
+					bool	isnull;
+					Datum	val;
+
+					val = slot_getsysattr(newslot, RowIdAttributeNumber,
+										  &isnull);
+					Assert(!isnull);
+					rowId1 = DatumGetByteaP(val);
+					new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+				}
 			}
 			else
 			{
@@ -6206,6 +6281,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 				Assert(newslot == NULL);
 				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
 				ItemPointerSetInvalid(&(new_event.ate_ctid2));
+				if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+				{
+					bool	isnull;
+					Datum	val;
+
+					val = slot_getsysattr(oldslot, RowIdAttributeNumber,
+										  &isnull);
+					Assert(!isnull);
+					rowId1 = DatumGetByteaP(val);
+					new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+				}
 			}
 			else
 			{
@@ -6221,10 +6303,54 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 			tgtype_event = TRIGGER_TYPE_UPDATE;
 			if (row_trigger)
 			{
+				bool		src_rowid = false,
+							dst_rowid = false;
+
 				Assert(oldslot != NULL);
 				Assert(newslot != NULL);
 				ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
 				ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
+				if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+				{
+					Relation src_rel = src_partinfo->ri_RelationDesc;
+					Relation dst_rel = dst_partinfo->ri_RelationDesc;
+
+					src_rowid = table_get_row_ref_type(src_rel) ==
+								ROW_REF_ROWID;
+					dst_rowid = table_get_row_ref_type(dst_rel) ==
+								ROW_REF_ROWID;
+				}
+				else
+				{
+					if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+					{
+						src_rowid = true;
+						dst_rowid = true;
+					}
+				}
+
+				if (src_rowid)
+				{
+					Datum	val;
+					bool	isnull;
+
+					val = slot_getsysattr(oldslot,
+										  RowIdAttributeNumber,
+										  &isnull);
+					Assert(!isnull);
+					rowId1 = DatumGetByteaP(val);
+					new_event.ate_flags |= AFTER_TRIGGER_ROWID1;
+				}
+
+				if (dst_rowid)
+				{
+					Datum	val;
+					bool	isnull;
+
+					val = slot_getsysattr(newslot,
+										  RowIdAttributeNumber,
+										  &isnull);
+					Assert(!isnull);
+					rowId2 = DatumGetByteaP(val);
+					new_event.ate_flags |= AFTER_TRIGGER_ROWID2;
+				}
 
 				/*
 				 * Also remember the OIDs of partitions to fetch these tuples
@@ -6262,20 +6388,6 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 			break;
 	}
 
-	/* Determine flags */
-	if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
-	{
-		if (row_trigger && event == TRIGGER_EVENT_UPDATE)
-		{
-			if (relkind == RELKIND_PARTITIONED_TABLE)
-				new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
-			else
-				new_event.ate_flags = AFTER_TRIGGER_2CTID;
-		}
-		else
-			new_event.ate_flags = AFTER_TRIGGER_1CTID;
-	}
-
 	/* else, we'll initialize ate_flags for each trigger */
 
 	tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
@@ -6441,7 +6553,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 		new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols);
 
 		afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
-							 &new_event, &new_shared);
+							 &new_event, &new_shared, rowId1, rowId2);
 	}
 
 	/*
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 9d18ce8c6b2..286a0f8f222 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -615,7 +615,7 @@ IndexSupportsBackwardScan(Oid indexid)
 	idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel);
 
 	/* Fetch the index AM's API struct */
-	amroutine = GetIndexAmRoutineByAmId(idxrelrec->relam, false);
+	amroutine = GetIndexAmRoutineByAmId(indexid, idxrelrec->relam, false);
 
 	result = amroutine->amcanbackward;
 
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index bf3a08c5f08..928566b3e40 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -48,6 +48,9 @@
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/jsonpath.h"
 #include "utils/lsyscache.h"
 #include "utils/typcache.h"
 
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 6b7997465d0..5d1a31566e7 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4388,7 +4388,9 @@ ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext,
 						op->resnull);
 	*op->resvalue = d;
 	/* this ought to be unreachable, but it's cheap enough to check */
-	if (unlikely(*op->resnull))
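+	/*
+	 * Row-identifier system attributes (rowid, ctid) may legitimately come
+	 * back null here, so exempt them from the sanity check.
+	 */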
+	if (op->d.var.attnum != RowIdAttributeNumber &&
+		op->d.var.attnum != SelfItemPointerAttributeNumber &&
+		unlikely(*op->resnull))
 		elog(ERROR, "failed to fetch attribute from slot");
 }
 
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 1d82b64b897..a40aebb1ef1 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -299,7 +299,6 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 					  List *arbiterIndexes,
 					  bool onlySummarizing)
 {
-	ItemPointer tupleid = &slot->tts_tid;
 	List	   *result = NIL;
 	int			i;
 	int			numIndices;
@@ -309,8 +308,20 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 	ExprContext *econtext;
 	Datum		values[INDEX_MAX_KEYS];
 	bool		isnull[INDEX_MAX_KEYS];
+	ItemPointer	tupleid;
 
-	Assert(ItemPointerIsValid(tupleid));
+	if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+	{
+		bool	isnull;
+		tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull));
+		Assert(!isnull);
+	}
+	else
+	{
+		Assert(ItemPointerIsValid(&slot->tts_tid));
+		tupleid = &slot->tts_tid;
+	}
 
 	/*
 	 * Get information from the result relation info structure.
@@ -501,6 +512,406 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 	return result;
 }
 
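+/* ----------------------------------------------------------------
+ *		ExecUpdateIndexTuples
+ *
+ *		This routine takes care of index entries when a tuple is updated.
+ *		For MVCC-aware index AMs it calls index_update() with both the old
+ *		and the new tuple versions; for other AMs it inserts an entry for
+ *		the new version via index_insert(), as before.
+ *
+ *		Like ExecInsertIndexTuples, returns a list of index OIDs that may
+ *		have uniqueness or exclusion conflicts and must be rechecked later.
+ * ----------------------------------------------------------------
+ */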
+List *
+ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,
+					  TupleTableSlot *slot,
+					  TupleTableSlot *oldSlot,
+					  EState *estate,
+					  bool noDupErr,
+					  bool *specConflict,
+					  List *arbiterIndexes,
+					  bool onlySummarizing)
+{
+	List	   *result = NIL;
+	int			i;
+	int			numIndices;
+	RelationPtr relationDescs;
+	Relation	heapRelation;
+	IndexInfo **indexInfoArray;
+	ExprContext *econtext;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	ItemPointer	tupleid;
+
+	if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+	{
+		bool	isnull;
+		tupleid = DatumGetItemPointer(slot_getsysattr(slot, RowIdAttributeNumber, &isnull));
+		Assert(!isnull);
+	}
+	else
+	{
+		Assert(ItemPointerIsValid(&slot->tts_tid));
+		tupleid = &slot->tts_tid;
+	}
+
+	/*
+	 * Get information from the result relation info structure.
+	 */
+	numIndices = resultRelInfo->ri_NumIndices;
+	relationDescs = resultRelInfo->ri_IndexRelationDescs;
+	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+	heapRelation = resultRelInfo->ri_RelationDesc;
+
+	/* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+	Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+	/*
+	 * for each index, form and update (or insert) the index tuple
+	 */
+	for (i = 0; i < numIndices; i++)
+	{
+		Relation	indexRelation = relationDescs[i];
+		IndexInfo  *indexInfo;
+		bool		applyNoDupErr;
+		IndexUniqueCheck checkUnique;
+		bool		satisfiesConstraint;
+		bool		new_valid = true;
+
+		if (indexRelation == NULL)
+			continue;
+
+		indexInfo = indexInfoArray[i];
+
+		/* If the index is marked as read-only, ignore it */
+		if (!indexInfo->ii_ReadyForInserts)
+			continue;
+
+		/*
+		 * Skip processing of non-summarizing indexes if we only update
+		 * summarizing indexes
+		 */
+		if (onlySummarizing && !indexInfo->ii_Summarizing)
+			continue;
+
+		/*
+		 * We will use the EState's per-tuple context for evaluating predicates
+		 * and index expressions (creating it if it's not already there).
+		 */
+		econtext = GetPerTupleExprContext(estate);
+
+		/* Arrange for econtext's scan tuple to be the tuple under test */
+		econtext->ecxt_scantuple = slot;
+
+		/* Check for partial index */
+		if (indexInfo->ii_Predicate != NIL)
+		{
+			ExprState  *predicate;
+
+			/*
+			 * If predicate state not set up yet, create it (in the estate's
+			 * per-query context)
+			 */
+			predicate = indexInfo->ii_PredicateState;
+			if (predicate == NULL)
+			{
+				predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+				indexInfo->ii_PredicateState = predicate;
+			}
+
+			/* Skip this index-update if the predicate isn't satisfied */
+			if (!ExecQual(predicate, econtext))
+			{
+				if (!indexRelation->rd_indam->ammvccaware)
+					continue;
+				new_valid = false;
+			}
+		}
+
+		/*
+		 * FormIndexDatum fills in its values and isnull parameters with the
+		 * appropriate values for the column(s) of the index.
+		 */
+		FormIndexDatum(indexInfo,
+					   slot,
+					   estate,
+					   values,
+					   isnull);
+
+		/* Check whether to apply noDupErr to this index */
+		applyNoDupErr = noDupErr &&
+			(arbiterIndexes == NIL ||
+			 list_member_oid(arbiterIndexes,
+							 indexRelation->rd_index->indexrelid));
+
+		/*
+		 * The index AM does the actual insertion, plus uniqueness checking.
+		 *
+		 * For an immediate-mode unique index, we just tell the index AM to
+		 * throw error if not unique.
+		 *
+		 * For a deferrable unique index, we tell the index AM to just detect
+		 * possible non-uniqueness, and we add the index OID to the result
+		 * list if further checking is needed.
+		 *
+		 * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+		 * the same as for a deferrable unique index.
+		 */
+		if (!indexRelation->rd_index->indisunique)
+			checkUnique = UNIQUE_CHECK_NO;
+		else if (applyNoDupErr)
+			checkUnique = UNIQUE_CHECK_PARTIAL;
+		else if (indexRelation->rd_index->indimmediate)
+			checkUnique = UNIQUE_CHECK_YES;
+		else
+			checkUnique = UNIQUE_CHECK_PARTIAL;
+
+		if (indexRelation->rd_indam->ammvccaware)
+		{
+			Datum		valuesOld[INDEX_MAX_KEYS];
+			bool		isnullOld[INDEX_MAX_KEYS];
+			Datum		oldTupleid;
+			bool		old_valid = true;
+
+			if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+			{
+				bool	isnull;
+				oldTupleid = slot_getsysattr(oldSlot, RowIdAttributeNumber, &isnull);
+				Assert(!isnull);
+			}
+			else
+			{
+				Assert(ItemPointerIsValid(&oldSlot->tts_tid));
+				oldTupleid = PointerGetDatum(&oldSlot->tts_tid);
+			}
+
+			econtext = GetPerTupleExprContext(estate);
+			econtext->ecxt_scantuple = oldSlot;
+
+			/* Check for partial index */
+			if (indexInfo->ii_Predicate != NIL)
+			{
+				ExprState  *predicate;
+
+				/*
+				 * If predicate state not set up yet, create it (in the
+				 * estate's per-query context)
+				 */
+				predicate = indexInfo->ii_PredicateState;
+				if (predicate == NULL)
+				{
+					predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+					indexInfo->ii_PredicateState = predicate;
+				}
+
+				/* Skip this index-update if the predicate isn't satisfied */
+				if (!ExecQual(predicate, econtext))
+					old_valid = false;
+			}
+
+			FormIndexDatum(indexInfo,
+						   oldSlot,
+						   estate,
+						   valuesOld,
+						   isnullOld);
+
+			satisfiesConstraint =
+				index_update(indexRelation, /* index relation */
+							 new_valid,
+							 old_valid,
+							 values,	/* array of index Datums */
+							 isnull,	/* null flags */
+							 ItemPointerGetDatum(tupleid),	/* tid of heap tuple */
+							 valuesOld,
+							 isnullOld,
+							 oldTupleid,
+							 heapRelation,	/* heap relation */
+							 checkUnique,	/* type of uniqueness check to do */
+							 indexInfo);	/* index AM may need this */
+
+		}
+		else
+		{
+			bool		indexUnchanged;
+
+			/*
+			 * There's definitely going to be an index_insert() call for this
+			 * index.  If we're being called as part of an UPDATE statement,
+			 * consider if the 'indexUnchanged' = true hint should be passed.
+			 */
+			indexUnchanged = index_unchanged_by_update(resultRelInfo,
+													   estate,
+													   indexInfo,
+													   indexRelation);
+
+			satisfiesConstraint =
+				index_insert(indexRelation, /* index relation */
+							 values,	/* array of index Datums */
+							 isnull,	/* null flags */
+							 tupleid,	/* tid of heap tuple */
+							 heapRelation,	/* heap relation */
+							 checkUnique,	/* type of uniqueness check to do */
+							 indexUnchanged,	/* UPDATE without logical change? */
+							 indexInfo);	/* index AM may need this */
+		}
+
+		/*
+		 * If the index has an associated exclusion constraint, check that.
+		 * This is simpler than the process for uniqueness checks since we
+		 * always insert first and then check.  If the constraint is deferred,
+		 * we check now anyway, but don't throw error on violation or wait for
+		 * a conclusive outcome from a concurrent insertion; instead we'll
+		 * queue a recheck event.  Similarly, noDupErr callers (speculative
+		 * inserters) will recheck later, and wait for a conclusive outcome
+		 * then.
+		 *
+		 * An index for an exclusion constraint can't also be UNIQUE (not an
+		 * essential property, we just don't allow it in the grammar), so no
+		 * need to preserve the prior state of satisfiesConstraint.
+		 */
+		if (indexInfo->ii_ExclusionOps != NULL)
+		{
+			bool		violationOK;
+			CEOUC_WAIT_MODE waitMode;
+
+			if (applyNoDupErr)
+			{
+				violationOK = true;
+				waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+			}
+			else if (!indexRelation->rd_index->indimmediate)
+			{
+				violationOK = true;
+				waitMode = CEOUC_NOWAIT;
+			}
+			else
+			{
+				violationOK = false;
+				waitMode = CEOUC_WAIT;
+			}
+
+			satisfiesConstraint =
+				check_exclusion_or_unique_constraint(heapRelation,
+													 indexRelation, indexInfo,
+													 tupleid, values, isnull,
+													 estate, false,
+													 waitMode, violationOK, NULL);
+		}
+
+		if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
+			 indexInfo->ii_ExclusionOps != NULL) &&
+			!satisfiesConstraint)
+		{
+			/*
+			 * The tuple potentially violates the uniqueness or exclusion
+			 * constraint, so make a note of the index so that we can re-check
+			 * it later.  Speculative inserters are told if there was a
+			 * speculative conflict, since that always requires a restart.
+			 */
+			result = lappend_oid(result, RelationGetRelid(indexRelation));
+			if (indexRelation->rd_index->indimmediate && specConflict)
+				*specConflict = true;
+		}
+	}
+
+	return result;
+}
+
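+/* ----------------------------------------------------------------
+ *		ExecDeleteIndexTuples
+ *
+ *		Remove the index entries for a just-deleted tuple from every
+ *		MVCC-aware index on the result relation.  Indexes whose AM is not
+ *		MVCC-aware are skipped; their dead entries are cleaned up later
+ *		(e.g. by vacuum), as before.
+ * ----------------------------------------------------------------
+ */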
+void
+ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
+					  EState *estate)
+{
+	int			i;
+	int			numIndices;
+	RelationPtr relationDescs;
+	Relation	heapRelation;
+	IndexInfo **indexInfoArray;
+	ExprContext *econtext;
+	Datum		values[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	Datum		tupleid;
+
+	if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
+	{
+		bool	isnull;
+		tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
+		Assert(!isnull);
+	}
+	else
+	{
+		Assert(ItemPointerIsValid(&slot->tts_tid));
+		tupleid = PointerGetDatum(&slot->tts_tid);
+	}
+
+	/*
+	 * Get information from the result relation info structure.
+	 */
+	numIndices = resultRelInfo->ri_NumIndices;
+	relationDescs = resultRelInfo->ri_IndexRelationDescs;
+	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+	heapRelation = resultRelInfo->ri_RelationDesc;
+
+	/* Sanity check: slot must belong to the same rel as the resultRelInfo. */
+	Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));
+
+	/*
+	 * for each index, form and delete the index tuple
+	 */
+	for (i = 0; i < numIndices; i++)
+	{
+		Relation	indexRelation = relationDescs[i];
+		IndexInfo  *indexInfo;
+
+		if (indexRelation == NULL)
+			continue;
+
+		indexInfo = indexInfoArray[i];
+
+		/* If the index is marked as read-only, ignore it */
+		if (!indexInfo->ii_ReadyForInserts)
+			continue;
+
+		if (!indexRelation->rd_indam->ammvccaware)
+			continue;
+
+		/*
+		 * We will use the EState's per-tuple context for evaluating predicates
+		 * and index expressions (creating it if it's not already there).
+		 */
+		econtext = GetPerTupleExprContext(estate);
+
+		/* Arrange for econtext's scan tuple to be the tuple under test */
+		econtext->ecxt_scantuple = slot;
+
+		/* Check for partial index */
+		if (indexInfo->ii_Predicate != NIL)
+		{
+			ExprState  *predicate;
+
+			/*
+			 * If predicate state not set up yet, create it (in the estate's
+			 * per-query context)
+			 */
+			predicate = indexInfo->ii_PredicateState;
+			if (predicate == NULL)
+			{
+				predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+				indexInfo->ii_PredicateState = predicate;
+			}
+
+			/* Skip this index-update if the predicate isn't satisfied */
+			if (!ExecQual(predicate, econtext))
+				continue;
+		}
+
+		/*
+		 * FormIndexDatum fills in its values and isnull parameters with the
+		 * appropriate values for the column(s) of the index.
+		 */
+		FormIndexDatum(indexInfo,
+					   slot,
+					   estate,
+					   values,
+					   isnull);
+
+		index_delete(indexRelation, /* index relation */
+					 values,	/* array of index Datums */
+					 isnull,	/* null flags */
+					 tupleid,	/* row identifier of heap tuple */
+					 heapRelation,	/* heap relation */
+					 indexInfo);	/* index AM may need this */
+	}
+}
+
 /* ----------------------------------------------------------------
  *		ExecCheckIndexConstraints
  *
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 4c5a7bbf620..334458574ca 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -869,13 +869,15 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 			Oid			relid;
 			Relation	relation;
 			ExecRowMark *erm;
+			RangeTblEntry *rangeEntry;
 
 			/* ignore "parent" rowmarks; they are irrelevant at runtime */
 			if (rc->isParent)
 				continue;
 
 			/* get relation's OID (will produce InvalidOid if subquery) */
-			relid = exec_rt_fetch(rc->rti, estate)->relid;
+			rangeEntry = exec_rt_fetch(rc->rti, estate);
+			relid = rangeEntry->relid;
 
 			/* open relation, if we need to access it for this mark type */
 			switch (rc->markType)
@@ -908,6 +910,10 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 			erm->prti = rc->prti;
 			erm->rowmarkId = rc->rowmarkId;
 			erm->markType = rc->markType;
+			if (erm->markType == ROW_MARK_COPY)
+				erm->refType = ROW_REF_COPY;
+			else
+				erm->refType = rangeEntry->reftype;
 			erm->strength = rc->strength;
 			erm->waitPolicy = rc->waitPolicy;
 			erm->ermActive = false;
@@ -1295,6 +1301,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 	resultRelInfo->ri_ChildToRootMap = NULL;
 	resultRelInfo->ri_ChildToRootMapValid = false;
 	resultRelInfo->ri_CopyMultiInsertBuffer = NULL;
+
+	resultRelInfo->ri_RowRefType = table_get_row_ref_type(resultRelationDesc);
 }
 
 /*
@@ -2429,17 +2437,28 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
 	aerm->rowmark = erm;
 
 	/* Look up the resjunk columns associated with this rowmark */
-	if (erm->markType != ROW_MARK_COPY)
+	if (erm->refType == ROW_REF_TID)
 	{
+		Assert(erm->markType != ROW_MARK_COPY);
 		/* need ctid for all methods other than COPY */
 		snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
 		aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
 													   resname);
 		if (!AttributeNumberIsValid(aerm->ctidAttNo))
 			elog(ERROR, "could not find junk %s column", resname);
+	}
+	else if (erm->refType == ROW_REF_ROWID)
+	{
+		Assert(erm->markType != ROW_MARK_COPY);
+		/* need rowid for all methods other than COPY */
+		snprintf(resname, sizeof(resname), "rowid%u", erm->rowmarkId);
+		aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
+													   resname);
+		if (!AttributeNumberIsValid(aerm->ctidAttNo))
+			elog(ERROR, "could not find junk %s column", resname);
 	}
 	else
 	{
+		Assert(erm->markType == ROW_MARK_COPY);
 		/* need wholerow if COPY */
 		snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
 		aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
@@ -2727,8 +2746,9 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot)
 		{
 			/* ordinary table, fetch the tuple */
 			if (!table_tuple_fetch_row_version(erm->relation,
-											   (ItemPointer) DatumGetPointer(datum),
-											   SnapshotAny, slot))
+											   datum,
+											   SnapshotAny,
+											   slot))
 				elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 			return true;
 		}
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 25d2868744e..136e761fa2f 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -125,6 +125,25 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
 	return skey_attoff;
 }
 
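+/*
+ * Get the row identifier of the tuple in 'slot' as a Datum: the rowid
+ * system attribute for ROW_REF_ROWID relations, or a pointer to the slot's
+ * TID otherwise.
+ */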
+static Datum
+slot_get_tupleid(Relation rel, TupleTableSlot *slot)
+{
+	Datum	tupleid;
+
+	if (table_get_row_ref_type(rel) == ROW_REF_ROWID)
+	{
+		bool	isnull;
+		tupleid = slot_getsysattr(slot, RowIdAttributeNumber, &isnull);
+		Assert(!isnull);
+	}
+	else
+	{
+		tupleid = PointerGetDatum(&slot->tts_tid);
+	}
+
+	return tupleid;
+}
+
 /*
  * Search the relation 'rel' for tuple using the index.
  *
@@ -209,7 +228,8 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
 
 		PushActiveSnapshot(GetLatestSnapshot());
 
-		res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+		res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot),
+							   GetLatestSnapshot(),
 							   outslot,
 							   GetCurrentCommandId(false),
 							   lockmode,
@@ -393,7 +413,8 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
 
 		PushActiveSnapshot(GetLatestSnapshot());
 
-		res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+		res = table_tuple_lock(rel, slot_get_tupleid(rel, outslot),
+							   GetLatestSnapshot(),
 							   outslot,
 							   GetCurrentCommandId(false),
 							   lockmode,
@@ -516,7 +537,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 {
 	bool		skip_tuple = false;
 	Relation	rel = resultRelInfo->ri_RelationDesc;
-	ItemPointer tid = &(searchslot->tts_tid);
+	Datum		tupleid = slot_get_tupleid(rel, searchslot);
 
 	/* For now we support only tables. */
 	Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -528,7 +549,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		resultRelInfo->ri_TrigDesc->trig_update_before_row)
 	{
 		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-								  tid, NULL, slot, NULL, NULL))
+								  tupleid, NULL, slot, NULL, NULL))
 			skip_tuple = true;	/* "do nothing" */
 	}
 
@@ -536,6 +557,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 	{
 		List	   *recheckIndexes = NIL;
 		TU_UpdateIndexes update_indexes;
+		TupleTableSlot *oldSlot;
 
 		/* Compute stored generated columns */
 		if (rel->rd_att->constr &&
@@ -549,19 +571,24 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		if (rel->rd_rel->relispartition)
 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
-		simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
-								  &update_indexes);
+		oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
+		simple_table_tuple_update(rel, tupleid, slot, estate->es_snapshot,
+								  &update_indexes, oldSlot);
 
 		if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
-			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
-												   slot, estate, true, false,
+			recheckIndexes = ExecUpdateIndexTuples(resultRelInfo,
+												   slot,
+												   oldSlot,
+												   estate,
+												   false,
 												   NULL, NIL,
 												   (update_indexes == TU_Summarizing));
 
 		/* AFTER ROW UPDATE Triggers */
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
-							 tid, NULL, slot,
+							 NULL, oldSlot, slot,
 							 recheckIndexes, NULL, false);
 
 		list_free(recheckIndexes);
@@ -581,7 +608,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
 {
 	bool		skip_tuple = false;
 	Relation	rel = resultRelInfo->ri_RelationDesc;
-	ItemPointer tid = &searchslot->tts_tid;
+	Datum		tupleid = slot_get_tupleid(rel, searchslot);
 
 	CheckCmdReplicaIdentity(rel, CMD_DELETE);
 
@@ -590,17 +617,25 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
 	{
 		skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-										   tid, NULL, NULL, NULL, NULL);
+										   tupleid, NULL, NULL, NULL, NULL);
 	}
 
 	if (!skip_tuple)
 	{
+		TupleTableSlot *oldSlot;
+
+		oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
+
 		/* OK, delete the tuple */
-		simple_table_tuple_delete(rel, tid, estate->es_snapshot);
+		simple_table_tuple_delete(rel, tupleid, estate->es_snapshot, oldSlot);
+
+		/* delete index entries if necessary */
+		if (resultRelInfo->ri_NumIndices > 0)
+			ExecDeleteIndexTuples(resultRelInfo, oldSlot, estate);
 
 		/* AFTER ROW DELETE Triggers */
 		ExecARDeleteTriggers(estate, resultRelInfo,
-							 tid, NULL, NULL, false);
+							 NULL, oldSlot, NULL, false);
 	}
 }
 
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index c06b2288583..bb65bd078cb 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -1242,9 +1242,19 @@ ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
 		ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo;
 
 		if (rootRelInfo)
-			resultRelInfo->ri_ChildToRootMap =
-				convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc),
-									   RelationGetDescr(rootRelInfo->ri_RelationDesc));
+		{
+			TupleDesc	indesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+			TupleDesc	outdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
+			AttrMap    *attrMap;
+
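+			/*
+			 * For ROWID relations, force a conversion map even when the
+			 * descriptors match by name: the identity-map shortcut of
+			 * build_attrmap_by_name_if_req() is deliberately not taken.
+			 */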
+			if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID)
+				attrMap = build_attrmap_by_name_if_req(indesc, outdesc, false);
+			else
+				attrMap = build_attrmap_by_name(indesc, outdesc, false);
+			if (attrMap)
+				resultRelInfo->ri_ChildToRootMap =
+					convert_tuples_by_name_attrmap(indesc, outdesc, attrMap);
+		}
 		else					/* this isn't a child result rel */
 			resultRelInfo->ri_ChildToRootMap = NULL;
 
@@ -1281,8 +1291,10 @@ ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
 		 * to ignore by passing true for missing_ok.
 		 */
 		oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
-		attrMap = build_attrmap_by_name_if_req(indesc, outdesc,
-											   !childrel->rd_rel->relispartition);
+		if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) != ROW_REF_ROWID)
+			attrMap = build_attrmap_by_name_if_req(indesc, outdesc,
+												   !childrel->rd_rel->relispartition);
+		else
+			attrMap = build_attrmap_by_name(indesc, outdesc,
+											!childrel->rd_rel->relispartition);
 		if (attrMap)
 			resultRelInfo->ri_RootToChildMap =
 				convert_tuples_by_name_attrmap(indesc, outdesc, attrMap);
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 45d1a67a713..6ebddd36c95 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -66,7 +66,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 	ScanDirection direction;
 	IndexScanDesc scandesc;
 	TupleTableSlot *slot;
-	ItemPointer tid;
+	ItemPointer tid = NULL;
 
 	/*
 	 * extract necessary information from index scan node
@@ -118,12 +118,36 @@ IndexOnlyNext(IndexOnlyScanState *node)
 	/*
 	 * OK, now that we have what we need, fetch the next tuple.
 	 */
-	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
+	while (true)
 	{
 		bool		tuple_from_heap = false;
 
 		CHECK_FOR_INTERRUPTS();
 
+		if (scandesc->xs_want_rowid)
+		{
+			NullableDatum rowid;
+
+			/* Time to fetch the next row ID from the index */
+			rowid = index_getnext_rowid(scandesc, direction);
+
+			/* If we're out of index entries, we're done */
+			if (rowid.isnull)
+				break;
+
+			/* Assert(RowidEquals(rowid, &scan->xs_rowid)); */
+		}
+		else
+		{
+			/* Time to fetch the next TID from the index */
+			tid = index_getnext_tid(scandesc, direction);
+
+			/* If we're out of index entries, we're done */
+			if (tid == NULL)
+				break;
+
+			Assert(ItemPointerEquals(tid, &scandesc->xs_heaptid));
+		}
+
 		/*
 		 * We can skip the heap fetch if the TID references a heap page on
 		 * which all tuples are known visible to everybody.  In any case,
@@ -158,7 +182,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		 * It's worth going through this complexity to avoid needing to lock
 		 * the VM buffer, which could cause significant contention.
 		 */
-		if (!VM_ALL_VISIBLE(scandesc->heapRelation,
+		if (!scandesc->xs_want_rowid &&
+			!VM_ALL_VISIBLE(scandesc->heapRelation,
 							ItemPointerGetBlockNumber(tid),
 							&node->ioss_VMBuffer))
 		{
@@ -243,7 +268,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		 * If we didn't access the heap, then we'll need to take a predicate
 		 * lock explicitly, as if we had.  For now we do that at page level.
 		 */
-		if (!tuple_from_heap)
+		if (!tuple_from_heap && !scandesc->xs_want_rowid)
 			PredicateLockPage(scandesc->heapRelation,
 							  ItemPointerGetBlockNumber(tid),
 							  estate->es_snapshot);
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index e459971d32e..049c9841309 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -27,6 +27,7 @@
 #include "executor/nodeLockRows.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
+#include "utils/datum.h"
 #include "utils/rel.h"
 
 
@@ -157,7 +158,16 @@ ExecLockRows(PlanState *pstate)
 		}
 
 		/* okay, try to lock (and fetch) the tuple */
-		tid = *((ItemPointer) DatumGetPointer(datum));
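+		/*
+		 * For a TID mark, copy the TID into a local variable; for a ROWID
+		 * mark, copy the datum into palloc'd memory (it is freed again right
+		 * after table_tuple_lock() below).
+		 */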
+		if (erm->refType == ROW_REF_TID)
+		{
+			tid = *((ItemPointer) DatumGetPointer(datum));
+			datum = PointerGetDatum(&tid);
+		}
+		else
+		{
+			Assert(erm->refType == ROW_REF_ROWID);
+			datum = datumCopy(datum, false, -1);
+		}
 		switch (erm->markType)
 		{
 			case ROW_MARK_EXCLUSIVE:
@@ -182,12 +192,15 @@ ExecLockRows(PlanState *pstate)
 		if (!IsolationUsesXactSnapshot())
 			lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
 
-		test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot,
+		test = table_tuple_lock(erm->relation, datum, estate->es_snapshot,
 								markSlot, estate->es_output_cid,
 								lockmode, erm->waitPolicy,
 								lockflags,
 								&tmfd);
 
+		if (erm->refType == ROW_REF_ROWID)
+			pfree(DatumGetPointer(datum));
+
 		switch (test)
 		{
 			case TM_WouldBlock:
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 27b55334ed4..c10311cddb4 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -140,12 +140,11 @@ static void ExecPendingInserts(EState *estate);
 static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 											   ResultRelInfo *sourcePartInfo,
 											   ResultRelInfo *destPartInfo,
-											   ItemPointer tupleid,
-											   TupleTableSlot *oldslot,
+											   Datum tupleid,
+											   TupleTableSlot *oldSlot,
 											   TupleTableSlot *newslot);
 static bool ExecOnConflictUpdate(ModifyTableContext *context,
 								 ResultRelInfo *resultRelInfo,
-								 ItemPointer conflictTid,
 								 TupleTableSlot *excludedSlot,
 								 bool canSetTag,
 								 TupleTableSlot **returning);
@@ -158,12 +157,12 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
 
 static TupleTableSlot *ExecMerge(ModifyTableContext *context,
 								 ResultRelInfo *resultRelInfo,
-								 ItemPointer tupleid,
+								 Datum tupleid,
 								 bool canSetTag);
 static void ExecInitMerge(ModifyTableState *mtstate, EState *estate);
 static bool ExecMergeMatched(ModifyTableContext *context,
 							 ResultRelInfo *resultRelInfo,
-							 ItemPointer tupleid,
+							 Datum tupleid,
 							 bool canSetTag);
 static void ExecMergeNotMatched(ModifyTableContext *context,
 								ResultRelInfo *resultRelInfo,
@@ -278,66 +277,6 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
 	return ExecProject(projectReturning);
 }
 
-/*
- * ExecCheckTupleVisible -- verify tuple is visible
- *
- * It would not be consistent with guarantees of the higher isolation levels to
- * proceed with avoiding insertion (taking speculative insertion's alternative
- * path) on the basis of another tuple that is not visible to MVCC snapshot.
- * Check for the need to raise a serialization failure, and do so as necessary.
- */
-static void
-ExecCheckTupleVisible(EState *estate,
-					  Relation rel,
-					  TupleTableSlot *slot)
-{
-	if (!IsolationUsesXactSnapshot())
-		return;
-
-	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
-	{
-		Datum		xminDatum;
-		TransactionId xmin;
-		bool		isnull;
-
-		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
-		Assert(!isnull);
-		xmin = DatumGetTransactionId(xminDatum);
-
-		/*
-		 * We should not raise a serialization failure if the conflict is
-		 * against a tuple inserted by our own transaction, even if it's not
-		 * visible to our snapshot.  (This would happen, for example, if
-		 * conflicting keys are proposed for insertion in a single command.)
-		 */
-		if (!TransactionIdIsCurrentTransactionId(xmin))
-			ereport(ERROR,
-					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-					 errmsg("could not serialize access due to concurrent update")));
-	}
-}
-
-/*
- * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
- */
-static void
-ExecCheckTIDVisible(EState *estate,
-					ResultRelInfo *relinfo,
-					ItemPointer tid,
-					TupleTableSlot *tempSlot)
-{
-	Relation	rel = relinfo->ri_RelationDesc;
-
-	/* Redundantly check isolation level */
-	if (!IsolationUsesXactSnapshot())
-		return;
-
-	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
-		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
-	ExecCheckTupleVisible(estate, rel, tempSlot);
-	ExecClearTuple(tempSlot);
-}
-
 /*
  * Initialize to compute stored generated columns for a tuple
  *
@@ -578,6 +517,10 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
 	resultRelInfo->ri_newTupleSlot =
 		table_slot_create(resultRelInfo->ri_RelationDesc,
 						  &estate->es_tupleTable);
+	if (node->onConflictAction == ONCONFLICT_UPDATE)
+		resultRelInfo->ri_oldTupleSlot =
+			table_slot_create(resultRelInfo->ri_RelationDesc,
+							  &estate->es_tupleTable);
 
 	/* Build ProjectionInfo if needed (it probably isn't). */
 	if (need_projection)
@@ -1019,12 +962,19 @@ ExecInsert(ModifyTableContext *context,
 		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
 		{
 			/* Perform a speculative insertion. */
-			uint32		specToken;
-			ItemPointerData conflictTid;
-			bool		specConflict;
 			List	   *arbiterIndexes;
+			TupleTableSlot *existing = NULL,
+					   *returningSlot,
+					   *inserted;
+			LockTupleMode lockmode = LockTupleExclusive;
 
 			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
+			returningSlot = ExecGetReturningSlot(estate, resultRelInfo);
+			if (onconflict == ONCONFLICT_UPDATE)
+			{
+				lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+				existing = resultRelInfo->ri_onConflict->oc_Existing;
+			}
 
 			/*
 			 * Do a non-conclusive check for conflicts first.
@@ -1041,23 +991,29 @@ ExecInsert(ModifyTableContext *context,
 			 */
 	vlock:
 			CHECK_FOR_INTERRUPTS();
-			specConflict = false;
-			if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate,
-										   &conflictTid, arbiterIndexes))
+
+			inserted = table_tuple_insert_with_arbiter(resultRelInfo,
+													   slot, estate->es_output_cid,
+													   0, NULL, arbiterIndexes, estate,
+													   lockmode, existing, returningSlot);
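+
+			/*
+			 * The table AM now both checks the arbiter indexes and inserts
+			 * the tuple.  On conflict it returns NULL and, for ON CONFLICT
+			 * DO UPDATE, fills 'existing' with the conflicting tuple already
+			 * locked; an empty 'existing' means we must retry.  On success
+			 * it returns the inserted tuple.
+			 */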
+			if (!inserted)
 			{
-				/* committed conflict tuple found */
 				if (onconflict == ONCONFLICT_UPDATE)
 				{
+					TupleTableSlot *returning = NULL;
+
+					if (TTS_EMPTY(existing))
+						goto vlock;
+
 					/*
 					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
 					 * part.  Be prepared to retry if the UPDATE fails because
 					 * of another concurrent UPDATE/DELETE to the conflict
 					 * tuple.
 					 */
-					TupleTableSlot *returning = NULL;
 
 					if (ExecOnConflictUpdate(context, resultRelInfo,
-											 &conflictTid, slot, canSetTag,
+											 slot, canSetTag,
 											 &returning))
 					{
 						InstrCountTuples2(&mtstate->ps, 1);
@@ -1080,57 +1036,13 @@ ExecInsert(ModifyTableContext *context,
 					 * ExecGetReturningSlot() in the DO NOTHING case...
 					 */
 					Assert(onconflict == ONCONFLICT_NOTHING);
-					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
-										ExecGetReturningSlot(estate, resultRelInfo));
 					InstrCountTuples2(&mtstate->ps, 1);
 					return NULL;
 				}
 			}
-
-			/*
-			 * Before we start insertion proper, acquire our "speculative
-			 * insertion lock".  Others can use that to wait for us to decide
-			 * if we're going to go ahead with the insertion, instead of
-			 * waiting for the whole transaction to complete.
-			 */
-			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
-
-			/* insert the tuple, with the speculative token */
-			table_tuple_insert_speculative(resultRelationDesc, slot,
-										   estate->es_output_cid,
-										   0,
-										   NULL,
-										   specToken);
-
-			/* insert index entries for tuple */
-			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
-												   slot, estate, false, true,
-												   &specConflict,
-												   arbiterIndexes,
-												   false);
-
-			/* adjust the tuple's state accordingly */
-			table_tuple_complete_speculative(resultRelationDesc, slot,
-											 specToken, !specConflict);
-
-			/*
-			 * Wake up anyone waiting for our decision.  They will re-check
-			 * the tuple, see that it's no longer speculative, and wait on our
-			 * XID as if this was a regularly inserted tuple all along.  Or if
-			 * we killed the tuple, they will see it's dead, and proceed as if
-			 * the tuple never existed.
-			 */
-			SpeculativeInsertionLockRelease(GetCurrentTransactionId());
-
-			/*
-			 * If there was a conflict, start from the beginning.  We'll do
-			 * the pre-check again, which will now find the conflicting tuple
-			 * (unless it aborts before we get there).
-			 */
-			if (specConflict)
+			else
 			{
-				list_free(recheckIndexes);
-				goto vlock;
+				slot = inserted;
 			}
 
 			/* Since there was no insertion conflict, we're done */
@@ -1138,9 +1050,9 @@ ExecInsert(ModifyTableContext *context,
 		else
 		{
 			/* insert the tuple normally */
-			table_tuple_insert(resultRelationDesc, slot,
-							   estate->es_output_cid,
-							   0, NULL);
+			slot = table_tuple_insert(resultRelationDesc, slot,
+									  estate->es_output_cid,
+									  0, NULL);
 
 			/* insert index entries for tuple */
 			if (resultRelInfo->ri_NumIndices > 0)
@@ -1167,7 +1079,7 @@ ExecInsert(ModifyTableContext *context,
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
 							 NULL,
-							 NULL,
+							 resultRelInfo->ri_oldTupleSlot,
 							 slot,
 							 NULL,
 							 mtstate->mt_transition_capture,
@@ -1316,12 +1228,20 @@ ExecPendingInserts(EState *estate)
  */
 static bool
 ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-				   ItemPointer tupleid, HeapTuple oldtuple,
+				   Datum tupleid, HeapTuple oldtuple,
 				   TupleTableSlot **epqreturnslot, TM_Result *result)
 {
 	if (result)
 		*result = TM_Ok;
 
+	/*
+	 * Open the table's indexes, if we have not done so already, so that we
+	 * can delete index entries.
+	 */
+	if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
+		resultRelInfo->ri_IndexRelationDescs == NULL)
+		ExecOpenIndices(resultRelInfo, false);
+
 	/* BEFORE ROW DELETE triggers */
 	if (resultRelInfo->ri_TrigDesc &&
 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
@@ -1347,7 +1267,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
  */
 static TM_Result
 ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-			  ItemPointer tupleid, bool changingPart)
+			  Datum tupleid, bool changingPart, int options,
+			  TupleTableSlot *oldSlot)
 {
 	EState	   *estate = context->estate;
 
@@ -1355,9 +1276,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 							  estate->es_output_cid,
 							  estate->es_snapshot,
 							  estate->es_crosscheck_snapshot,
-							  true /* wait for commit */ ,
+							  options,
 							  &context->tmfd,
-							  changingPart);
+							  changingPart,
+							  oldSlot);
 }
 
 /*
@@ -1369,12 +1291,17 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
  */
 static void
 ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-				   ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
+				   HeapTuple oldtuple,
+				   TupleTableSlot *slot, bool changingPart)
 {
 	ModifyTableState *mtstate = context->mtstate;
 	EState	   *estate = context->estate;
 	TransitionCaptureState *ar_delete_trig_tcs;
 
+	/* delete index entries if necessary */
+	if (resultRelInfo->ri_NumIndices > 0)
+		ExecDeleteIndexTuples(resultRelInfo, slot, context->estate);
+
 	/*
 	 * If this delete is the result of a partition key update that moved the
 	 * tuple to a new partition, put this row into the transition OLD TABLE,
@@ -1387,8 +1314,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	{
 		ExecARUpdateTriggers(estate, resultRelInfo,
 							 NULL, NULL,
-							 tupleid, oldtuple,
-							 NULL, NULL, mtstate->mt_transition_capture,
+							 oldtuple,
+							 slot, NULL, NULL, mtstate->mt_transition_capture,
 							 false);
 
 		/*
@@ -1399,10 +1326,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	}
 
 	/* AFTER ROW DELETE Triggers */
-	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
+	ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
 						 ar_delete_trig_tcs, changingPart);
 }
 
+/*
+ * Initializes the tuple slot in a ResultRelInfo for the DELETE action.
+ *
+ * We mark 'projectNewInfoValid' even though the projections themselves
+ * are not initialized here.
+ */
+static void
+ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
+						ResultRelInfo *resultRelInfo)
+{
+	EState	   *estate = mtstate->ps.state;
+
+	Assert(!resultRelInfo->ri_projectNewInfoValid);
+
+	resultRelInfo->ri_oldTupleSlot =
+		table_slot_create(resultRelInfo->ri_RelationDesc,
+						  &estate->es_tupleTable);
+	resultRelInfo->ri_projectNewInfoValid = true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecDelete
  *
@@ -1428,8 +1375,9 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 static TupleTableSlot *
 ExecDelete(ModifyTableContext *context,
 		   ResultRelInfo *resultRelInfo,
-		   ItemPointer tupleid,
+		   Datum tupleid,
 		   HeapTuple oldtuple,
+		   TupleTableSlot *oldSlot,
 		   bool processReturning,
 		   bool changingPart,
 		   bool canSetTag,
@@ -1493,6 +1441,11 @@ ExecDelete(ModifyTableContext *context,
 	}
 	else
 	{
+		int			options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
+
+		if (!IsolationUsesXactSnapshot())
+			options |= TABLE_MODIFY_LOCK_UPDATED;
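+
+		/*
+		 * TABLE_MODIFY_FETCH_OLD_TUPLE asks the table AM to return the
+		 * deleted row in oldSlot; the epilogue, EPQ, and RETURNING code
+		 * below rely on that.  TABLE_MODIFY_LOCK_UPDATED additionally makes
+		 * the AM find and lock the latest row version after a concurrent
+		 * update, so the TM_Updated branch can run EvalPlanQual() without a
+		 * separate table_tuple_lock() call.
+		 */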
+
 		/*
 		 * delete the tuple
 		 *
@@ -1503,7 +1456,8 @@ ExecDelete(ModifyTableContext *context,
 		 * transaction-snapshot mode transactions.
 		 */
 ldelete:
-		result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
+		result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
+							   options, oldSlot);
 
 		if (tmresult)
 			*tmresult = result;
@@ -1550,7 +1504,6 @@ ExecDelete(ModifyTableContext *context,
 
 			case TM_Updated:
 				{
-					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
 
 					if (IsolationUsesXactSnapshot())
@@ -1559,87 +1512,29 @@ ExecDelete(ModifyTableContext *context,
 								 errmsg("could not serialize access due to concurrent update")));
 
 					/*
-					 * Already know that we're going to need to do EPQ, so
-					 * fetch tuple directly into the right slot.
+					 * We need to do EPQ. The latest tuple is already found
+					 * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
 					 */
-					EvalPlanQualBegin(context->epqstate);
-					inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
-												 resultRelInfo->ri_RangeTableIndex);
-
-					result = table_tuple_lock(resultRelationDesc, tupleid,
-											  estate->es_snapshot,
-											  inputslot, estate->es_output_cid,
-											  LockTupleExclusive, LockWaitBlock,
-											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &context->tmfd);
+					Assert(context->tmfd.traversed);
+					epqslot = EvalPlanQual(context->epqstate,
+										   resultRelationDesc,
+										   resultRelInfo->ri_RangeTableIndex,
+										   oldSlot);
+					if (TupIsNull(epqslot))
+						/* Tuple not passing quals anymore, exiting... */
+						return NULL;
 
-					switch (result)
+					/*
+					 * If requested, skip delete and pass back the updated
+					 * row.
+					 */
+					if (epqreturnslot)
 					{
-						case TM_Ok:
-							Assert(context->tmfd.traversed);
-							epqslot = EvalPlanQual(context->epqstate,
-												   resultRelationDesc,
-												   resultRelInfo->ri_RangeTableIndex,
-												   inputslot);
-							if (TupIsNull(epqslot))
-								/* Tuple not passing quals anymore, exiting... */
-								return NULL;
-
-							/*
-							 * If requested, skip delete and pass back the
-							 * updated row.
-							 */
-							if (epqreturnslot)
-							{
-								*epqreturnslot = epqslot;
-								return NULL;
-							}
-							else
-								goto ldelete;
-
-						case TM_SelfModified:
-
-							/*
-							 * This can be reached when following an update
-							 * chain from a tuple updated by another session,
-							 * reaching a tuple that was already updated in
-							 * this transaction. If previously updated by this
-							 * command, ignore the delete, otherwise error
-							 * out.
-							 *
-							 * See also TM_SelfModified response to
-							 * table_tuple_delete() above.
-							 */
-							if (context->tmfd.cmax != estate->es_output_cid)
-								ereport(ERROR,
-										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
-										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
-										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
-							return NULL;
-
-						case TM_Deleted:
-							/* tuple already deleted; nothing to do */
-							return NULL;
-
-						default:
-
-							/*
-							 * TM_Invisible should be impossible because we're
-							 * waiting for updated row versions, and would
-							 * already have errored out if the first version
-							 * is invisible.
-							 *
-							 * TM_Updated should be impossible, because we're
-							 * locking the latest version via
-							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
-							 */
-							elog(ERROR, "unexpected table_tuple_lock status: %u",
-								 result);
-							return NULL;
+						*epqreturnslot = epqslot;
+						return NULL;
 					}
-
-					Assert(false);
-					break;
+					else
+						goto ldelete;
 				}
 
 			case TM_Deleted:
@@ -1673,7 +1568,8 @@ ExecDelete(ModifyTableContext *context,
 	if (tupleDeleted)
 		*tupleDeleted = true;
 
-	ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
+	ExecDeleteEpilogue(context, resultRelInfo, oldtuple,
+					   oldSlot, changingPart);
 
 	/* Process RETURNING if present and if requested */
 	if (processReturning && resultRelInfo->ri_projectReturning)
@@ -1689,19 +1585,15 @@ ExecDelete(ModifyTableContext *context,
 			/* FDW must have provided a slot containing the deleted row */
 			Assert(!TupIsNull(slot));
 		}
-		else
+		else if (TupIsNull(slot))
 		{
+			/* Copy old tuple to the returning slot */
 			slot = ExecGetReturningSlot(estate, resultRelInfo);
 			if (oldtuple != NULL)
-			{
 				ExecForceStoreHeapTuple(oldtuple, slot, false);
-			}
 			else
-			{
-				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
-												   SnapshotAny, slot))
-					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
-			}
+				ExecCopySlot(slot, oldSlot);
+			Assert(!TupIsNull(slot));
 		}
 
 		rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@@ -1742,7 +1634,7 @@ ExecDelete(ModifyTableContext *context,
 static bool
 ExecCrossPartitionUpdate(ModifyTableContext *context,
 						 ResultRelInfo *resultRelInfo,
-						 ItemPointer tupleid, HeapTuple oldtuple,
+						 Datum tupleid, HeapTuple oldtuple,
 						 TupleTableSlot *slot,
 						 bool canSetTag,
 						 UpdateContext *updateCxt,
@@ -1801,12 +1693,16 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
 		MemoryContextSwitchTo(oldcxt);
 	}
 
+	/* Make sure ri_oldTupleSlot is initialized. */
+	if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+		ExecInitUpdateProjection(mtstate, resultRelInfo);
+
 	/*
 	 * Row movement, part 1.  Delete the tuple, but skip RETURNING processing.
 	 * We want to return rows from INSERT.
 	 */
 	ExecDelete(context, resultRelInfo,
-			   tupleid, oldtuple,
+			   tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
 			   false,			/* processReturning */
 			   true,			/* changingPart */
 			   false,			/* canSetTag */
@@ -1847,21 +1743,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
 			return true;
 		else
 		{
-			/* Fetch the most recent version of old tuple. */
-			TupleTableSlot *oldSlot;
-
-			/* ... but first, make sure ri_oldTupleSlot is initialized. */
-			if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
-				ExecInitUpdateProjection(mtstate, resultRelInfo);
-			oldSlot = resultRelInfo->ri_oldTupleSlot;
-			if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
-											   tupleid,
-											   SnapshotAny,
-											   oldSlot))
-				elog(ERROR, "failed to fetch tuple being updated");
-			/* and project the new tuple to retry the UPDATE with */
+			/*
+			 * ExecDelete already fetched the most recent version of the old
+			 * tuple into resultRelInfo->ri_oldTupleSlot.  So, just project
+			 * the new tuple to retry the UPDATE with.
+			 */
 			*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
-												oldSlot);
+												resultRelInfo->ri_oldTupleSlot);
 			return false;
 		}
 	}
@@ -1879,8 +1767,8 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
 
 	/* Tuple routing starts from the root table. */
 	context->cpUpdateReturningSlot =
-		ExecInsert(context, mtstate->rootResultRelInfo, slot, canSetTag,
-				   inserted_tuple, insert_destrel);
+		ExecInsert(context, mtstate->rootResultRelInfo,
+				   slot, canSetTag, inserted_tuple, insert_destrel);
 
 	/*
 	 * Reset the transition state that may possibly have been written by
@@ -1902,7 +1790,7 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
  */
 static bool
 ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-				   ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+				   Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
 				   TM_Result *result)
 {
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -1979,8 +1867,9 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
  */
 static TM_Result
 ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-			  ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-			  bool canSetTag, UpdateContext *updateCxt)
+			  Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+			  bool canSetTag, int options, TupleTableSlot *oldSlot,
+			  UpdateContext *updateCxt)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2073,7 +1962,8 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 				ExecCrossPartitionUpdateForeignKey(context,
 												   resultRelInfo,
 												   insert_destrel,
-												   tupleid, slot,
+												   tupleid,
+												   resultRelInfo->ri_oldTupleSlot,
 												   inserted_tuple);
 
 			return TM_Ok;
@@ -2116,9 +2006,10 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 								estate->es_output_cid,
 								estate->es_snapshot,
 								estate->es_crosscheck_snapshot,
-								true /* wait for commit */ ,
+								options,
 								&context->tmfd, &updateCxt->lockmode,
-								&updateCxt->updateIndexes);
+								&updateCxt->updateIndexes,
+								oldSlot);
 	if (result == TM_Ok)
 		updateCxt->updated = true;
 
@@ -2133,24 +2024,29 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
  */
 static void
 ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
-				   ResultRelInfo *resultRelInfo, ItemPointer tupleid,
-				   HeapTuple oldtuple, TupleTableSlot *slot)
+				   ResultRelInfo *resultRelInfo,
+				   HeapTuple oldtuple, TupleTableSlot *slot,
+				   TupleTableSlot *oldSlot)
 {
 	ModifyTableState *mtstate = context->mtstate;
 	List	   *recheckIndexes = NIL;
 
-	/* insert index entries for tuple if necessary */
+	/* update index entries for tuple if necessary */
 	if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
-		recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
-											   slot, context->estate,
-											   true, false,
+	{
+		recheckIndexes = ExecUpdateIndexTuples(resultRelInfo,
+											   slot,
+											   oldSlot,
+											   context->estate,
+											   false,
 											   NULL, NIL,
 											   (updateCxt->updateIndexes == TU_Summarizing));
+	}
 
 	/* AFTER ROW UPDATE Triggers */
 	ExecARUpdateTriggers(context->estate, resultRelInfo,
 						 NULL, NULL,
-						 tupleid, oldtuple, slot,
+						 oldtuple, oldSlot, slot,
 						 recheckIndexes,
 						 mtstate->operation == CMD_INSERT ?
 						 mtstate->mt_oc_transition_capture :
@@ -2182,7 +2078,7 @@ static void
 ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 								   ResultRelInfo *sourcePartInfo,
 								   ResultRelInfo *destPartInfo,
-								   ItemPointer tupleid,
+								   Datum tupleid,
 								   TupleTableSlot *oldslot,
 								   TupleTableSlot *newslot)
 {
@@ -2239,7 +2135,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
 	/* Perform the root table's triggers. */
 	ExecARUpdateTriggers(context->estate,
 						 rootRelInfo, sourcePartInfo, destPartInfo,
-						 tupleid, NULL, newslot, NIL, NULL, true);
+						 NULL, oldslot, newslot, NIL, NULL, true);
 }
 
 /* ----------------------------------------------------------------
@@ -2261,6 +2157,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
  *		NULL when the foreign table has no relevant triggers.
  *
  *		slot contains the new tuple value to be stored.
+ *		oldSlot is the slot into which the old tuple version is fetched.
  *		planSlot is the output of the ModifyTable's subplan; we use it
  *		to access values from other input tables (for RETURNING),
  *		row-ID junk columns, etc.
@@ -2272,8 +2169,8 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
  */
 static TupleTableSlot *
 ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-		   ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
-		   bool canSetTag)
+		   Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
+		   TupleTableSlot *oldSlot, bool canSetTag, bool locked)
 {
 	EState	   *estate = context->estate;
 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -2326,6 +2223,15 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	}
 	else
 	{
+		int			options = TABLE_MODIFY_WAIT;
+
+		if (!locked)
+		{
+			options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
+			if (!IsolationUsesXactSnapshot())
+				options |= TABLE_MODIFY_LOCK_UPDATED;
+		}
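+
+		/*
+		 * When the caller has already locked the tuple (the ON CONFLICT DO
+		 * UPDATE path), oldSlot already contains the row version to update,
+		 * so there is no need to fetch the old tuple or chase the update
+		 * chain.
+		 */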
+
 		/*
 		 * If we generate a new candidate tuple after EvalPlanQual testing, we
 		 * must loop back here to try again.  (We don't need to redo triggers,
@@ -2335,7 +2241,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 		 */
 redo_act:
 		result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
-							   canSetTag, &updateCxt);
+							   canSetTag, options, oldSlot, &updateCxt);
 
 		/*
 		 * If ExecUpdateAct reports that a cross-partition update was done,
@@ -2386,88 +2292,30 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 
 			case TM_Updated:
 				{
-					TupleTableSlot *inputslot;
 					TupleTableSlot *epqslot;
-					TupleTableSlot *oldSlot;
 
 					if (IsolationUsesXactSnapshot())
 						ereport(ERROR,
 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 								 errmsg("could not serialize access due to concurrent update")));
+					Assert(!locked);
 
 					/*
-					 * Already know that we're going to need to do EPQ, so
-					 * fetch tuple directly into the right slot.
+					 * We need to do EPQ. The latest tuple is already found
+					 * and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
 					 */
-					inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
-												 resultRelInfo->ri_RangeTableIndex);
-
-					result = table_tuple_lock(resultRelationDesc, tupleid,
-											  estate->es_snapshot,
-											  inputslot, estate->es_output_cid,
-											  updateCxt.lockmode, LockWaitBlock,
-											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
-											  &context->tmfd);
-
-					switch (result)
-					{
-						case TM_Ok:
-							Assert(context->tmfd.traversed);
-
-							epqslot = EvalPlanQual(context->epqstate,
-												   resultRelationDesc,
-												   resultRelInfo->ri_RangeTableIndex,
-												   inputslot);
-							if (TupIsNull(epqslot))
-								/* Tuple not passing quals anymore, exiting... */
-								return NULL;
-
-							/* Make sure ri_oldTupleSlot is initialized. */
-							if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
-								ExecInitUpdateProjection(context->mtstate,
-														 resultRelInfo);
-
-							/* Fetch the most recent version of old tuple. */
-							oldSlot = resultRelInfo->ri_oldTupleSlot;
-							if (!table_tuple_fetch_row_version(resultRelationDesc,
-															   tupleid,
-															   SnapshotAny,
-															   oldSlot))
-								elog(ERROR, "failed to fetch tuple being updated");
-							slot = ExecGetUpdateNewTuple(resultRelInfo,
-														 epqslot, oldSlot);
-							goto redo_act;
-
-						case TM_Deleted:
-							/* tuple already deleted; nothing to do */
-							return NULL;
-
-						case TM_SelfModified:
-
-							/*
-							 * This can be reached when following an update
-							 * chain from a tuple updated by another session,
-							 * reaching a tuple that was already updated in
-							 * this transaction. If previously modified by
-							 * this command, ignore the redundant update,
-							 * otherwise error out.
-							 *
-							 * See also TM_SelfModified response to
-							 * table_tuple_update() above.
-							 */
-							if (context->tmfd.cmax != estate->es_output_cid)
-								ereport(ERROR,
-										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
-										 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
-										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
-							return NULL;
-
-						default:
-							/* see table_tuple_lock call in ExecDelete() */
-							elog(ERROR, "unexpected table_tuple_lock status: %u",
-								 result);
-							return NULL;
-					}
+					Assert(context->tmfd.traversed);
+					epqslot = EvalPlanQual(context->epqstate,
+										   resultRelationDesc,
+										   resultRelInfo->ri_RangeTableIndex,
+										   oldSlot);
+					if (TupIsNull(epqslot))
+						/* Tuple not passing quals anymore, exiting... */
+						return NULL;
+					slot = ExecGetUpdateNewTuple(resultRelInfo,
+												 epqslot,
+												 oldSlot);
+					goto redo_act;
 				}
 
 				break;
@@ -2490,8 +2338,8 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	if (canSetTag)
 		(estate->es_processed)++;
 
-	ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
-					   slot);
+	ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, oldtuple,
+					   slot, oldSlot);
 
 	/* Process RETURNING if present */
 	if (resultRelInfo->ri_projectReturning)
@@ -2514,144 +2362,26 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 static bool
 ExecOnConflictUpdate(ModifyTableContext *context,
 					 ResultRelInfo *resultRelInfo,
-					 ItemPointer conflictTid,
 					 TupleTableSlot *excludedSlot,
 					 bool canSetTag,
 					 TupleTableSlot **returning)
 {
 	ModifyTableState *mtstate = context->mtstate;
 	ExprContext *econtext = mtstate->ps.ps_ExprContext;
-	Relation	relation = resultRelInfo->ri_RelationDesc;
 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
 	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
-	TM_FailureData tmfd;
-	LockTupleMode lockmode;
-	TM_Result	test;
-	Datum		xminDatum;
-	TransactionId xmin;
-	bool		isnull;
-
-	/* Determine lock mode to use */
-	lockmode = ExecUpdateLockMode(context->estate, resultRelInfo);
+	Datum		tupleid;
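+
+	/*
+	 * The conflicting tuple arrives in 'existing' already locked (note the
+	 * 'locked == true' argument to ExecUpdate below), so all we need here
+	 * is its row identity: the rowid system attribute for ROW_REF_ROWID
+	 * tables, the TID otherwise.
+	 */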
 
-	/*
-	 * Lock tuple for update.  Don't follow updates when tuple cannot be
-	 * locked without doing so.  A row locking conflict here means our
-	 * previous conclusion that the tuple is conclusively committed is not
-	 * true anymore.
-	 */
-	test = table_tuple_lock(relation, conflictTid,
-							context->estate->es_snapshot,
-							existing, context->estate->es_output_cid,
-							lockmode, LockWaitBlock, 0,
-							&tmfd);
-	switch (test)
+	if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID)
 	{
-		case TM_Ok:
-			/* success! */
-			break;
-
-		case TM_Invisible:
-
-			/*
-			 * This can occur when a just inserted tuple is updated again in
-			 * the same command. E.g. because multiple rows with the same
-			 * conflicting key values are inserted.
-			 *
-			 * This is somewhat similar to the ExecUpdate() TM_SelfModified
-			 * case.  We do not want to proceed because it would lead to the
-			 * same row being updated a second time in some unspecified order,
-			 * and in contrast to plain UPDATEs there's no historical behavior
-			 * to break.
-			 *
-			 * It is the user's responsibility to prevent this situation from
-			 * occurring.  These problems are why the SQL standard similarly
-			 * specifies that for SQL MERGE, an exception must be raised in
-			 * the event of an attempt to update the same row twice.
-			 */
-			xminDatum = slot_getsysattr(existing,
-										MinTransactionIdAttributeNumber,
-										&isnull);
-			Assert(!isnull);
-			xmin = DatumGetTransactionId(xminDatum);
-
-			if (TransactionIdIsCurrentTransactionId(xmin))
-				ereport(ERROR,
-						(errcode(ERRCODE_CARDINALITY_VIOLATION),
-				/* translator: %s is a SQL command name */
-						 errmsg("%s command cannot affect row a second time",
-								"ON CONFLICT DO UPDATE"),
-						 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
-
-			/* This shouldn't happen */
-			elog(ERROR, "attempted to lock invisible tuple");
-			break;
-
-		case TM_SelfModified:
-
-			/*
-			 * This state should never be reached. As a dirty snapshot is used
-			 * to find conflicting tuples, speculative insertion wouldn't have
-			 * seen this row to conflict with.
-			 */
-			elog(ERROR, "unexpected self-updated tuple");
-			break;
-
-		case TM_Updated:
-			if (IsolationUsesXactSnapshot())
-				ereport(ERROR,
-						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-						 errmsg("could not serialize access due to concurrent update")));
-
-			/*
-			 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
-			 * a partitioned table we shouldn't reach to a case where tuple to
-			 * be lock is moved to another partition due to concurrent update
-			 * of the partition key.
-			 */
-			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
-
-			/*
-			 * Tell caller to try again from the very start.
-			 *
-			 * It does not make sense to use the usual EvalPlanQual() style
-			 * loop here, as the new version of the row might not conflict
-			 * anymore, or the conflicting tuple has actually been deleted.
-			 */
-			ExecClearTuple(existing);
-			return false;
-
-		case TM_Deleted:
-			if (IsolationUsesXactSnapshot())
-				ereport(ERROR,
-						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-						 errmsg("could not serialize access due to concurrent delete")));
-
-			/* see TM_Updated case */
-			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
-			ExecClearTuple(existing);
-			return false;
-
-		default:
-			elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+		bool		isnull;
+
+		tupleid = slot_getsysattr(existing, RowIdAttributeNumber, &isnull);
+		Assert(!isnull);
+	}
+	else
+	{
+		tupleid = PointerGetDatum(&existing->tts_tid);
 	}
-
-	/* Success, the tuple is locked. */
-
-	/*
-	 * Verify that the tuple is visible to our MVCC snapshot if the current
-	 * isolation level mandates that.
-	 *
-	 * It's not sufficient to rely on the check within ExecUpdate() as e.g.
-	 * CONFLICT ... WHERE clause may prevent us from reaching that.
-	 *
-	 * This means we only ever continue when a new command in the current
-	 * transaction could see the row, even though in READ COMMITTED mode the
-	 * tuple will not be visible according to the current statement's
-	 * snapshot.  This is in line with the way UPDATE deals with newer tuple
-	 * versions.
-	 */
-	ExecCheckTupleVisible(context->estate, relation, existing);
 
 	/*
 	 * Make tuple and any needed join variables available to ExecQual and
@@ -2707,9 +2437,10 @@ ExecOnConflictUpdate(ModifyTableContext *context,
 
 	/* Execute UPDATE with projection */
 	*returning = ExecUpdate(context, resultRelInfo,
-							conflictTid, NULL,
+							tupleid, NULL,
 							resultRelInfo->ri_onConflict->oc_ProjSlot,
-							canSetTag);
+							existing,
+							canSetTag, true);
 
 	/*
 	 * Clear out existing tuple, as there might not be another conflict among
@@ -2725,7 +2456,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
  */
 static TupleTableSlot *
 ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-		  ItemPointer tupleid, bool canSetTag)
+		  Datum tupleid, bool canSetTag)
 {
 	bool		matched;
 
@@ -2772,7 +2503,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	 * from ExecMergeNotMatched to ExecMergeMatched, there is no risk of a
 	 * livelock.
 	 */
-	matched = tupleid != NULL;
+	matched = DatumGetPointer(tupleid) != NULL;
 	if (matched)
 		matched = ExecMergeMatched(context, resultRelInfo, tupleid, canSetTag);
 
@@ -2811,7 +2542,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
  */
 static bool
 ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
-				 ItemPointer tupleid, bool canSetTag)
+				 Datum tupleid, bool canSetTag)
 {
 	ModifyTableState *mtstate = context->mtstate;
 	TupleTableSlot *newslot;
@@ -2913,7 +2644,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 					break;		/* concurrent update/delete */
 				}
 				result = ExecUpdateAct(context, resultRelInfo, tupleid, NULL,
-									   newslot, canSetTag, &updateCxt);
+									   newslot, canSetTag, TABLE_MODIFY_WAIT, NULL,
+									   &updateCxt);
 
 				/*
 				 * As in ExecUpdate(), if ExecUpdateAct() reports that a
@@ -2931,7 +2663,8 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 				if (result == TM_Ok && updateCxt.updated)
 				{
 					ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
-									   tupleid, NULL, newslot);
+									   NULL, newslot,
+									   resultRelInfo->ri_oldTupleSlot);
 					mtstate->mt_merge_updated += 1;
 				}
 				break;
@@ -2945,11 +2678,12 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 						return true;	/* "do nothing" */
 					break;		/* concurrent update/delete */
 				}
-				result = ExecDeleteAct(context, resultRelInfo, tupleid, false);
+				result = ExecDeleteAct(context, resultRelInfo, tupleid, false,
+									   TABLE_MODIFY_WAIT, NULL);
 				if (result == TM_Ok)
 				{
-					ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
-									   false);
+					ExecDeleteEpilogue(context, resultRelInfo, NULL,
+									   resultRelInfo->ri_oldTupleSlot, false);
 					mtstate->mt_merge_deleted += 1;
 				}
 				break;
@@ -3068,7 +2802,11 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 							if (TupIsNull(epqslot))
 								return false;
 
-							(void) ExecGetJunkAttribute(epqslot,
-														resultRelInfo->ri_RowIdAttNo,
-														&isNull);
+							/*
+							 * Update tupleid to that of the new tuple, for
+							 * the refetch we do at the top.
+							 */
+							tupleid = ExecGetJunkAttribute(epqslot,
+														   resultRelInfo->ri_RowIdAttNo,
+														   &isNull);
 							if (isNull)
@@ -3095,10 +2833,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 							 * that the first qualifying WHEN MATCHED action
 							 * is executed.
-							 *
-							 * Update tupleid to that of the new tuple, for
-							 * the refetch we do at the top.
 							 */
-							ItemPointerCopy(&context->tmfd.ctid, tupleid);
 							goto lmerge_matched;
 
 						case TM_Deleted:
@@ -3605,10 +3340,10 @@ ExecModifyTable(PlanState *pstate)
 	PlanState  *subplanstate;
 	TupleTableSlot *slot;
 	TupleTableSlot *oldSlot;
+	Datum		tupleid;
 	ItemPointerData tuple_ctid;
 	HeapTupleData oldtupdata;
 	HeapTuple	oldtuple;
-	ItemPointer tupleid;
 
 	CHECK_FOR_INTERRUPTS();
 
@@ -3657,6 +3392,8 @@ ExecModifyTable(PlanState *pstate)
 	 */
 	for (;;)
 	{
+		RowRefType	refType;
+
 		/*
 		 * Reset the per-output-tuple exprcontext.  This is needed because
 		 * triggers expect to use that context as workspace.  It's a bit ugly
@@ -3706,7 +3443,7 @@ ExecModifyTable(PlanState *pstate)
 				{
 					EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
 
-					ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag);
+					ExecMerge(&context, node->resultRelInfo, PointerGetDatum(NULL), node->canSetTag);
 					continue;	/* no RETURNING support yet */
 				}
 
@@ -3742,7 +3479,8 @@ ExecModifyTable(PlanState *pstate)
 		EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
 		slot = context.planSlot;
 
-		tupleid = NULL;
+		refType = resultRelInfo->ri_RowRefType;
+		tupleid = PointerGetDatum(NULL);
 		oldtuple = NULL;
 
 		/*
@@ -3784,16 +3522,32 @@ ExecModifyTable(PlanState *pstate)
 					{
 						EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot);
 
-						ExecMerge(&context, node->resultRelInfo, NULL, node->canSetTag);
+						ExecMerge(&context, node->resultRelInfo,
+								  PointerGetDatum(NULL), node->canSetTag);
 						continue;	/* no RETURNING support yet */
 					}
 
 					elog(ERROR, "ctid is NULL");
 				}
 
-				tupleid = (ItemPointer) DatumGetPointer(datum);
-				tuple_ctid = *tupleid;	/* be sure we don't free ctid!! */
-				tupleid = &tuple_ctid;
+				if (refType == ROW_REF_TID)
+				{
+					/* shouldn't ever get a null result... */
+					if (isNull)
+						elog(ERROR, "ctid is NULL");
+
+					/* be sure we don't free ctid!! */
+					tuple_ctid = *((ItemPointer) DatumGetPointer(datum));
+					tupleid = PointerGetDatum(&tuple_ctid);
+				}
+				else
+				{
+					Assert(refType == ROW_REF_ROWID);
+					/* shouldn't ever get a null result... */
+					if (isNull)
+						elog(ERROR, "rowid is NULL");
+
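+					/*
+					 * The rowid is a varlena Datum pointing into the scan
+					 * tuple, so take a local copy; it is pfree'd at the
+					 * bottom of this loop once the row has been processed.
+					 */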
+					tupleid = datumCopy(datum, false, -1);
+				}
 			}
 
 			/*
@@ -3870,6 +3624,7 @@ ExecModifyTable(PlanState *pstate)
 					/* Fetch the most recent version of old tuple. */
 					Relation	relation = resultRelInfo->ri_RelationDesc;
 
+					Assert(DatumGetPointer(tupleid) != NULL);
 					if (!table_tuple_fetch_row_version(relation, tupleid,
 													   SnapshotAny,
 													   oldSlot))
@@ -3881,12 +3636,18 @@ ExecModifyTable(PlanState *pstate)
 
 				/* Now apply the update. */
 				slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
-								  slot, node->canSetTag);
+								  slot, resultRelInfo->ri_oldTupleSlot,
+								  node->canSetTag, false);
 				break;
 
 			case CMD_DELETE:
+				/* Initialize slot for DELETE to fetch the old tuple */
+				if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
+					ExecInitDeleteTupleSlot(node, resultRelInfo);
+
 				slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
-								  true, false, node->canSetTag, NULL, NULL, NULL);
+								  resultRelInfo->ri_oldTupleSlot, true, false,
+								  node->canSetTag, NULL, NULL, NULL);
 				break;
 
 			case CMD_MERGE:
@@ -3898,6 +3659,9 @@ ExecModifyTable(PlanState *pstate)
 				break;
 		}
 
+		if (refType == ROW_REF_ROWID && DatumGetPointer(tupleid) != NULL)
+			pfree(DatumGetPointer(tupleid));
+
 		/*
 		 * If we got a RETURNING result, return it to caller.  We'll continue
 		 * the work on next call.
@@ -4137,10 +3901,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 				relkind == RELKIND_MATVIEW ||
 				relkind == RELKIND_PARTITIONED_TABLE)
 			{
-				resultRelInfo->ri_RowIdAttNo =
-					ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
-				if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
-					elog(ERROR, "could not find junk ctid column");
+				if (resultRelInfo->ri_RowRefType == ROW_REF_TID)
+				{
+					resultRelInfo->ri_RowIdAttNo =
+						ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
+					if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
+						elog(ERROR, "could not find junk ctid column");
+				}
+				else
+				{
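+					Assert(resultRelInfo->ri_RowRefType == ROW_REF_ROWID);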
+					resultRelInfo->ri_RowIdAttNo =
+						ExecFindJunkAttributeInTlist(subplan->targetlist, "rowid");
+					if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
+						elog(ERROR, "could not find junk rowid column");
+				}
 			}
 			else if (relkind == RELKIND_FOREIGN_TABLE)
 			{
@@ -4452,6 +4226,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 		estate->es_auxmodifytables = lcons(mtstate,
 										   estate->es_auxmodifytables);
 
 	return mtstate;
 }
 
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 862bd0330bc..8180a2991c3 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -378,7 +378,7 @@ TidNext(TidScanState *node)
 		if (node->tss_isCurrentOf)
 			table_tuple_get_latest_tid(scan, &tid);
 
-		if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
+		if (table_tuple_fetch_row_version(heapRelation, PointerGetDatum(&tid), snapshot, slot))
 			return slot;
 
 		/* Bad TID or failed snapshot qual; try next */
diff --git a/src/backend/nodes/read.c b/src/backend/nodes/read.c
index 5d76f56e4e8..07df92d813c 100644
--- a/src/backend/nodes/read.c
+++ b/src/backend/nodes/read.c
@@ -205,6 +205,17 @@ pg_strtok(int *length)
 	return ret_str;
 }
 
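+/*
+ * pg_str_hasfield
+ *		Peek ahead in pg_strtok's input to check whether the next nonblank
+ *		character starts a field name (':'), without consuming any input.
+ */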
+bool
+pg_str_hasfield(void)
+{
+	const char *local_str = pg_strtok_ptr;
+
+	while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
+		local_str++;
+
+	return (*local_str == ':');
+}
+
 /*
  * debackslash -
  *	  create a palloc'd string holding the given token.
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f75e0f99cb9..c62a407f4ca 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -86,6 +86,7 @@ int			min_parallel_index_scan_size;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
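+
+/*
+ * Hook for plugins to control whether set_plain_rel_pathlist() adds the
+ * regular sequential-scan path; returning false suppresses it.
+ */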
+set_plain_rel_pathlist_hook_type set_plain_rel_pathlist_hook = NULL;
 
 /* Hook for plugins to replace standard_join_search() */
 join_search_hook_type join_search_hook = NULL;
@@ -775,8 +776,10 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 */
 	required_outer = rel->lateral_relids;
 
-	/* Consider sequential scan */
-	add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
+	/* Consider sequential scan, unless a plugin suppresses it */
+	if (!set_plain_rel_pathlist_hook ||
+		set_plain_rel_pathlist_hook(root, rel, rte))
+		add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
 
 	/* If appropriate, consider parallel sequential scan */
 	if (rel->consider_parallel && required_outer == NULL)
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 0065c8992bd..bf4968e348b 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -48,14 +48,6 @@ typedef enum
 	ST_ANYSCAN					/* either is okay */
 } ScanTypeControl;
 
-/* Data structure for collecting qual clauses that match an index */
-typedef struct
-{
-	bool		nonempty;		/* True if lists are not all empty */
-	/* Lists of IndexClause nodes, one list per index column */
-	List	   *indexclauses[INDEX_MAX_KEYS];
-} IndexClauseSet;
-
 /* Per-path data used within choose_bitmap_and() */
 typedef struct
 {
@@ -130,9 +122,6 @@ static double adjust_rowcount_for_semijoins(PlannerInfo *root,
 											Index outer_relid,
 											double rowcount);
 static double approximate_joinrel_size(PlannerInfo *root, Relids relids);
-static void match_restriction_clauses_to_index(PlannerInfo *root,
-											   IndexOptInfo *index,
-											   IndexClauseSet *clauseset);
 static void match_join_clauses_to_index(PlannerInfo *root,
 										RelOptInfo *rel, IndexOptInfo *index,
 										IndexClauseSet *clauseset,
@@ -2012,7 +2001,7 @@ approximate_joinrel_size(PlannerInfo *root, Relids relids)
  *	  Identify restriction clauses for the rel that match the index.
  *	  Matching clauses are added to *clauseset.
  */
-static void
+void
 match_restriction_clauses_to_index(PlannerInfo *root,
 								   IndexOptInfo *index,
 								   IndexClauseSet *clauseset)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 974c50b29f9..48f251738e2 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -164,16 +164,12 @@ static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path)
 static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path);
 static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
 static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
-static void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
-									 List **stripped_indexquals_p,
-									 List **fixed_indexquals_p);
 static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path);
 static Node *fix_indexqual_clause(PlannerInfo *root,
 								  IndexOptInfo *index, int indexcol,
 								  Node *clause, List *indexcolnos);
 static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
 static List *get_switched_clauses(List *clauses, Relids outerrelids);
-static List *order_qual_clauses(PlannerInfo *root, List *clauses);
 static void copy_generic_path_info(Plan *dest, Path *src);
 static void copy_plan_costsize(Plan *dest, Plan *src);
 static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
@@ -4897,6 +4893,14 @@ replace_nestloop_params(PlannerInfo *root, Node *expr)
 	return replace_nestloop_params_mutator(expr, root);
 }
 
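+/*
+ * replace_nestloop_params_compat
+ *		Exported equivalent of replace_nestloop_params(), for callers
+ *		outside this file.
+ */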
+Node *
+replace_nestloop_params_compat(PlannerInfo *root, Node *expr)
+{
+	/* No setup needed for tree walk, so away we go */
+	return replace_nestloop_params_mutator(expr, root);
+}
+
 static Node *
 replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
 {
@@ -4977,7 +4981,7 @@ replace_nestloop_params_mutator(Node *node, PlannerInfo *root)
  * are subplans in it (we need two separate copies of the subplan tree, or
  * things will go awry).
  */
-static void
+void
 fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
 						 List **stripped_indexquals_p, List **fixed_indexquals_p)
 {
@@ -5270,7 +5274,7 @@ get_switched_clauses(List *clauses, Relids outerrelids)
  * instead of bare clauses.  This is another reason why trying to consider
  * selectivity in the ordering would likely do the wrong thing.
  */
-static List *
+List *
 order_qual_clauses(PlannerInfo *root, List *clauses)
 {
 	typedef struct
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 2ffef1bad78..7198fd4777c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2263,6 +2263,7 @@ preprocess_rowmarks(PlannerInfo *root)
 		RowMarkClause *rc = lfirst_node(RowMarkClause, l);
 		RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
 		PlanRowMark *newrc;
+		RowRefType refType;
 
 		/*
 		 * Currently, it is syntactically impossible to have FOR UPDATE et al
@@ -2285,8 +2286,8 @@ preprocess_rowmarks(PlannerInfo *root)
 		newrc = makeNode(PlanRowMark);
 		newrc->rti = newrc->prti = rc->rti;
 		newrc->rowmarkId = ++(root->glob->lastRowMarkId);
-		newrc->markType = select_rowmark_type(rte, rc->strength);
-		newrc->allMarkTypes = (1 << newrc->markType);
+		newrc->markType = select_rowmark_type(rte, rc->strength, &refType);
+		newrc->allRefTypes = (1 << refType);
 		newrc->strength = rc->strength;
 		newrc->waitPolicy = rc->waitPolicy;
 		newrc->isParent = false;
@@ -2302,6 +2303,7 @@ preprocess_rowmarks(PlannerInfo *root)
 	{
 		RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
 		PlanRowMark *newrc;
+		RowRefType refType = ROW_REF_TID;
 
 		i++;
 		if (!bms_is_member(i, rels))
@@ -2310,8 +2312,8 @@ preprocess_rowmarks(PlannerInfo *root)
 		newrc = makeNode(PlanRowMark);
 		newrc->rti = newrc->prti = i;
 		newrc->rowmarkId = ++(root->glob->lastRowMarkId);
-		newrc->markType = select_rowmark_type(rte, LCS_NONE);
-		newrc->allMarkTypes = (1 << newrc->markType);
+		newrc->markType = select_rowmark_type(rte, LCS_NONE, &refType);
+		newrc->allRefTypes = (1 << refType);
 		newrc->strength = LCS_NONE;
 		newrc->waitPolicy = LockWaitBlock;	/* doesn't matter */
 		newrc->isParent = false;
@@ -2326,11 +2328,13 @@ preprocess_rowmarks(PlannerInfo *root)
- * Select RowMarkType to use for a given table
+ * Select RowMarkType to use for a given table.  Also set *refType to the
+ * row reference type used to re-fetch the row.
  */
 RowMarkType
-select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
+select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength,
+					RowRefType *refType)
 {
 	if (rte->rtekind != RTE_RELATION)
 	{
 		/* If it's not a table at all, use ROW_MARK_COPY */
+		*refType = ROW_REF_COPY;
 		return ROW_MARK_COPY;
 	}
 	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
@@ -2341,10 +2345,12 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
+		/* Ensure *refType is set even when the FDW picks the rowmark type */
+		*refType = ROW_REF_COPY;
 		if (fdwroutine->GetForeignRowMarkType != NULL)
 			return fdwroutine->GetForeignRowMarkType(rte, strength);
 		/* Otherwise, use ROW_MARK_COPY by default */
 		return ROW_MARK_COPY;
 	}
 	else
 	{
+		*refType = rte->reftype;
 		/* Regular table, apply the appropriate lock type */
 		switch (strength)
 		{
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 9d46488ef7c..0d849332904 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -210,7 +210,7 @@ preprocess_targetlist(PlannerInfo *root)
 		if (rc->rti != rc->prti)
 			continue;
 
-		if (rc->allMarkTypes & ~(1 << ROW_MARK_COPY))
+		if (rc->allRefTypes & (1 << ROW_REF_TID))
 		{
 			/* Need to fetch TID */
 			var = makeVar(rc->rti,
@@ -226,7 +226,23 @@ preprocess_targetlist(PlannerInfo *root)
 								  true);
 			tlist = lappend(tlist, tle);
 		}
-		if (rc->allMarkTypes & (1 << ROW_MARK_COPY))
+		if (rc->allRefTypes & (1 << ROW_REF_ROWID))
+		{
+			/* Need to fetch rowid */
+			var = makeVar(rc->rti,
+						  RowIdAttributeNumber,
+						  BYTEAOID,
+						  -1,
+						  InvalidOid,
+						  0);
+			snprintf(resname, sizeof(resname), "rowid%u", rc->rowmarkId);
+			tle = makeTargetEntry((Expr *) var,
+								  list_length(tlist) + 1,
+								  pstrdup(resname),
+								  true);
+			tlist = lappend(tlist, tle);
+		}
+		if (rc->allRefTypes & (1 << ROW_REF_COPY))
 		{
 			/* Need the whole row as a junk var */
 			var = makeWholeRowVar(rt_fetch(rc->rti, range_table),
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index f456b3b0a44..43af763f1fe 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -896,17 +896,35 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex,
 		relkind == RELKIND_MATVIEW ||
 		relkind == RELKIND_PARTITIONED_TABLE)
 	{
+		RowRefType refType = table_get_row_ref_type(target_relation);
+
 		/*
-		 * Emit CTID so that executor can find the row to merge, update or
-		 * delete.
+		 * Emit CTID or rowid so that the executor can find the row to merge,
+		 * update or delete.
 		 */
-		var = makeVar(rtindex,
-					  SelfItemPointerAttributeNumber,
-					  TIDOID,
-					  -1,
-					  InvalidOid,
-					  0);
-		add_row_identity_var(root, var, rtindex, "ctid");
+		if (refType == ROW_REF_TID)
+		{
+			var = makeVar(rtindex,
+						  SelfItemPointerAttributeNumber,
+						  TIDOID,
+						  -1,
+						  InvalidOid,
+						  0);
+			add_row_identity_var(root, var, rtindex, "ctid");
+		}
+		else
+		{
+			Assert(refType == ROW_REF_ROWID);
+			var = makeVar(rtindex,
+						  RowIdAttributeNumber,
+						  BYTEAOID,
+						  -1,
+						  InvalidOid,
+						  0);
+			add_row_identity_var(root, var, rtindex, "rowid");
+		}
 	}
 	else if (relkind == RELKIND_FOREIGN_TABLE)
 	{
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index f9d3ff1e7ac..e16e855cf64 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -16,6 +16,7 @@
 
 #include "access/sysattr.h"
 #include "access/table.h"
+#include "access/tableam.h"
 #include "catalog/partition.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_type.h"
@@ -91,7 +92,7 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
 	LOCKMODE	lockmode;
 	PlanRowMark *oldrc;
 	bool		old_isParent = false;
-	int			old_allMarkTypes = 0;
+	int			old_allRefTypes = 0;
 
 	Assert(rte->inh);			/* else caller error */
 
@@ -131,8 +132,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
 	{
 		old_isParent = oldrc->isParent;
 		oldrc->isParent = true;
-		/* Save initial value of allMarkTypes before children add to it */
-		old_allMarkTypes = oldrc->allMarkTypes;
+		/* Save initial value of allRefTypes before children add to it */
+		old_allRefTypes = oldrc->allRefTypes;
 	}
 
 	/* Scan the inheritance set and expand it */
@@ -239,15 +240,15 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
 	 */
 	if (oldrc)
 	{
-		int			new_allMarkTypes = oldrc->allMarkTypes;
+		int			new_allRefTypes = oldrc->allRefTypes;
 		Var		   *var;
 		TargetEntry *tle;
 		char		resname[32];
 		List	   *newvars = NIL;
 
 		/* Add TID junk Var if needed, unless we had it already */
-		if (new_allMarkTypes & ~(1 << ROW_MARK_COPY) &&
-			!(old_allMarkTypes & ~(1 << ROW_MARK_COPY)))
+		if (new_allRefTypes & (1 << ROW_REF_TID) &&
+			!(old_allRefTypes & (1 << ROW_REF_TID)))
 		{
 			/* Need to fetch TID */
 			var = makeVar(oldrc->rti,
@@ -266,8 +267,8 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
 		}
 
 		/* Add whole-row junk Var if needed, unless we had it already */
-		if ((new_allMarkTypes & (1 << ROW_MARK_COPY)) &&
-			!(old_allMarkTypes & (1 << ROW_MARK_COPY)))
+		if ((new_allRefTypes & (1 << ROW_REF_COPY)) &&
+			!(old_allRefTypes & (1 << ROW_REF_COPY)))
 		{
 			var = makeWholeRowVar(planner_rt_fetch(oldrc->rti, root),
 								  oldrc->rti,
@@ -282,6 +283,24 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel,
 			newvars = lappend(newvars, var);
 		}
 
+		if ((new_allRefTypes & (1 << ROW_REF_ROWID)) &&
+			!(old_allRefTypes & (1 << ROW_REF_ROWID)))
+		{
+			var = makeVar(oldrc->rti,
+						  RowIdAttributeNumber,
+						  BYTEAOID,
+						  -1,
+						  InvalidOid,
+						  0);
+			snprintf(resname, sizeof(resname), "rowid%u", oldrc->rowmarkId);
+			tle = makeTargetEntry((Expr *) var,
+								  list_length(root->processed_tlist) + 1,
+								  pstrdup(resname),
+								  true);
+			root->processed_tlist = lappend(root->processed_tlist, tle);
+			newvars = lappend(newvars, var);
+		}
+
 		/* Add tableoid junk Var, unless we had it already */
 		if (!old_isParent)
 		{
@@ -441,7 +460,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo,
  * where the hierarchy is flattened during RTE expansion.)
  *
  * PlanRowMarks still carry the top-parent's RTI, and the top-parent's
- * allMarkTypes field still accumulates values from all descendents.
+ * allRefTypes field still accumulates values from all descendants.
  *
  * "parentrte" and "parentRTindex" are immediate parent's RTE and
  * RTI. "top_parentrc" is top parent's PlanRowMark.
@@ -485,6 +504,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 	Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */
 	childrte->relid = childOID;
 	childrte->relkind = childrel->rd_rel->relkind;
+	childrte->reftype = table_get_row_ref_type(childrel);
 	/* A partitioned child will need to be expanded further. */
 	if (childrte->relkind == RELKIND_PARTITIONED_TABLE)
 	{
@@ -574,14 +594,16 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 	if (top_parentrc)
 	{
 		PlanRowMark *childrc = makeNode(PlanRowMark);
+		RowRefType refType;
 
 		childrc->rti = childRTindex;
 		childrc->prti = top_parentrc->rti;
 		childrc->rowmarkId = top_parentrc->rowmarkId;
 		/* Reselect rowmark type, because relkind might not match parent */
 		childrc->markType = select_rowmark_type(childrte,
-												top_parentrc->strength);
-		childrc->allMarkTypes = (1 << childrc->markType);
+												top_parentrc->strength,
+												&refType);
+		childrc->allRefTypes = (1 << refType);
 		childrc->strength = top_parentrc->strength;
 		childrc->waitPolicy = top_parentrc->waitPolicy;
 
@@ -592,8 +614,8 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
 		 */
 		childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
 
-		/* Include child's rowmark type in top parent's allMarkTypes */
-		top_parentrc->allMarkTypes |= childrc->allMarkTypes;
+		/* Include child's rowmark type in top parent's allRefTypes */
+		top_parentrc->allRefTypes |= childrc->allRefTypes;
 
 		root->rowMarks = lappend(root->rowMarks, childrc);
 	}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 1a3045479ff..f4a0fc2b6c6 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -60,6 +60,7 @@ int			constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
 
 /* Hook for plugins to get control in get_relation_info() */
 get_relation_info_hook_type get_relation_info_hook = NULL;
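+
+/*
+ * Hook for plugins to make get_relation_info() skip the btree tree-height
+ * lookup for an index; returning true skips it.
+ */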
+skip_tree_height_hook_type skip_tree_height_hook = NULL;
 
 
 static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
@@ -457,7 +458,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 						info->tuples = rel->tuples;
 				}
 
-				if (info->relam == BTREE_AM_OID)
+				if (info->relam == BTREE_AM_OID &&
+					(!skip_tree_height_hook ||
+					 !skip_tree_height_hook(indexRelation)))
 				{
 					/*
 					 * For btrees, get tree height while we have the index
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c31b3733587..c39c6f21939 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -371,6 +371,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <list>	OptSchemaEltList parameter_name_list
 
 %type <chr>		am_type
+%type <str>		opt_for_tableam
 
 %type <boolean> TriggerForSpec TriggerForType
 %type <ival>	TriggerActionTime
@@ -5746,17 +5747,21 @@ row_security_cmd:
 /*****************************************************************************
  *
  *		QUERY:
- *             CREATE ACCESS METHOD name HANDLER handler_name
+ *				CREATE ACCESS METHOD name TYPE am_type
+ *					[FOR tableam_name]
+ *					HANDLER handler_name
  *
  *****************************************************************************/
 
-CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name
+CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type
+				 opt_for_tableam HANDLER handler_name
 				{
 					CreateAmStmt *n = makeNode(CreateAmStmt);
 
 					n->amname = $4;
-					n->handler_name = $8;
 					n->amtype = $6;
+					n->tableam_name = $7;
+					n->handler_name = $9;
 					$$ = (Node *) n;
 				}
 		;
@@ -5766,6 +5771,11 @@ am_type:
 		|	TABLE			{ $$ = AMTYPE_TABLE; }
 		;
 
+opt_for_tableam:
+			FOR name							{ $$ = $2; }
+			| /*EMPTY*/							{ $$ = NULL; }
+		;
+
 /*****************************************************************************
  *
  *		QUERIES :
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 58bc222a8b9..23ef258340a 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -20,6 +20,7 @@
 #include "access/relation.h"
 #include "access/sysattr.h"
 #include "access/table.h"
+#include "access/tableam.h"
 #include "catalog/heap.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
@@ -1502,6 +1503,7 @@ addRangeTableEntry(ParseState *pstate,
 	rte->relid = RelationGetRelid(rel);
 	rte->relkind = rel->rd_rel->relkind;
 	rte->rellockmode = lockmode;
+	rte->reftype = table_get_row_ref_type(rel);
 
 	/*
 	 * Build the list of effective column names using user-supplied aliases
@@ -1587,6 +1589,7 @@ addRangeTableEntryForRelation(ParseState *pstate,
 	rte->relid = RelationGetRelid(rel);
 	rte->relkind = rel->rd_rel->relkind;
 	rte->rellockmode = lockmode;
+	rte->reftype = table_get_row_ref_type(rel);
 
 	/*
 	 * Build the list of effective column names using user-supplied aliases
@@ -1656,6 +1659,7 @@ addRangeTableEntryForSubquery(ParseState *pstate,
 	rte->rtekind = RTE_SUBQUERY;
 	rte->subquery = subquery;
 	rte->alias = alias;
+	rte->reftype = ROW_REF_COPY;
 
 	eref = alias ? copyObject(alias) : makeAlias("unnamed_subquery", NIL);
 	numaliases = list_length(eref->colnames);
@@ -1764,6 +1768,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 	rte->functions = NIL;		/* we'll fill this list below */
 	rte->funcordinality = rangefunc->ordinality;
 	rte->alias = alias;
+	rte->reftype = ROW_REF_COPY;
 
 	/*
 	 * Choose the RTE alias name.  We default to using the first function's
@@ -2083,6 +2088,7 @@ addRangeTableEntryForTableFunc(ParseState *pstate,
 	rte->coltypmods = tf->coltypmods;
 	rte->colcollations = tf->colcollations;
 	rte->alias = alias;
+	rte->reftype = ROW_REF_COPY;
 
 	eref = alias ? copyObject(alias) : makeAlias(refname, NIL);
 	numaliases = list_length(eref->colnames);
@@ -2159,6 +2165,7 @@ addRangeTableEntryForValues(ParseState *pstate,
 	rte->coltypmods = coltypmods;
 	rte->colcollations = colcollations;
 	rte->alias = alias;
+	rte->reftype = ROW_REF_COPY;
 
 	eref = alias ? copyObject(alias) : makeAlias(refname, NIL);
 
@@ -2256,6 +2263,7 @@ addRangeTableEntryForJoin(ParseState *pstate,
 	rte->joinrightcols = rightcols;
 	rte->join_using_alias = join_using_alias;
 	rte->alias = alias;
+	rte->reftype = ROW_REF_COPY;
 
 	eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL);
 	numaliases = list_length(eref->colnames);
@@ -2337,6 +2345,7 @@ addRangeTableEntryForCTE(ParseState *pstate,
 	rte->rtekind = RTE_CTE;
 	rte->ctename = cte->ctename;
 	rte->ctelevelsup = levelsup;
+	rte->reftype = ROW_REF_COPY;
 
 	/* Self-reference if and only if CTE's parse analysis isn't completed */
 	rte->self_reference = !IsA(cte->ctequery, Query);
@@ -2499,6 +2508,7 @@ addRangeTableEntryForENR(ParseState *pstate,
 	 * if they access transition tables linked to a table that is altered.
 	 */
 	rte->relid = enrmd->reliddesc;
+	rte->reftype = ROW_REF_COPY;
 
 	/*
 	 * Build the list of effective column names using user-supplied aliases
@@ -3268,6 +3278,9 @@ get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum)
 		attnum > 0 && attnum <= list_length(rte->alias->colnames))
 		return strVal(list_nth(rte->alias->colnames, attnum - 1));
 
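+	/* The rowid system attribute is not in the catalogs; hardwire its name */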
+	if (attnum == RowIdAttributeNumber)
+		return "rowid";
+
 	/*
 	 * If the RTE is a relation, go to the system catalogs not the
 	 * eref->colnames list.  This is a little slower but it will give the
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 441f599d1a0..87b962f05de 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -2320,19 +2320,6 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
 					 errdetail("Cannot create a non-deferrable constraint using a deferrable index."),
 					 parser_errposition(cxt->pstate, constraint->location)));
 
-		/*
-		 * Insist on it being a btree.  That's the only kind that supports
-		 * uniqueness at the moment anyway; but we must have an index that
-		 * exactly matches what you'd get from plain ADD CONSTRAINT syntax,
-		 * else dump and reload will produce a different index (breaking
-		 * pg_upgrade in particular).
-		 */
-		if (index_rel->rd_rel->relam != get_index_am_oid(DEFAULT_INDEX_TYPE, false))
-			ereport(ERROR,
-					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-					 errmsg("index \"%s\" is not a btree", index_name),
-					 parser_errposition(cxt->pstate, constraint->location)));
-
 		/* Must get indclass the hard way */
 		indclassDatum = SysCacheGetAttrNotNull(INDEXRELID,
 											   index_rel->rd_indextuple,
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 7dd9345c617..693db1b3c9f 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -538,6 +538,7 @@ AutoVacLauncherMain(int argc, char *argv[])
 		 * transaction.
 		 */
 		LWLockReleaseAll();
+		CustomErrorCleanup();
 		pgstat_report_wait_end();
 		UnlockBuffers();
 		/* this is probably dead code, but let's be safe: */
@@ -2834,7 +2835,9 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
 		   ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
 		   ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
 
-	relopts = extractRelOptions(tup, pg_class_desc, NULL);
+	relopts = extractRelOptions(tup, pg_class_desc,
+								GetTableAmRoutineByAmOid(((Form_pg_class) GETSTRUCT(tup))->relam),
+								NULL);
 	if (relopts == NULL)
 		return NULL;
 
diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c
index cae6feb3562..bc4c3d11359 100644
--- a/src/backend/postmaster/auxprocess.c
+++ b/src/backend/postmaster/auxprocess.c
@@ -178,6 +178,7 @@ static void
 ShutdownAuxiliaryProcess(int code, Datum arg)
 {
 	LWLockReleaseAll();
+	CustomErrorCleanup();
 	ConditionVariableCancelSleep();
 	pgstat_report_wait_end();
 }
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index f2e4f23d9fc..7963fcd2a38 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -166,6 +166,7 @@ BackgroundWriterMain(void)
 		 * about in bgwriter, but we do have LWLocks, buffers, and temp files.
 		 */
 		LWLockReleaseAll();
+		CustomErrorCleanup();
 		ConditionVariableCancelSleep();
 		UnlockBuffers();
 		ReleaseAuxProcessResources(false);
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index f482f6423d1..7cd4552c526 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -53,11 +53,20 @@
 #include "storage/proc.h"
 #include "storage/procsignal.h"
 #include "storage/shmem.h"
+#include "storage/sinvaladt.h"
 #include "storage/smgr.h"
 #include "storage/spin.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
+#include "utils/syscache.h"
+
+/*
+ * Included for the InitializeTimeouts and RegisterTimeout functions that
+ * the OrioleDB checkpoint needs to work correctly.  See the comment at the
+ * InitializeTimeouts call in CheckpointerMain for details.
+ */
+#include "utils/timeout.h"
 
 
 /*----------
@@ -207,6 +216,20 @@ CheckpointerMain(void)
 	 */
 	pqsignal(SIGCHLD, SIG_DFL);
 
+	/*
+	 * To use the OrioleDB checkpoint, we must initialize the data needed by
+	 * the primary lock mechanism (lock.h).  The OrioleDB module needs locks
+	 * of this type for debug events and relation locks, but the postgres
+	 * checkpointer does not use them and therefore does not initialize them
+	 * on its own.
+	 */
+	InitializeTimeouts(); /* establishes SIGALRM handler */
+	InitDeadLockChecking();
+	RegisterTimeout(DEADLOCK_TIMEOUT, CheckDeadLockAlert);
+	RelationCacheInitialize();
+	InitCatalogCache();
+	SharedInvalBackendInit(false);
+
 	/*
 	 * Initialize so that first time-driven event happens at the correct time.
 	 */
@@ -269,6 +293,7 @@ CheckpointerMain(void)
 		 * files.
 		 */
 		LWLockReleaseAll();
+		CustomErrorCleanup();
 		ConditionVariableCancelSleep();
 		pgstat_report_wait_end();
 		UnlockBuffers();
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 46af3495644..93ce77683a4 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -670,6 +670,22 @@ pgarch_readyXlog(char *xlog)
 	for (int i = 0; i < arch_files->arch_files_size; i++)
 		arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
 
+	/*
+	 * Preload the WAL files if the relevant callback is provided.
+	 */
+	if (ArchiveCallbacks->archive_preload_file_cb)
+	{
+		for (int i = 0; i < arch_files->arch_files_size; i++)
+		{
+			char	   *xlog1 = arch_files->arch_files[i];
+			char		pathname[MAXPGPATH];
+
+			snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog1);
+			ArchiveCallbacks->archive_preload_file_cb(archive_module_state,
+													  xlog1, pathname);
+		}
+	}
+
 	/* Return the highest priority file. */
 	arch_files->arch_files_size--;
 	strcpy(xlog, arch_files->arch_files[arch_files->arch_files_size]);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b42aae41fce..7a9c875ee7e 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -85,10 +85,6 @@
 #include <systemd/sd-daemon.h>
 #endif
 
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-#include <pthread.h>
-#endif
-
 #include "access/transam.h"
 #include "access/xlog.h"
 #include "access/xlogrecovery.h"
@@ -145,7 +141,8 @@
 #define BACKEND_TYPE_AUTOVAC	0x0002	/* autovacuum worker process */
 #define BACKEND_TYPE_WALSND		0x0004	/* walsender process */
 #define BACKEND_TYPE_BGWORKER	0x0008	/* bgworker process */
-#define BACKEND_TYPE_ALL		0x000F	/* OR of all the above */
+#define BACKEND_TYPE_SYSTEM_BGWORKER 0x0010	/* system bgworker process */
+#define BACKEND_TYPE_ALL		0x001F	/* OR of all the above */
 
 /*
  * List of active backends (or child processes anyway; we don't actually
@@ -451,7 +448,7 @@ static void InitPostmasterDeathWatchHandle(void);
  * even during recovery.
  */
 #define PgArchStartupAllowed()	\
-	(((XLogArchivingActive() && pmState == PM_RUN) ||			\
+	(((XLogArchivingActive() && (pmState == PM_RUN || pmState == PM_SHUTDOWN)) || \
 	  (XLogArchivingAlways() &&									  \
 	   (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
 	 PgArchCanRestart())
@@ -579,6 +576,16 @@ int			postmaster_alive_fds[2] = {-1, -1};
 HANDLE		PostmasterHandle;
 #endif
 
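+/*
+ * IsFatalError reports whether the postmaster has observed a child process
+ * crash and entered FatalError state.
+ */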
+bool
+IsFatalError(void)
+{
+	return FatalError;
+}
+
 /*
  * Postmaster main entry point
  */
@@ -1417,24 +1420,6 @@ PostmasterMain(int argc, char *argv[])
 		 */
 	}
 
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
-	/*
-	 * On macOS, libintl replaces setlocale() with a version that calls
-	 * CFLocaleCopyCurrent() when its second argument is "" and every relevant
-	 * environment variable is unset or empty.  CFLocaleCopyCurrent() makes
-	 * the process multithreaded.  The postmaster calls sigprocmask() and
-	 * calls fork() without an immediate exec(), both of which have undefined
-	 * behavior in a multithreaded program.  A multithreaded postmaster is the
-	 * normal case on Windows, which offers neither fork() nor sigprocmask().
-	 */
-	if (pthread_is_threaded_np() != 0)
-		ereport(FATAL,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("postmaster became multithreaded during startup"),
-				 errhint("Set the LC_ALL environment variable to a valid locale.")));
-#endif
-
 	/*
 	 * Remember postmaster startup time
 	 */
@@ -1852,15 +1837,6 @@ ServerLoop(void)
 		if (StartWorkerNeeded || HaveCrashedWorker)
 			maybe_start_bgworkers();
 
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
-		/*
-		 * With assertions enabled, check regularly for appearance of
-		 * additional threads.  All builds check at start and exit.
-		 */
-		Assert(pthread_is_threaded_np() == 0);
-#endif
-
 		/*
 		 * Lastly, check to see if it's time to do some things that we don't
 		 * want to do every single time through the loop, because they're a
@@ -2466,8 +2442,9 @@ processCancelRequest(Port *port, void *pkt)
 /*
  * canAcceptConnections --- check to see if database state allows connections
  * of the specified type.  backend_type can be BACKEND_TYPE_NORMAL,
- * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER.  (Note that we don't yet
- * know whether a NORMAL connection might turn into a walsender.)
+ * BACKEND_TYPE_AUTOVAC, BACKEND_TYPE_BGWORKER, or BACKEND_TYPE_SYSTEM_BGWORKER.
+ * (Note that we don't yet know whether a NORMAL connection might turn into
+ * a walsender.)
  */
 static CAC_state
 canAcceptConnections(int backend_type)
@@ -2481,7 +2458,8 @@ canAcceptConnections(int backend_type)
 	 * bgworker_should_start_now() decided whether the DB state allows them.
 	 */
 	if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
-		backend_type != BACKEND_TYPE_BGWORKER)
+		backend_type != BACKEND_TYPE_BGWORKER &&
+		backend_type != BACKEND_TYPE_SYSTEM_BGWORKER)
 	{
 		if (Shutdown > NoShutdown)
 			return CAC_SHUTDOWN;	/* shutdown is pending */
@@ -3160,6 +3138,13 @@ process_pm_child_exit(void)
 				if (PgArchPID != 0)
 					signal_child(PgArchPID, SIGUSR2);
 
+				/*
+				 * Terminate system background workers, since the checkpoint
+				 * is complete.
+				 */
+				SignalSomeChildren(SIGTERM,
+								   BACKEND_TYPE_SYSTEM_BGWORKER);
+
 				/*
 				 * Waken walsenders for the last time. No regular backends
 				 * should be around anymore.
@@ -3561,7 +3546,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
 			 * Background workers were already processed above; ignore them
 			 * here.
 			 */
-			if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
+			if (bp->bkend_type == BACKEND_TYPE_BGWORKER ||
+				bp->bkend_type == BACKEND_TYPE_SYSTEM_BGWORKER)
 				continue;
 
 			if (take_action)
@@ -3740,7 +3726,7 @@ PostmasterStateMachine(void)
 
 		/* Signal all backend children except walsenders */
 		SignalSomeChildren(SIGTERM,
-						   BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
+						   BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER);
 		/* and the autovac launcher too */
 		if (AutoVacPID != 0)
 			signal_child(AutoVacPID, SIGTERM);
@@ -3778,7 +3764,7 @@ PostmasterStateMachine(void)
 		 * and archiver are also disregarded, they will be terminated later
 		 * after writing the checkpoint record.
 		 */
-		if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
+		if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND - BACKEND_TYPE_SYSTEM_BGWORKER) == 0 &&
 			StartupPID == 0 &&
 			WalReceiverPID == 0 &&
 			BgWriterPID == 0 &&
@@ -5045,21 +5031,6 @@ SubPostmasterMain(int argc, char *argv[])
 static void
 ExitPostmaster(int status)
 {
-#ifdef HAVE_PTHREAD_IS_THREADED_NP
-
-	/*
-	 * There is no known cause for a postmaster to become multithreaded after
-	 * startup.  Recheck to account for the possibility of unknown causes.
-	 * This message uses LOG level, because an unclean shutdown at this point
-	 * would usually not look much different from a clean shutdown.
-	 */
-	if (pthread_is_threaded_np() != 0)
-		ereport(LOG,
-				(errcode(ERRCODE_INTERNAL_ERROR),
-				 errmsg_internal("postmaster became multithreaded"),
-				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
-#endif
-
 	/* should cleanup shared memory and kill all backends */
 
 	/*
@@ -5788,16 +5759,25 @@ do_start_bgworker(RegisteredBgWorker *rw)
  * specified start_time?
  */
 static bool
-bgworker_should_start_now(BgWorkerStartTime start_time)
+bgworker_should_start_now(BgWorkerStartTime start_time, int flags)
 {
 	switch (pmState)
 	{
 		case PM_NO_CHILDREN:
 		case PM_WAIT_DEAD_END:
 		case PM_SHUTDOWN_2:
+			break;
+
 		case PM_SHUTDOWN:
 		case PM_WAIT_BACKENDS:
 		case PM_STOP_BACKENDS:
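+			/*
+			 * System background workers may still be started while the
+			 * postmaster is shutting down; they are terminated separately
+			 * once the shutdown checkpoint has completed.
+			 */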
+			if (flags & BGWORKER_CLASS_SYSTEM)
+				return true;
 			break;
 
 		case PM_RUN:
@@ -5872,7 +5847,10 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
 
 	bn->cancel_key = MyCancelKey;
 	bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
-	bn->bkend_type = BACKEND_TYPE_BGWORKER;
+	if (rw->rw_worker.bgw_flags & BGWORKER_CLASS_SYSTEM)
+		bn->bkend_type = BACKEND_TYPE_SYSTEM_BGWORKER;
+	else
+		bn->bkend_type = BACKEND_TYPE_BGWORKER;
 	bn->dead_end = false;
 	bn->bgworker_notify = false;
 
@@ -5970,7 +5948,8 @@ maybe_start_bgworkers(void)
 			}
 		}
 
-		if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
+		if (bgworker_should_start_now(rw->rw_worker.bgw_start_time,
+									  rw->rw_worker.bgw_flags))
 		{
 			/* reset crash time before trying to start worker */
 			rw->rw_crashed_at = 0;
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 0e7de26bc28..ce79e4f8f43 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -79,6 +79,9 @@ static volatile sig_atomic_t startup_progress_timer_expired = false;
  */
 int			log_startup_progress_interval = 10000;	/* 10 sec */
 
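+/* Hook called at the top of HandleStartupProcInterrupts */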
+HandleStartupProcInterrupts_hook_type HandleStartupProcInterrupts_hook = NULL;
+
 /* Signal handlers */
 static void StartupProcTriggerHandler(SIGNAL_ARGS);
 static void StartupProcSigHupHandler(SIGNAL_ARGS);
@@ -186,6 +188,9 @@ HandleStartupProcInterrupts(void)
 	static uint32 postmaster_poll_count = 0;
 #endif
 
+	if (HandleStartupProcInterrupts_hook)
+		HandleStartupProcInterrupts_hook();
+
 	/*
 	 * Process any requests or signals received recently.
 	 */
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 266fbc23399..4e8a9573006 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -161,6 +161,7 @@ WalWriterMain(void)
 		 * about in walwriter, but we do have LWLocks, and perhaps buffers?
 		 */
 		LWLockReleaseAll();
+		CustomErrorCleanup();
 		ConditionVariableCancelSleep();
 		pgstat_report_wait_end();
 		UnlockBuffers();
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
index 504f94d4a77..03f9a54f587 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -814,7 +814,9 @@ logicalrep_write_tuple(StringInfo out, Relation rel, TupleTableSlot *slot,
 			continue;
 		}
 
-		if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
+		if (att->attlen == -1 &&
+			(VARATT_IS_EXTERNAL_ONDISK(values[i]) ||
+			 VARATT_IS_EXTERNAL_ORIOLEDB(values[i])))
 		{
 			/*
 			 * Unchanged toasted datum.  (Note that we don't promise to detect
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 3ed2f79dd06..3cc86087fd1 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -207,6 +207,9 @@ struct SnapBuild
 	 */
 	TransactionId next_phase_at;
 
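+	/* CSN-based snapshot state to be attached to the snapshots we build */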
+	CSNSnapshotData csnSnapshotData;
+
 	/*
 	 * Array of transactions which could have catalog changes that committed
 	 * between xmin and xmax.
@@ -404,6 +406,17 @@ SnapBuildCurrentState(SnapBuild *builder)
 	return builder->state;
 }
 
+/*
+ * At which transaction id will the next phase of initial snapshot building
+ * happen?
+ */
+TransactionId
+SnapBuildNextPhaseAt(SnapBuild *builder)
+{
+	return builder->next_phase_at;
+}
+
+
 /*
  * Return the LSN at which the two-phase decoding was first enabled.
  */
@@ -551,6 +564,8 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
 	snapshot->regd_count = 0;
 	snapshot->snapXactCompletionCount = 0;
 
+	snapshot->csnSnapshotData = builder->csnSnapshotData;
+
 	return snapshot;
 }
 
@@ -648,6 +663,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
 	snap->snapshot_type = SNAPSHOT_MVCC;
 	snap->xcnt = newxcnt;
 	snap->xip = newxip;
+	snap->csnSnapshotData = builder->csnSnapshotData;
 
 	return snap;
 }
@@ -1028,6 +1044,8 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
 
 	TransactionId xmax = xid;
 
+	builder->csnSnapshotData.xlogptr = lsn;
+
 	/*
 	 * Transactions preceding BUILDING_SNAPSHOT will neither be decoded, nor
 	 * will they be part of a snapshot.  So we don't need to record anything.
@@ -1215,6 +1233,10 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
 	ReorderBufferTXN *txn;
 	TransactionId xmin;
 
+	builder->csnSnapshotData.snapshotcsn = running->csn;
+	builder->csnSnapshotData.xmin = 0;
+	builder->csnSnapshotData.xlogptr = lsn;
+
 	/*
 	 * If we're not consistent yet, inspect the record to see whether it
 	 * allows to get closer to being consistent. If we are consistent, dump
@@ -2139,3 +2161,14 @@
 	}
 	FreeDir(snap_dir);
 }
+
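+/*
+ * Update the CSN snapshot data to be attached to subsequently built
+ * snapshots.
+ */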
+void
+SnapBuildUpdateCSNSnaphot(SnapBuild *builder,
+						  CSNSnapshotData *csnSnapshotData)
+{
+	builder->csnSnapshotData = *csnSnapshotData;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 832b1cf7642..dfd72bf8cca 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -2430,9 +2430,8 @@ apply_handle_insert(StringInfo s)
 	/* Initialize the executor state. */
 	edata = create_edata_for_relation(rel);
 	estate = edata->estate;
-	remoteslot = ExecInitExtraTupleSlot(estate,
-										RelationGetDescr(rel->localrel),
-										&TTSOpsVirtual);
+	remoteslot = table_slot_create(rel->localrel,
+								   &estate->es_tupleTable);
 
 	/* Process and store remote tuple in the slot */
 	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -2586,9 +2585,8 @@ apply_handle_update(StringInfo s)
 	/* Initialize the executor state. */
 	edata = create_edata_for_relation(rel);
 	estate = edata->estate;
-	remoteslot = ExecInitExtraTupleSlot(estate,
-										RelationGetDescr(rel->localrel),
-										&TTSOpsVirtual);
+	remoteslot = table_slot_create(rel->localrel,
+								   &estate->es_tupleTable);
 
 	/*
 	 * Populate updatedCols so that per-column triggers can fire, and so
@@ -2766,9 +2764,8 @@ apply_handle_delete(StringInfo s)
 	/* Initialize the executor state. */
 	edata = create_edata_for_relation(rel);
 	estate = edata->estate;
-	remoteslot = ExecInitExtraTupleSlot(estate,
-										RelationGetDescr(rel->localrel),
-										&TTSOpsVirtual);
+	remoteslot = table_slot_create(rel->localrel,
+								   &estate->es_tupleTable);
 
 	/* Build the search tuple. */
 	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index c57c5ed8de9..18f8824d5a3 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -1315,8 +1315,10 @@ pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot,
 		 * VARTAG_INDIRECT. See ReorderBufferToastReplace.
 		 */
 		if (att->attlen == -1 &&
-			VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) &&
-			!VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]))
+			(VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) ||
+			 VARATT_IS_EXTERNAL_ORIOLEDB(new_slot->tts_values[i])) &&
+			!(VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i]) ||
+			  VARATT_IS_EXTERNAL_ORIOLEDB(old_slot->tts_values[i])))
 		{
 			if (!tmp_new_slot)
 			{
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 4c53de08b9b..ce4e40bf137 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -315,6 +315,7 @@ void
 WalSndErrorCleanup(void)
 {
 	LWLockReleaseAll();
+	CustomErrorCleanup();
 	ConditionVariableCancelSleep();
 	pgstat_report_wait_end();
 
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 9cd96fd17ef..f2307c43612 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -23,6 +23,7 @@
 #include "access/relation.h"
 #include "access/sysattr.h"
 #include "access/table.h"
+#include "access/tableam.h"
 #include "catalog/dependency.h"
 #include "catalog/pg_type.h"
 #include "commands/trigger.h"
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e066a3f888f..aa82637b1d1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2667,6 +2667,7 @@ BufferSync(int flags)
 		BufferDesc *bufHdr = NULL;
 		CkptTsStatus *ts_stat = (CkptTsStatus *)
 			DatumGetPointer(binaryheap_first(ts_heap));
+		double progress;
 
 		buf_id = CkptBufferIds[ts_stat->index].buf_id;
 		Assert(buf_id != -1);
@@ -2721,7 +2722,14 @@
 		 *
 		 * (This will check for barrier events even if it doesn't sleep.)
 		 */
-		CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
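+		/*
+		 * Rescale the buffer sync fraction from [0.0, 1.0] into the
+		 * [CheckPointProgress, 1.0] range before passing it on.
+		 */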
+		progress = (double) num_processed / num_to_scan;
+		progress = CheckPointProgress + progress * (1 - CheckPointProgress);
+
+		CheckpointWriteDelay(flags, progress);
 	}
 
 	/*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 316b4fa7197..a5ada9beb54 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -309,6 +309,8 @@ static GlobalVisState GlobalVisTempRels;
  */
 static TransactionId ComputeXidHorizonsResultLastXmin;
 
+snapshot_hook_type snapshot_hook = NULL;
+
 #ifdef XIDCACHE_DEBUG
 
 /* counters for XidCache measurement */
@@ -752,6 +754,8 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
 	proc->delayChkptFlags = 0;
 
 	proc->recoveryConflictPending = false;
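+	/* Advance the global commit sequence number and remember ours */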
+	proc->lastCommittedCSN = pg_atomic_fetch_add_u64(&ShmemVariableCache->nextCommitSeqNo, 1);
 
 	/* must be cleared with xid/xmin: */
 	/* avoid unnecessarily dirtying shared cachelines */
@@ -2258,6 +2261,8 @@ GetSnapshotData(Snapshot snapshot)
 
 	if (GetSnapshotDataReuse(snapshot))
 	{
+		if (snapshot_hook)
+			snapshot_hook(snapshot);
 		LWLockRelease(ProcArrayLock);
 		return snapshot;
 	}
@@ -2439,6 +2444,9 @@ GetSnapshotData(Snapshot snapshot)
 	if (!TransactionIdIsValid(MyProc->xmin))
 		MyProc->xmin = TransactionXmin = xmin;
 
+	if (snapshot_hook)
+		snapshot_hook(snapshot);
+
 	LWLockRelease(ProcArrayLock);
 
 	/* maintain state for GlobalVis* */
@@ -2858,6 +2866,7 @@ GetRunningTransactionData(void)
 	CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 	CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
 	CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
+	CurrentRunningXacts->csn = pg_atomic_read_u64(&ShmemVariableCache->nextCommitSeqNo);
 
 	Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
 	Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 3bdc5f7fb6c..1e6760a7c49 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -1355,6 +1355,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 	xlrec.nextXid = CurrRunningXacts->nextXid;
 	xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
 	xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
+	xlrec.csn = CurrRunningXacts->csn;
 
 	/* Header */
 	XLogBeginInsert();
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index ba66e820d06..245e15f0cc5 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -648,6 +648,28 @@ GetLockMethodLocalHash(void)
 }
 #endif
 
+/*
+ * Returns true if a lock with the given locktag, in any lock mode, exists
+ * in LockMethodLocalHash.
+ */
+bool
+DoLocalLockExist(const LOCKTAG *locktag)
+{
+	HASH_SEQ_STATUS scan_status;
+	LOCALLOCK  *locallock;
+
+	hash_seq_init(&scan_status, LockMethodLocalHash);
+	while ((locallock = (LOCALLOCK *) hash_seq_search(&scan_status)) != NULL)
+	{
+		if (memcmp(&locallock->tag.lock, locktag, sizeof(LOCKTAG)) == 0)
+		{
+			hash_seq_term(&scan_status);
+			return true;
+		}
+	}
+	return false;
+}
+
 /*
  * LockHasWaiters -- look up 'locktag' and check if releasing this
  *		lock would wake up other processes waiting for it.
@@ -797,7 +818,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 					bool reportMemoryError,
 					LOCALLOCK **locallockp)
 {
-	LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+	LOCKMETHODID lockmethodid;
 	LockMethod	lockMethodTable;
 	LOCALLOCKTAG localtag;
 	LOCALLOCK  *locallock;
@@ -809,6 +830,19 @@
 	LWLock	   *partitionLock;
 	bool		found_conflict;
 	bool		log_lock = false;
+	bool		no_log_lock = false;
+
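+	/*
+	 * NO_LOG_LOCKMETHOD behaves like DEFAULT_LOCKMETHOD, except that taking
+	 * an AccessExclusiveLock is not WAL-logged for hot standby.
+	 */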
+	if (locktag->locktag_lockmethodid == NO_LOG_LOCKMETHOD)
+	{
+		((LOCKTAG *)locktag)->locktag_lockmethodid = DEFAULT_LOCKMETHOD;
+		no_log_lock = true;
+	}
+
+	lockmethodid = locktag->locktag_lockmethodid;
 
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -923,7 +953,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	if (lockmode >= AccessExclusiveLock &&
 		locktag->locktag_type == LOCKTAG_RELATION &&
 		!RecoveryInProgress() &&
-		XLogStandbyInfoActive())
+		XLogStandbyInfoActive() &&
+		!no_log_lock)
 	{
 		LogAccessExclusiveLockPrepare();
 		log_lock = true;
@@ -1134,12 +1165,34 @@
 		 */
 		if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
 		{
+			int		i;
+
 			AbortStrongLockAcquire();
 			PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
 			LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
 			/* Should we retry ? */
 			LWLockRelease(partitionLock);
-			elog(ERROR, "LockAcquire failed");
+			/*
+			 * We've been removed from the queue without obtaining a lock.
+			 * That's OK, we're going to return LOCKACQUIRE_NOT_AVAIL, but
+			 * need to release a local lock first.
+			 */
+			locallock->nLocks--;
+			for (i = 0; i < locallock->numLockOwners; i++)
+			{
+				if (locallock->lockOwners[i].owner == owner)
+				{
+					locallock->lockOwners[i].nLocks--;
+					if (locallock->lockOwners[i].nLocks == 0)
+					{
+						ResourceOwnerForgetLock(owner, locallock);
+						locallock->lockOwners[i] = locallock->lockOwners[--locallock->numLockOwners];
+					}
+					break;
+				}
+			}
+
+			return LOCKACQUIRE_NOT_AVAIL;
 		}
 		PROCLOCK_PRINT("LockAcquire: granted", proclock);
 		LOCK_PRINT("LockAcquire: granted", lock, lockmode);
@@ -4641,8 +4695,8 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
 	LWLockRelease(&proc->fpInfoLock);
 
 	/* Time to wait. */
-	(void) LockAcquire(&tag, ShareLock, false, false);
-
+	if (LockAcquire(&tag, ShareLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
+		return false;
 	LockRelease(&tag, ShareLock, false);
 	return XactLockForVirtualXact(vxid, xid, wait);
 }
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e9e445bb216..905fccd673d 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -829,6 +829,7 @@ ProcKill(int code, Datum arg)
 	 * facility by releasing our PGPROC ...
 	 */
 	LWLockReleaseAll();
+	CustomErrorCleanup();
 
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
@@ -940,6 +941,7 @@ AuxiliaryProcKill(int code, Datum arg)
 
 	/* Release any LW locks I am holding (see notes above) */
 	LWLockReleaseAll();
+	CustomErrorCleanup();
 
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
@@ -1190,7 +1192,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 * If InHotStandby we set lock waits slightly later for clarity with other
 	 * code.
 	 */
-	if (!InHotStandby)
+	if (!InHotStandby && !InRecovery)
 	{
 		if (LockTimeout > 0)
 		{
@@ -1550,7 +1552,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 	 * already caused QueryCancelPending to become set, we want the cancel to
 	 * be reported as a lock timeout, not a user cancel.
 	 */
-	if (!InHotStandby)
+	if (!InHotStandby && !InRecovery)
 	{
 		if (LockTimeout > 0)
 		{
diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c
index 48852bf79e2..265fcfc86c4 100644
--- a/src/backend/utils/adt/amutils.c
+++ b/src/backend/utils/adt/amutils.c
@@ -195,7 +195,7 @@ indexam_property(FunctionCallInfo fcinfo,
 	/*
 	 * Get AM information.  If we don't have a valid AM OID, return NULL.
 	 */
-	routine = GetIndexAmRoutineByAmId(amoid, true);
+	routine = GetIndexAmRoutineByAmId(index_oid, amoid, true);
 	if (routine == NULL)
 		PG_RETURN_NULL();
 
@@ -455,7 +455,7 @@ pg_indexam_progress_phasename(PG_FUNCTION_ARGS)
 	IndexAmRoutine *routine;
 	char	   *name;
 
-	routine = GetIndexAmRoutineByAmId(amoid, true);
+	routine = GetIndexAmRoutineByAmId(InvalidOid, amoid, true);
 	if (routine == NULL || !routine->ambuildphasename)
 		PG_RETURN_NULL();
 
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index f9b9590997b..6f7bcc4394c 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -18,8 +18,11 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/predicate_internals.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
+#include "utils/wait_event.h"
 
 
 /*
@@ -614,6 +617,7 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS)
 Datum
 pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
 {
+	PGPROC     *blocked_proc;
 	int			blocked_pid = PG_GETARG_INT32(0);
 	ArrayType  *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1);
 	ArrayType  *blocking_pids_a;
@@ -674,6 +678,15 @@
 	if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0)
 		PG_RETURN_BOOL(true);
 
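+	/*
+	 * Also consider the session blocked if it is waiting on an
+	 * extension-defined wait event, which the lock checks above cannot see.
+	 */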
+	blocked_proc = BackendPidGetProc(blocked_pid);
+	if (blocked_proc != NULL &&
+		(blocked_proc->wait_event_info & 0xFF000000) == PG_WAIT_EXTENSION)
+		PG_RETURN_BOOL(true);
+
 	PG_RETURN_BOOL(false);
 }
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 99a21f20b9f..d903a746b36 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -134,6 +134,7 @@ typedef struct
 
 static HTAB *collation_cache = NULL;
 
+pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook = NULL;
 
 #if defined(WIN32) && defined(LC_MESSAGES)
 static char *IsoLocaleName(const char *);
@@ -1600,6 +1601,7 @@ pg_newlocale_from_collation(Oid collid)
 		{
 			char	   *actual_versionstr;
 			char	   *collversionstr;
+			int			level = WARNING;
 
 			collversionstr = TextDatumGetCString(datum);
 
@@ -1619,8 +1621,11 @@ pg_newlocale_from_collation(Oid collid)
 								NameStr(collform->collname))));
 			}
 
+			if (pg_newlocale_from_collation_hook && pg_newlocale_from_collation_hook())
+				level = ERROR;
+
 			if (strcmp(actual_versionstr, collversionstr) != 0)
-				ereport(WARNING,
+				ereport(level,
 						(errmsg("collation \"%s\" has version mismatch",
 								NameStr(collform->collname)),
 						 errdetail("The collation in the database was created using version %s, "
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6945d99b3d5..9abe334b563 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -247,6 +247,7 @@ RI_FKey_check(TriggerData *trigdata)
 	TupleTableSlot *newslot;
 	RI_QueryKey qkey;
 	SPIPlanPtr	qplan;
+	Relation	rel = trigdata->tg_relation;
 
 	riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
 									trigdata->tg_relation, false);
@@ -264,7 +265,7 @@ RI_FKey_check(TriggerData *trigdata)
 	 * and lock on the buffer to call HeapTupleSatisfiesVisibility.  Caller
 	 * should be holding pin, but not lock.
 	 */
-	if (!table_tuple_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf))
+	if (!table_tuple_satisfies_snapshot(rel, newslot, SnapshotSelf))
 		return PointerGetDatum(NULL);
 
 	/*
@@ -1263,9 +1264,6 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
 {
 	const RI_ConstraintInfo *riinfo;
 	int			ri_nullcheck;
-	Datum		xminDatum;
-	TransactionId xmin;
-	bool		isnull;
 
 	/*
 	 * AfterTriggerSaveEvent() handles things such that this function is never
@@ -1333,10 +1331,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
 	 * this if we knew the INSERT trigger already fired, but there is no easy
 	 * way to know that.)
 	 */
-	xminDatum = slot_getsysattr(oldslot, MinTransactionIdAttributeNumber, &isnull);
-	Assert(!isnull);
-	xmin = DatumGetTransactionId(xminDatum);
-	if (TransactionIdIsCurrentTransactionId(xmin))
+	if (table_tuple_is_current(fk_rel, oldslot))
 		return true;
 
 	/* If all old and new key values are equal, no check is needed */
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index f01cc2521c8..ecae9d86420 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -1313,7 +1313,7 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
 	amrec = (Form_pg_am) GETSTRUCT(ht_am);
 
 	/* Fetch the index AM's API struct */
-	amroutine = GetIndexAmRoutine(amrec->amhandler);
+	amroutine = GetIndexAmRoutineExtended(indexrelid, amrec->amhandler);
 
 	/*
 	 * Get the index expressions, if any.  (NOTE: we do not use the relcache
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c4fcd0076ea..675c743bcc5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6223,12 +6223,33 @@ get_actual_variable_endpoint(Relation heapRel,
 	index_scan->xs_want_itup = true;
 	index_rescan(index_scan, scankeys, 1, NULL, 0);
 
-	/* Fetch first/next tuple in specified direction */
-	while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
+	while (true)
 	{
-		BlockNumber block = ItemPointerGetBlockNumber(tid);
+		BlockNumber block = InvalidBlockNumber;
 
-		if (!VM_ALL_VISIBLE(heapRel,
+		/* Fetch first/next tuple in specified direction */
+		if (index_scan->xs_want_rowid)
+		{
+			NullableDatum rowid;
+
+			rowid = index_getnext_rowid(index_scan, indexscandir);
+
+			if (rowid.isnull)
+				break;
+		}
+		else
+		{
+			tid = index_getnext_tid(index_scan, indexscandir);
+
+			if (tid == NULL)
+				break;
+
+			Assert(ItemPointerEquals(tid, &index_scan->xs_heaptid));
+			block = ItemPointerGetBlockNumber(tid);
+		}
+
+		if (!index_scan->xs_want_rowid &&
+			!VM_ALL_VISIBLE(heapRel,
 							block,
 							&vmbuffer))
 		{
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 000e81a2d96..91136f5cfbd 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -65,6 +65,11 @@
 /* Cache management header --- pointer is NULL until created */
 static CatCacheHeader *CacheHdr = NULL;
 
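+/* Hooks that allow a loadable module to intercept catcache lookups */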
+SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook = NULL;
+SearchCatCacheList_hook_type SearchCatCacheList_hook = NULL;
+GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook = NULL;
+
 static inline HeapTuple SearchCatCacheInternal(CatCache *cache,
 											   int nkeys,
 											   Datum v1, Datum v2,
@@ -1270,6 +1274,14 @@ SearchCatCacheInternal(CatCache *cache,
 	dlist_head *bucket;
 	CatCTup    *ct;
 
+	if (SearchCatCacheInternal_hook)
+	{
+		ct = SearchCatCacheInternal_hook(cache, nkeys, v1, v2, v3, v4);
+
+		if (ct)
+			return &ct->tuple;
+	}
+
 	/* Make sure we're in an xact, even if this ends up being a cache hit */
 	Assert(IsTransactionState());
 
@@ -1555,6 +1567,12 @@ GetCatCacheHashValue(CatCache *cache,
 					 Datum v3,
 					 Datum v4)
 {
+	if (GetCatCacheHashValue_hook)
+	{
+		return GetCatCacheHashValue_hook(cache, cache->cc_nkeys,
+										 v1, v2, v3, v4);
+	}
+
 	/*
 	 * one-time startup overhead for each cache
 	 */
@@ -1605,6 +1622,14 @@ SearchCatCacheList(CatCache *cache,
 	MemoryContext oldcxt;
 	int			i;
 
+	if (SearchCatCacheList_hook)
+	{
+		cl = SearchCatCacheList_hook(cache, nkeys, v1, v2, v3);
+
+		if (cl)
+			return cl;
+	}
+
 	/*
 	 * one-time startup overhead for each cache
 	 */
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 0008826f67c..b7a5f0c48a0 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -252,6 +252,7 @@ int			debug_discard_caches = 0;
 
 #define MAX_SYSCACHE_CALLBACKS 64
 #define MAX_RELCACHE_CALLBACKS 10
+#define MAX_USERCACHE_CALLBACKS 10
 
 static struct SYSCACHECALLBACK
 {
@@ -273,6 +274,14 @@ static struct RELCACHECALLBACK
 
 static int	relcache_callback_count = 0;
 
+static struct USERCACHECALLBACK
+{
+	UsercacheCallbackFunction function;
+	Datum		arg;
+}			usercache_callback_list[MAX_USERCACHE_CALLBACKS];
+
+static int	usercache_callback_count = 0;
+
 /* ----------------------------------------------------------------
  *				Invalidation subgroup support functions
  * ----------------------------------------------------------------
@@ -683,6 +692,20 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 		else if (msg->sn.dbId == MyDatabaseId)
 			InvalidateCatalogSnapshot();
 	}
+	else if (msg->id == SHAREDINVALUSERCACHE_ID)
+	{
+		int			i;
+		for (i = 0; i < usercache_callback_count; i++)
+		{
+			struct USERCACHECALLBACK *ccitem = usercache_callback_list + i;
+
+			ccitem->function(ccitem->arg,
+							 msg->usr.arg1,
+							 msg->usr.arg2,
+							 msg->usr.arg3);
+		}
+	}
 	else
 		elog(FATAL, "unrecognized SI message ID: %d", msg->id);
 }
@@ -726,6 +748,16 @@ InvalidateSystemCachesExtended(bool debug_discard)
 
 		ccitem->function(ccitem->arg, InvalidOid);
 	}
+
+	for (i = 0; i < usercache_callback_count; i++)
+	{
+		struct USERCACHECALLBACK *ccitem = usercache_callback_list + i;
+
+		ccitem->function(ccitem->arg,
+						 InvalidOid,
+						 InvalidOid,
+						 InvalidOid);
+	}
 }
 
 
@@ -1432,6 +1465,27 @@ CacheInvalidateRelcacheByRelid(Oid relid)
 	ReleaseSysCache(tup);
 }
 
+/*
+ * CacheInvalidateRelcacheByDbidRelid
+ *		Register invalidation of the relcache entry for the given relation
+ *		in the given database.
+ */
+void
+CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid)
+{
+	SharedInvalidationMessage msg;
+
+	PrepareInvalidationState();
+
+	msg.rc.id = SHAREDINVALRELCACHE_ID;
+	msg.rc.dbId = dbid;
+	msg.rc.relId = relid;
+	/* check AddCatcacheInvalidationMessage() for an explanation */
+	VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
+
+	SendSharedInvalidMessages(&msg, 1);
+}
+
 
 /*
  * CacheInvalidateSmgr
@@ -1570,6 +1622,24 @@ CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
 	++relcache_callback_count;
 }
 
+/*
+ * CacheRegisterUsercacheCallback
+ *		Register a callback to be invoked whenever a user-defined
+ *		("usercache") invalidation message is processed.
+ */
+void
+CacheRegisterUsercacheCallback(UsercacheCallbackFunction func,
+							   Datum arg)
+{
+	if (usercache_callback_count >= MAX_USERCACHE_CALLBACKS)
+		elog(FATAL, "out of usercache_callback_list slots");
+
+	usercache_callback_list[usercache_callback_count].function = func;
+	usercache_callback_list[usercache_callback_count].arg = arg;
+
+	++usercache_callback_count;
+}
+
 /*
  * CallSyscacheCallbacks
  *
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 8e08ca1c680..18b2ebdd59f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -34,6 +34,7 @@
 #include "access/multixact.h"
 #include "access/nbtree.h"
 #include "access/parallel.h"
+#include "access/relation.h"
 #include "access/reloptions.h"
 #include "access/sysattr.h"
 #include "access/table.h"
@@ -317,6 +318,7 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
 										  StrategyNumber numSupport);
 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
 static void unlink_initfile(const char *initfilename, int elevel);
+static void release_rd_amcache(Relation rel);
 
 
 /*
@@ -461,8 +463,9 @@ AllocateRelationDesc(Form_pg_class relp)
 static void
 RelationParseRelOptions(Relation relation, HeapTuple tuple)
 {
-	bytea	   *options;
-	amoptions_function amoptsfn;
+	bytea				   *options;
+	amoptions_function		amoptsfn;
+	const TableAmRoutine   *tableam = NULL;
 
 	relation->rd_options = NULL;
 
@@ -474,9 +477,10 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
 	{
 		case RELKIND_RELATION:
 		case RELKIND_TOASTVALUE:
-		case RELKIND_VIEW:
 		case RELKIND_MATVIEW:
+		case RELKIND_VIEW:
 		case RELKIND_PARTITIONED_TABLE:
+			tableam = relation->rd_tableam;
 			amoptsfn = NULL;
 			break;
 		case RELKIND_INDEX:
@@ -488,11 +492,12 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
 	}
 
 	/*
-	 * Fetch reloptions from tuple; have to use a hardwired descriptor because
-	 * we might not have any other for pg_class yet (consider executing this
-	 * code for pg_class itself)
-	 */
-	options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
+	 * Fetch reloptions from tuple; have to use a hardwired descriptor because
+	 * we might not have any other for pg_class yet (consider executing this
+	 * code for pg_class itself)
+	 */
+	options = extractRelOptions(tuple, GetPgClassDescriptor(),
+								tableam, amoptsfn);
 
 	/*
 	 * Copy parsed data into CacheMemoryContext.  To guard against the
@@ -1399,7 +1404,7 @@ InitIndexAmRoutine(Relation relation)
 	 * Call the amhandler in current, short-lived memory context, just in case
 	 * it leaks anything (it probably won't, but let's be paranoid).
 	 */
-	tmp = GetIndexAmRoutine(relation->rd_amhandler);
+	tmp = GetIndexAmRoutineExtended(relation->rd_id, relation->rd_amhandler);
 
 	/* OK, now transfer the data into relation's rd_indexcxt. */
 	cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
@@ -2230,9 +2235,7 @@ RelationReloadIndexInfo(Relation relation)
 	RelationCloseSmgr(relation);
 
 	/* Must free any AM cached data upon relcache flush */
-	if (relation->rd_amcache)
-		pfree(relation->rd_amcache);
-	relation->rd_amcache = NULL;
+	release_rd_amcache(relation);
 
 	/*
 	 * If it's a shared index, we might be called before backend startup has
@@ -2452,8 +2455,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
 		pfree(relation->rd_options);
 	if (relation->rd_indextuple)
 		pfree(relation->rd_indextuple);
-	if (relation->rd_amcache)
-		pfree(relation->rd_amcache);
+	release_rd_amcache(relation);
 	if (relation->rd_fdwroutine)
 		pfree(relation->rd_fdwroutine);
 	if (relation->rd_indexcxt)
@@ -2515,9 +2517,7 @@ RelationClearRelation(Relation relation, bool rebuild)
 	RelationCloseSmgr(relation);
 
 	/* Free AM cached data, if any */
-	if (relation->rd_amcache)
-		pfree(relation->rd_amcache);
-	relation->rd_amcache = NULL;
+	release_rd_amcache(relation);
 
 	/*
 	 * Treat nailed-in system relations separately, they always need to be
@@ -6820,3 +6820,9 @@ unlink_initfile(const char *initfilename, int elevel)
 							initfilename)));
 	}
 }
+
+static void
+release_rd_amcache(Relation rel)
+{
+	table_free_rd_amcache(rel);
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 4e4a34bde80..a5b3e437f7c 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -696,6 +696,7 @@ static int	SysCacheSupportingRelOidSize;
 
 static int	oid_compare(const void *a, const void *b);
 
+SysCacheGetAttr_hook_type SysCacheGetAttr_hook = NULL;
 
 /*
  * InitCatalogCache - initialize the caches
@@ -1080,6 +1081,7 @@ SysCacheGetAttr(int cacheId, HeapTuple tup,
 				AttrNumber attributeNumber,
 				bool *isNull)
 {
+	TupleDesc	cc_tupdesc;
 	/*
 	 * We just need to get the TupleDesc out of the cache entry, and then we
 	 * can apply heap_getattr().  Normally the cache control data is already
@@ -1089,14 +1091,20 @@
 	if (cacheId < 0 || cacheId >= SysCacheSize ||
 		!PointerIsValid(SysCache[cacheId]))
 		elog(ERROR, "invalid cache ID: %d", cacheId);
-	if (!PointerIsValid(SysCache[cacheId]->cc_tupdesc))
+
+	cc_tupdesc = SysCache[cacheId]->cc_tupdesc;
+
+	if (!PointerIsValid(cc_tupdesc) && SysCacheGetAttr_hook)
+		cc_tupdesc = SysCacheGetAttr_hook(SysCache[cacheId]);
+	if (!PointerIsValid(cc_tupdesc))
 	{
 		InitCatCachePhase2(SysCache[cacheId], false);
 		Assert(PointerIsValid(SysCache[cacheId]->cc_tupdesc));
+		cc_tupdesc = SysCache[cacheId]->cc_tupdesc;
 	}
 
 	return heap_getattr(tup, attributeNumber,
-						SysCache[cacheId]->cc_tupdesc,
+						cc_tupdesc,
 						isNull);
 }
 
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 608cd5e8e43..71619cf04d0 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -290,6 +290,8 @@ static int32 NextRecordTypmod = 0;	/* number of entries used */
  * as identifiers, so we start the counter at INVALID_TUPLEDESC_IDENTIFIER.
  */
 static uint64 tupledesc_id_counter = INVALID_TUPLEDESC_IDENTIFIER;
+load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook = NULL;
+load_enum_cache_data_hook_type load_enum_cache_data_hook = NULL;
 
 static void load_typcache_tupdesc(TypeCacheEntry *typentry);
 static void load_rangetype_info(TypeCacheEntry *typentry);
@@ -879,6 +881,12 @@ load_typcache_tupdesc(TypeCacheEntry *typentry)
 {
 	Relation	rel;
 
+	if (load_typcache_tupdesc_hook)
+	{
+		load_typcache_tupdesc_hook(typentry);
+		return;
+	}
+
 	if (!OidIsValid(typentry->typrelid))	/* should not happen */
 		elog(ERROR, "invalid typrelid for composite type %u",
 			 typentry->type_id);
@@ -2560,6 +2568,12 @@ load_enum_cache_data(TypeCacheEntry *tcache)
 	int			bm_size,
 				start_pos;
 
+	if (load_enum_cache_data_hook)
+	{
+		load_enum_cache_data_hook(tcache);
+		return;
+	}
+
 	/* Check that this is actually an enum */
 	if (tcache->typtype != TYPTYPE_ENUM)
 		ereport(ERROR,
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 7112fb00069..5badf5eaedc 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -3749,7 +3749,6 @@ write_stderr(const char *fmt,...)
 	va_end(ap);
 }
 
-
 /*
  * Write a message to STDERR using only async-signal-safe functions.  This can
  * be used to safely emit a message from a signal handler.
@@ -3802,3 +3801,15 @@
 
 	return trace_level;
 }
+
+CustomErrorCleanupHookType CustomErrorCleanupHook = NULL;
+
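+/*
+ * Invoke the extension-provided error cleanup hook, if one is installed.
+ */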
+void
+CustomErrorCleanup(void)
+{
+	if (CustomErrorCleanupHook)
+		CustomErrorCleanupHook();
+}
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9208c31fe06..85811af84ff 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -72,7 +72,7 @@ extern Datum fmgr_security_definer(PG_FUNCTION_ARGS);
  * or name, but search by Oid is much faster.
  */
 
-static const FmgrBuiltin *
+const FmgrBuiltin *
 fmgr_isbuiltin(Oid id)
 {
 	uint16		index;
@@ -97,7 +97,7 @@ fmgr_isbuiltin(Oid id)
  * the array with the same name, but they should all point to the same
  * routine.
  */
-static const FmgrBuiltin *
+const FmgrBuiltin *
 fmgr_lookupByName(const char *name)
 {
 	int			i;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index df4d15a50fb..7b89c11feb6 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -81,7 +81,8 @@ static void ClientCheckTimeoutHandler(void);
 static bool ThereIsAtLeastOneRole(void);
 static void process_startup_options(Port *port, bool am_superuser);
 static void process_settings(Oid databaseid, Oid roleid);
-
+
+base_init_startup_hook_type base_init_startup_hook = NULL;
 
 /*** InitPostgres support ***/
 
@@ -641,6 +641,9 @@ BaseInit(void)
 	 */
 	InitFileAccess();
 
+	if (base_init_startup_hook)
+		base_init_startup_hook();
+
 	/*
 	 * Initialize statistics reporting. This needs to happen early to ensure
 	 * that pgstat's shutdown callback runs after the shutdown callbacks of
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index f60633df241..120db339150 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -1100,6 +1100,36 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
 	}
 }
 
+/*
+ * Same as tuplestore_gettupleslot(), but forces the tuple to be stored in
+ * the slot.  Thus, it also works with slot types other than minimal tuple.
+ */
+bool
+tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward,
+							  bool copy, TupleTableSlot *slot)
+{
+	MinimalTuple tuple;
+	bool		should_free;
+
+	tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free);
+
+	if (tuple)
+	{
+		if (copy && !should_free)
+		{
+			tuple = heap_copy_minimal_tuple(tuple);
+			should_free = true;
+		}
+		ExecForceStoreMinimalTuple(tuple, slot, should_free);
+		return true;
+	}
+	else
+	{
+		ExecClearTuple(slot);
+		return false;
+	}
+}
+
 /*
  * tuplestore_advance - exported function to adjust position without fetching
  *
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 3a419e348fa..283255cdaad 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -116,6 +116,11 @@ TransactionId RecentXmin = FirstNormalTransactionId;
 /* (table, ctid) => (cmin, cmax) mapping during timetravel */
 static HTAB *tuplecid_data = NULL;
 
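+/* Hooks invoked when snapshots are registered/deregistered or xmin resets */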
+snapshot_hook_type snapshot_register_hook = NULL;
+snapshot_hook_type snapshot_deregister_hook = NULL;
+reset_xmin_hook_type reset_xmin_hook = NULL;
+
 /*
  * Elements of the active snapshot stack.
  *
@@ -192,6 +196,12 @@ typedef struct SerializedSnapshotData
 	CommandId	curcid;
 	TimestampTz whenTaken;
 	XLogRecPtr	lsn;
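+	/* CSN snapshot and undo location state, preserved across serialization */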
+	CSNSnapshotData	csnSnapshotData;
+	uint64		undoRegularLocation;
+	uint64		undoRegularXmin;
+	uint64		undoSystemLocation;
+	uint64		undoSystemXmin;
 } SerializedSnapshotData;
 
 Size
@@ -298,6 +307,8 @@ GetTransactionSnapshot(void)
 			/* Mark it as "registered" in FirstXactSnapshot */
 			FirstXactSnapshot->regd_count++;
 			pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+			if (snapshot_register_hook)
+				snapshot_register_hook(FirstXactSnapshot);
 		}
 		else
 			CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
@@ -438,6 +449,8 @@ GetNonHistoricCatalogSnapshot(Oid relid)
 		 * CatalogSnapshot pointer is already valid.
 		 */
 		pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
+		if (snapshot_register_hook)
+			snapshot_register_hook(CatalogSnapshot);
 	}
 
 	return CatalogSnapshot;
@@ -459,6 +472,8 @@ InvalidateCatalogSnapshot(void)
 	if (CatalogSnapshot)
 	{
 		pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
+		if (snapshot_deregister_hook)
+			snapshot_deregister_hook(CatalogSnapshot);
 		CatalogSnapshot = NULL;
 		SnapshotResetXmin();
 	}
@@ -536,6 +551,7 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
 	CurrentSnapshot->xmin = sourcesnap->xmin;
 	CurrentSnapshot->xmax = sourcesnap->xmax;
 	CurrentSnapshot->xcnt = sourcesnap->xcnt;
+	CurrentSnapshot->csnSnapshotData = sourcesnap->csnSnapshotData;
 	Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
 	if (sourcesnap->xcnt > 0)
 		memcpy(CurrentSnapshot->xip, sourcesnap->xip,
@@ -593,6 +609,8 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
 		/* Mark it as "registered" in FirstXactSnapshot */
 		FirstXactSnapshot->regd_count++;
 		pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+		if (snapshot_register_hook)
+			snapshot_register_hook(FirstXactSnapshot);
 	}
 
 	FirstSnapshotSet = true;
@@ -855,7 +873,11 @@ RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
 	ResourceOwnerRememberSnapshot(owner, snap);
 
 	if (snap->regd_count == 1)
+	{
 		pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
+		if (snapshot_register_hook)
+			snapshot_register_hook(snap);
+	}
 
 	return snap;
 }
@@ -893,7 +915,11 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
 
 	snapshot->regd_count--;
 	if (snapshot->regd_count == 0)
+	{
 		pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
+		if (snapshot_deregister_hook)
+			snapshot_deregister_hook(snapshot);
+	}
 
 	if (snapshot->regd_count == 0 && snapshot->active_count == 0)
 	{
@@ -945,6 +971,9 @@ SnapshotResetXmin(void)
 {
 	Snapshot	minSnapshot;
 
+	if (reset_xmin_hook)
+		reset_xmin_hook();
+
 	if (ActiveSnapshot != NULL)
 		return;
 
@@ -1038,6 +1067,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
 		Assert(FirstXactSnapshot->regd_count > 0);
 		Assert(!pairingheap_is_empty(&RegisteredSnapshots));
 		pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
+		if (snapshot_deregister_hook)
+			snapshot_deregister_hook(FirstXactSnapshot);
 	}
 	FirstXactSnapshot = NULL;
 
@@ -1069,6 +1100,8 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
 
 			pairingheap_remove(&RegisteredSnapshots,
 							   &esnap->snapshot->ph_node);
+			if (snapshot_deregister_hook)
+				snapshot_deregister_hook(esnap->snapshot);
 		}
 
 		exportedSnapshots = NIL;
@@ -1196,6 +1229,8 @@ ExportSnapshot(Snapshot snapshot)
 
 	snapshot->regd_count++;
 	pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
+	if (snapshot_register_hook)
+		snapshot_register_hook(snapshot);
 
 	/*
 	 * Fill buf with a text serialization of the snapshot, plus identification
@@ -2160,6 +2195,13 @@ SerializeSnapshot(Snapshot snapshot, char *start_address)
 	serialized_snapshot.curcid = snapshot->curcid;
 	serialized_snapshot.whenTaken = snapshot->whenTaken;
 	serialized_snapshot.lsn = snapshot->lsn;
+	serialized_snapshot.csnSnapshotData.xmin = snapshot->csnSnapshotData.xmin;
+	serialized_snapshot.csnSnapshotData.snapshotcsn = snapshot->csnSnapshotData.snapshotcsn;
+	serialized_snapshot.csnSnapshotData.xlogptr = snapshot->csnSnapshotData.xlogptr;
+	serialized_snapshot.undoRegularXmin = snapshot->undoRegularLocationPhNode.xmin;
+	serialized_snapshot.undoRegularLocation = snapshot->undoRegularLocationPhNode.undoLocation;
+	serialized_snapshot.undoSystemXmin = snapshot->undoSystemLocationPhNode.xmin;
+	serialized_snapshot.undoSystemLocation = snapshot->undoSystemLocationPhNode.undoLocation;
 
 	/*
 	 * Ignore the SubXID array if it has overflowed, unless the snapshot was
@@ -2235,6 +2277,13 @@ RestoreSnapshot(char *start_address)
 	snapshot->whenTaken = serialized_snapshot.whenTaken;
 	snapshot->lsn = serialized_snapshot.lsn;
 	snapshot->snapXactCompletionCount = 0;
+	snapshot->csnSnapshotData.xmin = serialized_snapshot.csnSnapshotData.xmin;
+	snapshot->csnSnapshotData.snapshotcsn = serialized_snapshot.csnSnapshotData.snapshotcsn;
+	snapshot->csnSnapshotData.xlogptr = serialized_snapshot.csnSnapshotData.xlogptr;
+	snapshot->undoRegularLocationPhNode.xmin = serialized_snapshot.undoRegularXmin;
+	snapshot->undoRegularLocationPhNode.undoLocation = serialized_snapshot.undoRegularLocation;
+	snapshot->undoSystemLocationPhNode.xmin = serialized_snapshot.undoSystemXmin;
+	snapshot->undoSystemLocationPhNode.undoLocation = serialized_snapshot.undoSystemLocation;
 
 	/* Copy XIDs, if present. */
 	if (serialized_snapshot.xcnt > 0)
diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile
index bed05f1609c..5ff8163b841 100644
--- a/src/bin/pg_rewind/Makefile
+++ b/src/bin/pg_rewind/Makefile
@@ -21,6 +21,7 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
 OBJS = \
 	$(WIN32RES) \
 	datapagemap.o \
+	extension.o \
 	file_ops.o \
 	filemap.o \
 	libpq_source.o \
@@ -35,19 +36,21 @@ EXTRA_CLEAN = xlogreader.c
 all: pg_rewind
 
 pg_rewind: $(OBJS) | submake-libpq submake-libpgport
-	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LDFLAGS_EX_BE) $(LIBS) -o $@$(X)
 
 xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
 	rm -f $@ && $(LN_S) $< .
 
 install: all installdirs
 	$(INSTALL_PROGRAM) pg_rewind$(X) '$(DESTDIR)$(bindir)/pg_rewind$(X)'
+	$(INSTALL_DATA) $(srcdir)/pg_rewind_ext.h '$(DESTDIR)$(includedir)'
 
 installdirs:
-	$(MKDIR_P) '$(DESTDIR)$(bindir)'
+	$(MKDIR_P) '$(DESTDIR)$(bindir)' '$(DESTDIR)$(includedir)'
 
 uninstall:
 	rm -f '$(DESTDIR)$(bindir)/pg_rewind$(X)'
+	rm -f '$(DESTDIR)$(includedir)/pg_rewind_ext.h'
 
 clean distclean maintainer-clean:
 	rm -f pg_rewind$(X) $(OBJS) xlogreader.c
diff --git a/src/bin/pg_rewind/extension.c b/src/bin/pg_rewind/extension.c
new file mode 100644
index 00000000000..29ec4b5a6f6
--- /dev/null
+++ b/src/bin/pg_rewind/extension.c
@@ -0,0 +1,132 @@
+/*-------------------------------------------------------------------------
+ *
+ * extension.c
+ *	  Functions for processing shared libraries loaded by pg_rewind.
+ *
+ * Copyright (c) 2013-2023, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#ifndef WIN32
+#include <dlfcn.h>
+
+/*
+ * On macOS, <dlfcn.h> insists on including <stdbool.h>.  If we're not
+ * using stdbool, undef bool to undo the damage.
+ */
+#ifndef PG_USE_STDBOOL
+#ifdef bool
+#undef bool
+#endif
+#endif
+#endif							/* !WIN32 */
+
+#include <sys/stat.h>
+
+#include "access/xlog_internal.h"
+#include "pg_rewind.h"
+
+/* signature for pg_rewind extension library rewind function */
+typedef void (*PG_rewind_t) (const char *datadir_target, char *datadir_source,
+							 char *connstr_source, XLogRecPtr startpoint,
+							 int tliIndex, XLogRecPtr endpoint,
+							 const char *restoreCommand, const char *argv0,
+							 bool debug);
+
+static bool
+file_exists(const char *argv0, const char *name)
+{
+	struct stat st;
+
+	Assert(name != NULL);
+
+	if (stat(name, &st) == 0)
+		return !S_ISDIR(st.st_mode);
+	else if (!(errno == ENOENT || errno == ENOTDIR || errno == EACCES))
+	{
+		const char *progname;
+
+		progname = get_progname(argv0);
+		pg_log_error("could not access file \"%s\": %m", name);
+		pg_log_error_hint("Try \"%s --help\" for more information.", progname);
+		exit(1);
+	}
+
+	return false;
+}
+
+static char *
+expand_dynamic_library_name(const char *argv0, const char *name)
+{
+	char	   *full;
+	char		my_exec_path[MAXPGPATH];
+	char		pkglib_path[MAXPGPATH];
+
+	Assert(name);
+
+	if (find_my_exec(argv0, my_exec_path) < 0)
+		pg_fatal("%s: could not locate my own executable path", argv0);
+	get_pkglib_path(my_exec_path, pkglib_path);
+	full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1);
+	sprintf(full, "%s/%s", pkglib_path, name);
+	if (file_exists(argv0, full))
+		return full;
+	pfree(full);
+
+	full = palloc(strlen(pkglib_path) + 1 + strlen(name) + 1 +
+				  strlen(DLSUFFIX) + 1);
+	sprintf(full, "%s/%s%s", pkglib_path, name, DLSUFFIX);
+	if (file_exists(argv0, full))
+		return full;
+	pfree(full);
+
+	return pstrdup(name);
+}
+
+void
+process_extensions(SimpleStringList *extensions, const char *datadir_target,
+				   char *datadir_source, char *connstr_source,
+				   XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint,
+				   const char *restoreCommand, const char *argv0,
+				   bool debug)
+{
+	SimpleStringListCell *cell;
+
+	if (extensions->head == NULL)
+		return;					/* nothing to do */
+
+	for (cell = extensions->head; cell; cell = cell->next)
+	{
+		char	   *filename = cell->val;
+		char	   *fullname;
+		void	   *lib_handle;
+		PG_rewind_t PG_rewind;
+		char	   *load_error;
+
+		fullname = expand_dynamic_library_name(argv0, filename);
+
+		lib_handle = dlopen(fullname, RTLD_NOW | RTLD_GLOBAL);
+		if (lib_handle == NULL)
+		{
+			load_error = dlerror();
+			pg_fatal("could not load library \"%s\": %s", fullname, load_error);
+		}
+
+	PG_rewind = (PG_rewind_t) dlsym(lib_handle, "_PG_rewind");
+
+		if (PG_rewind == NULL)
+			pg_fatal("could not find function \"_PG_rewind\" in \"%s\"",
+					 fullname);
+		pfree(fullname);
+
+		if (showprogress)
+			pg_log_info("performing rewind for '%s' extension", filename);
+		PG_rewind(datadir_target, datadir_source, connstr_source, startpoint,
+				  tliIndex, endpoint, restoreCommand, argv0, debug);
+
+		pg_log_debug("loaded library \"%s\"", filename);
+	}
+}
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 435742d20d1..a936c3d3586 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -54,6 +54,7 @@ static uint32 hash_string_pointer(const char *s);
 #define FILEHASH_INITIAL_SIZE	1000
 
 static filehash_hash *filehash;
+static SimpleStringList extensions_exclude = {NULL, NULL};
 
 static bool isRelDataFile(const char *path);
 static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
@@ -261,6 +262,8 @@ process_target_file(const char *path, file_type_t type, size_t size,
 	 * from the target data folder all paths which have been filtered out from
 	 * the source data folder when processing the source files.
 	 */
+	if (check_file_excluded(path, false))
+		return;
 
 	/*
 	 * Like in process_source_file, pretend that pg_wal is always a directory.
@@ -405,6 +408,31 @@ check_file_excluded(const char *path, bool is_source)
 		}
 	}
 
+	/*
+	 * Exclude directories registered by extension libraries via
+	 * extensions_exclude_add().
+	 */
+	if (extensions_exclude.head != NULL)
+	{
+		SimpleStringListCell *cell;
+
+		for (cell = extensions_exclude.head; cell; cell = cell->next)
+		{
+			char	   *exclude_dir = cell->val;
+
+			snprintf(localpath, sizeof(localpath), "%s/", exclude_dir);
+			if (strstr(path, localpath) == path)
+			{
+				if (is_source)
+					pg_log_debug("entry \"%s\" excluded from source file list",
+								 path);
+				else
+					pg_log_debug("entry \"%s\" excluded from target file list",
+								 path);
+				return true;
+			}
+		}
+	}
+
 	return false;
 }
 
@@ -822,7 +850,6 @@ decide_file_actions(void)
 	return filemap;
 }
 
-
 /*
  * Helper function for filemap hash table.
  */
@@ -833,3 +860,15 @@ hash_string_pointer(const char *s)
 
 	return hash_bytes(ss, strlen(s));
 }
+
+void
+extensions_exclude_add(char **exclude_dirs)
+{
+	int			i;
+
+	for (i = 0; exclude_dirs[i] != NULL; i++)
+	{
+		simple_string_list_append(&extensions_exclude,
+								  pstrdup(exclude_dirs[i]));
+	}
+}
diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build
index fd22818be4d..36e9a4766f3 100644
--- a/src/bin/pg_rewind/meson.build
+++ b/src/bin/pg_rewind/meson.build
@@ -2,6 +2,7 @@
 
 pg_rewind_sources = files(
   'datapagemap.c',
+  'extension.c',
   'file_ops.c',
   'filemap.c',
   'libpq_source.c',
@@ -23,6 +24,7 @@ pg_rewind = executable('pg_rewind',
   pg_rewind_sources,
   dependencies: [frontend_code, libpq, lz4, zstd],
   c_args: ['-DFRONTEND'], # needed for xlogreader et al
+  export_dynamic: true,
   kwargs: default_bin_args,
 )
 bin_targets += pg_rewind
@@ -48,3 +50,7 @@ tests += {
 }
 
 subdir('po', if_found: libintl)
+
+install_headers(
+  'pg_rewind_ext.h'
+)
\ No newline at end of file
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 27782237d05..f8202d298e4 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -38,7 +38,7 @@ static const char *RmgrNames[RM_MAX_ID + 1] = {
 #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
 						RmgrNames[rmid] : "custom")
 
-static void extractPageInfo(XLogReaderState *record);
+static void extractPageInfo(XLogReaderState *record, void *arg);
 
 static int	xlogreadfd = -1;
 static XLogSegNo xlogreadsegno = 0;
@@ -54,17 +54,11 @@ static int	SimpleXLogPageRead(XLogReaderState *xlogreader,
 							   XLogRecPtr targetPagePtr,
 							   int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
 
-/*
- * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
- * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
- * the data blocks touched by the WAL records, and return them in a page map.
- *
- * 'endpoint' is the end of the last record to read. The record starting at
- * 'endpoint' is the first one that is not read.
- */
 void
-extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
-			   XLogRecPtr endpoint, const char *restoreCommand)
+SimpleXLogRead(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+			   XLogRecPtr endpoint, const char *restoreCommand,
+			   void (*page_callback) (XLogReaderState *, void *arg),
+			   void *arg)
 {
 	XLogRecord *record;
 	XLogReaderState *xlogreader;
@@ -97,7 +91,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 						 LSN_FORMAT_ARGS(errptr));
 		}
 
-		extractPageInfo(xlogreader);
+		page_callback(xlogreader, arg);
 	} while (xlogreader->EndRecPtr < endpoint);
 
 	/*
@@ -116,6 +110,22 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
 	}
 }
 
+/*
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
+ * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
+ * the data blocks touched by the WAL records, and return them in a page map.
+ *
+ * 'endpoint' is the end of the last record to read. The record starting at
+ * 'endpoint' is the first one that is not read.
+ */
+void
+extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
+			   XLogRecPtr endpoint, const char *restoreCommand)
+{
+	SimpleXLogRead(datadir, startpoint, tliIndex, endpoint, restoreCommand,
+				   extractPageInfo, NULL);
+}
+
 /*
  * Reads one WAL record. Returns the end position of the record, without
  * doing anything with the record itself.
@@ -365,7 +375,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
  * Extract information on which blocks the current record modifies.
  */
 static void
-extractPageInfo(XLogReaderState *record)
+extractPageInfo(XLogReaderState *record, void *arg)
 {
 	int			block_id;
 	RmgrId		rmid = XLogRecGetRmid(record);
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index f7f3b8227fd..d08d421bbdd 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -75,6 +75,8 @@ bool		dry_run = false;
 bool		do_sync = true;
 bool		restore_wal = false;
 
+static SimpleStringList extensions = {NULL, NULL};
+
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
 int			targetNentries;
@@ -107,6 +109,7 @@ usage(const char *progname)
 			 "                                 file when running target cluster\n"));
 	printf(_("      --debug                    write a lot of debug messages\n"));
 	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
+	printf(_("  -e, --extension=PATH           path to library performing rewind for extension\n"));
 	printf(_("  -V, --version                  output version information, then exit\n"));
 	printf(_("  -?, --help                     show this help, then exit\n"));
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
@@ -131,6 +134,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"extension", required_argument, NULL, 'e'},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -169,7 +173,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "cD:nNPRe:", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -218,6 +222,10 @@ main(int argc, char **argv)
 				config_file = pg_strdup(optarg);
 				break;
 
+			case 'e':			/* -e or --extension */
+				simple_string_list_append(&extensions, optarg);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -454,6 +462,12 @@ main(int argc, char **argv)
 	/* Initialize the hash table to track the status of each file */
 	filehash_init();
 
+	if (extensions.head != NULL)
+		process_extensions(&extensions, datadir_target, datadir_source,
+						   connstr_source, chkptrec, lastcommontliIndex,
+						   target_wal_endrec, restore_command, argv[0],
+						   debug);
+
 	/*
 	 * Collect information about all files in the both data directories.
 	 */
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb8..1d42a921246 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -14,7 +14,9 @@
 #include "access/timeline.h"
 #include "common/logging.h"
 #include "datapagemap.h"
+#include "fe_utils/simple_list.h"
 #include "libpq-fe.h"
+#include "pg_rewind_ext.h"
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
 
@@ -53,4 +55,12 @@ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer,
 														 TimeLineID targetTLI,
 														 int *nentries);
 
+/* in extension.c */
+extern void process_extensions(SimpleStringList *extensions,
+							   const char *datadir_target, char *datadir_source,
+							   char *connstr_source, XLogRecPtr startpoint,
+							   int tliIndex, XLogRecPtr endpoint,
+							   const char *restoreCommand, const char *argv0,
+							   bool debug);
+
 #endif							/* PG_REWIND_H */
diff --git a/src/bin/pg_rewind/pg_rewind_ext.h b/src/bin/pg_rewind/pg_rewind_ext.h
new file mode 100644
index 00000000000..3616d94f588
--- /dev/null
+++ b/src/bin/pg_rewind/pg_rewind_ext.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_rewind_ext.h
+ *	  Interface for extension libraries loaded by pg_rewind.
+ *
+ * Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_REWIND_EXT_H
+#define PG_REWIND_EXT_H
+
+#include "access/xlogreader.h"
+
+/* in parsexlog.c */
+/*
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
+ * index 'tliIndex' in target timeline history, until 'endpoint'.
+ * Pass all WAL records to 'page_callback'.
+ *
+ * 'endpoint' is the end of the last record to read. The record starting at
+ * 'endpoint' is the first one that is not read.
+ */
+extern void SimpleXLogRead(const char *datadir, XLogRecPtr startpoint,
+						   int tliIndex, XLogRecPtr endpoint,
+						   const char *restoreCommand,
+						   void (*page_callback) (XLogReaderState *,
+												  void *arg),
+						   void *arg);
+
+/* in filemap.c */
+/* Add NULL-terminated list of dirs that pg_rewind can skip copying */
+extern void extensions_exclude_add(char **exclude_dirs);
+
+/* signature for pg_rewind extension library rewind function */
+extern PGDLLEXPORT void _PG_rewind(const char *datadir_target,
+								   char *datadir_source, char *connstr_source,
+								   XLogRecPtr startpoint, int tliIndex,
+								   XLogRecPtr endpoint,
+								   const char *restoreCommand,
+								   const char *argv0, bool debug);
+
+#endif							/* PG_REWIND_EXT_H */
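For orientation, here is a minimal sketch of an extension library built against this header, as pg_rewind would load it with "-e my_rewind_ext". The library name, the "my_ext_data" directory, and the record counter are illustrative assumptions; only the _PG_rewind entry point, extensions_exclude_add(), and SimpleXLogRead() come from the interface above.

/* my_rewind_ext.c -- hypothetical rewind extension library (sketch) */
#include "postgres_fe.h"

#include "common/logging.h"
#include "pg_rewind_ext.h"

/* directories the extension rewinds itself; pg_rewind must not copy them */
static char *skip_dirs[] = {"my_ext_data", NULL};

static void
count_record(XLogReaderState *record, void *arg)
{
	/* a real library would inspect the record and rewind its own files */
	(*(uint64 *) arg)++;
}

PGDLLEXPORT void
_PG_rewind(const char *datadir_target, char *datadir_source,
		   char *connstr_source, XLogRecPtr startpoint, int tliIndex,
		   XLogRecPtr endpoint, const char *restoreCommand,
		   const char *argv0, bool debug)
{
	uint64		nrecords = 0;

	/* keep pg_rewind's file copier away from our directories */
	extensions_exclude_add(skip_dirs);

	/* scan target WAL from the divergence point up to endpoint */
	SimpleXLogRead(datadir_target, startpoint, tliIndex, endpoint,
				   restoreCommand, count_record, &nrecords);

	pg_log_info("my_rewind_ext: scanned " UINT64_FORMAT " WAL records",
				nrecords);
}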
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index c1134eae5b5..70db51c6bec 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -220,6 +220,11 @@ double		throttle_delay = 0;
  */
 int64		latency_limit = 0;
 
+/*
+ * tableam selection
+ */
+char	   *tableam = NULL;
+
 /*
  * tablespace selection
  */
@@ -890,6 +895,7 @@ usage(void)
 		   "  --partition-method=(range|hash)\n"
 		   "                           partition pgbench_accounts with this method (default: range)\n"
 		   "  --partitions=NUM         partition pgbench_accounts into NUM parts (default: 0)\n"
+		   "  --tableam=TABLEAM        create tables using the specified tableam\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tables        create tables as unlogged tables\n"
 		   "\nOptions to select what to run:\n"
@@ -4749,14 +4755,34 @@ createPartitions(PGconn *con)
 				appendPQExpBufferStr(&query, "maxvalue");
 
 			appendPQExpBufferChar(&query, ')');
+
+			if (tableam != NULL)
+			{
+				char	   *escape_tableam;
+
+				escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+				appendPQExpBuffer(&query, " using %s", escape_tableam);
+				PQfreemem(escape_tableam);
+			}
 		}
 		else if (partition_method == PART_HASH)
+		{
 			printfPQExpBuffer(&query,
 							  "create%s table pgbench_accounts_%d\n"
 							  "  partition of pgbench_accounts\n"
 							  "  for values with (modulus %d, remainder %d)",
 							  unlogged_tables ? " unlogged" : "", p,
 							  partitions, p - 1);
+
+			if (tableam != NULL)
+			{
+				char	   *escape_tableam;
+
+				escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+				appendPQExpBuffer(&query, " using %s", escape_tableam);
+				PQfreemem(escape_tableam);
+			}
+		}
 		else					/* cannot get there */
 			Assert(0);
 
@@ -4843,10 +4869,20 @@ initCreateTables(PGconn *con)
 		if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
 			appendPQExpBuffer(&query,
 							  " partition by %s (aid)", PARTITION_METHOD[partition_method]);
-		else if (ddl->declare_fillfactor)
+		else
 		{
+			if (tableam != NULL)
+			{
+				char	   *escape_tableam;
+
+				escape_tableam = PQescapeIdentifier(con, tableam, strlen(tableam));
+				appendPQExpBuffer(&query, " using %s", escape_tableam);
+				PQfreemem(escape_tableam);
+			}
+
 			/* fillfactor is only expected on actual tables */
-			appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor);
+			if (ddl->declare_fillfactor)
+				appendPQExpBuffer(&query, " with (fillfactor=%d)", fillfactor);
 		}
 
 		if (tablespace != NULL)
@@ -6602,6 +6638,7 @@ main(int argc, char **argv)
 		{"failures-detailed", no_argument, NULL, 13},
 		{"max-tries", required_argument, NULL, 14},
 		{"verbose-errors", no_argument, NULL, 15},
+		{"tableam", required_argument, NULL, 16},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -6939,6 +6976,10 @@ main(int argc, char **argv)
 				benchmarking_option_set = true;
 				verbose_errors = true;
 				break;
+			case 16:			/* tableam */
+				initialization_option_set = true;
+				tableam = pg_strdup(optarg);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 4476ff7fba1..73320f93be7 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -107,12 +107,42 @@ typedef void (*ambuildempty_function) (Relation indexRelation);
 typedef bool (*aminsert_function) (Relation indexRelation,
 								   Datum *values,
 								   bool *isnull,
-								   ItemPointer heap_tid,
+								   ItemPointer tupleid,
 								   Relation heapRelation,
 								   IndexUniqueCheck checkUnique,
 								   bool indexUnchanged,
 								   struct IndexInfo *indexInfo);
 
+/* extended version of aminsert taking Datum tupleid */
+typedef bool (*aminsert_extended_function) (Relation indexRelation,
+								   Datum *values,
+								   bool *isnull,
+								   Datum tupleid,
+								   Relation heapRelation,
+								   IndexUniqueCheck checkUnique,
+								   bool indexUnchanged,
+								   struct IndexInfo *indexInfo);
+
+/* update this tuple */
+typedef bool (*amupdate_function) (Relation indexRelation,
+								   bool new_valid,
+								   bool old_valid,
+								   Datum *values,
+								   bool *isnull,
+								   Datum tupleid,
+								   Datum *valuesOld,
+								   bool *isnullOld,
+								   Datum oldTupleid,
+								   Relation heapRelation,
+								   IndexUniqueCheck checkUnique,
+								   struct IndexInfo *indexInfo);
+
+/* delete this tuple */
+typedef bool (*amdelete_function) (Relation indexRelation,
+								   Datum *values, bool *isnull,
+								   Datum tupleid,
+								   Relation heapRelation,
+								   struct IndexInfo *indexInfo);
+
 /* bulk delete */
 typedef IndexBulkDeleteResult *(*ambulkdelete_function) (IndexVacuumInfo *info,
 														 IndexBulkDeleteResult *stats,
@@ -246,6 +276,8 @@ typedef struct IndexAmRoutine
 	bool		amusemaintenanceworkmem;
 	/* does AM store tuple information only at block granularity? */
 	bool		amsummarizing;
+	/* does the AM provide MVCC visibility information itself? */
+	bool		ammvccaware;
 	/* OR of parallel vacuum flags.  See vacuum.h for flags. */
 	uint8		amparallelvacuumoptions;
 	/* type of data stored in index, or InvalidOid if variable */
@@ -261,6 +293,9 @@ typedef struct IndexAmRoutine
 	ambuild_function ambuild;
 	ambuildempty_function ambuildempty;
 	aminsert_function aminsert;
+	aminsert_extended_function aminsertextended;
+	amupdate_function amupdate;
+	amdelete_function amdelete;
 	ambulkdelete_function ambulkdelete;
 	amvacuumcleanup_function amvacuumcleanup;
 	amcanreturn_function amcanreturn;	/* can be NULL */
@@ -286,7 +321,13 @@ typedef struct IndexAmRoutine
 
 
 /* Functions in access/index/amapi.c */
+extern IndexAmRoutine *GetIndexAmRoutineWithTableAM(Oid tamoid, Oid amhandler);
 extern IndexAmRoutine *GetIndexAmRoutine(Oid amhandler);
-extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid amoid, bool noerror);
+extern IndexAmRoutine *GetIndexAmRoutineExtended(Oid indoid, Oid amhandler);
+extern IndexAmRoutine *GetIndexAmRoutineByAmId(Oid indoid, Oid amoid, bool noerror);
+
+typedef IndexAmRoutine *(*IndexAMRoutineHookType) (Oid tamoid, Oid amhandler);
+
+extern IndexAMRoutineHookType IndexAMRoutineHook;
 
 #endif							/* AMAPI_H */
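A sketch (not part of the patch) of how an MVCC-aware index AM's handler might fill in the new fields; the myam_* names are hypothetical and the routine's pre-existing fields and callbacks are elided.

#include "postgres.h"

#include "access/amapi.h"
#include "fmgr.h"

static bool
myam_update(Relation indexRelation, bool new_valid, bool old_valid,
			Datum *values, bool *isnull, Datum tupleid,
			Datum *valuesOld, bool *isnullOld, Datum oldTupleid,
			Relation heapRelation, IndexUniqueCheck checkUnique,
			struct IndexInfo *indexInfo)
{
	/* replace the entry for oldTupleid with one for tupleid */
	return true;
}

static bool
myam_delete(Relation indexRelation, Datum *values, bool *isnull,
			Datum tupleid, Relation heapRelation,
			struct IndexInfo *indexInfo)
{
	/* remove the entry for tupleid */
	return true;
}

PG_FUNCTION_INFO_V1(myam_handler);

Datum
myam_handler(PG_FUNCTION_ARGS)
{
	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

	/* ... ordinary fields and callbacks elided ... */
	amroutine->ammvccaware = true;	/* index maintains MVCC visibility */
	amroutine->amupdate = myam_update;
	amroutine->amdelete = myam_delete;

	PG_RETURN_POINTER(amroutine);
}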
diff --git a/src/include/access/brin_internal.h b/src/include/access/brin_internal.h
index 97ddc925b27..418b32d5515 100644
--- a/src/include/access/brin_internal.h
+++ b/src/include/access/brin_internal.h
@@ -92,7 +92,7 @@ extern IndexBuildResult *brinbuild(Relation heap, Relation index,
 								   struct IndexInfo *indexInfo);
 extern void brinbuildempty(Relation index);
 extern bool brininsert(Relation idxRel, Datum *values, bool *nulls,
-					   ItemPointer heaptid, Relation heapRel,
+					   Datum tupleid, Relation heapRel,
 					   IndexUniqueCheck checkUnique,
 					   bool indexUnchanged,
 					   struct IndexInfo *indexInfo);
diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h
index 908e1fc6919..26ef91e23df 100644
--- a/src/include/access/detoast.h
+++ b/src/include/access/detoast.h
@@ -63,6 +63,13 @@ extern struct varlena *detoast_attr_slice(struct varlena *attr,
 										  int32 sliceoffset,
 										  int32 slicelength);
 
+/* ----------
+ * toast_decompress_datum -
+ *
+ * Decompress a compressed version of a varlena datum
+ * ----------
+ */
+extern struct varlena *toast_decompress_datum(struct varlena *attr);
+
 /* ----------
  * toast_raw_datum_size -
  *
@@ -79,4 +86,11 @@ extern Size toast_raw_datum_size(Datum value);
  */
 extern Size toast_datum_size(Datum value);
 
+/*
+ * Hooks allowing a module (such as an in-memory table AM) to override
+ * detoasting of its own data.
+ */
+typedef struct varlena *(*ToastFunc) (struct varlena *attr);
+extern void register_o_detoast_func(ToastFunc func);
+extern void deregister_o_detoast_func(void);
+
 #endif							/* DETOAST_H */
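The registration pair is easiest to see in use. A hedged sketch, with the my_* names invented here; only register_o_detoast_func() and deregister_o_detoast_func() come from the header:

#include "postgres.h"

#include "access/detoast.h"

/* hypothetical replacement detoaster for a module's own toast pointers */
static struct varlena *
my_detoast(struct varlena *attr)
{
	/* resolve the pointer against the module's in-memory storage here */
	return attr;
}

void
my_module_startup(void)
{
	register_o_detoast_func(my_detoast);
}

void
my_module_shutdown(void)
{
	deregister_o_detoast_func();
}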
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index b071cedd44b..0de79f782a5 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -144,11 +144,26 @@ extern void index_close(Relation relation, LOCKMODE lockmode);
 
 extern bool index_insert(Relation indexRelation,
 						 Datum *values, bool *isnull,
-						 ItemPointer heap_t_ctid,
+						 ItemPointer tupleid,
 						 Relation heapRelation,
 						 IndexUniqueCheck checkUnique,
 						 bool indexUnchanged,
 						 struct IndexInfo *indexInfo);
+extern bool index_update(Relation indexRelation,
+						 bool new_valid,
+						 bool old_valid,
+						 Datum *values,
+						 bool *isnull,
+						 Datum tupleid,
+						 Datum *valuesOld,
+						 bool *isnullOld,
+						 Datum oldTupleid,
+						 Relation heapRelation,
+						 IndexUniqueCheck checkUnique,
+						 struct IndexInfo *indexInfo);
+extern bool index_delete(Relation indexRelation, Datum *values, bool *isnull,
+						 Datum tupleid, Relation heapRelation,
+						 struct IndexInfo *indexInfo);
 
 extern IndexScanDesc index_beginscan(Relation heapRelation,
 									 Relation indexRelation,
@@ -173,6 +188,9 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
 											  ParallelIndexScanDesc pscan);
 extern ItemPointer index_getnext_tid(IndexScanDesc scan,
 									 ScanDirection direction);
+extern NullableDatum index_getnext_rowid(IndexScanDesc scan,
+										 ScanDirection direction);
+extern Datum index_getnext_tupleid(IndexScanDesc scan, ScanDirection direction);
 struct TupleTableSlot;
 extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot);
 extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction,
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 6da64928b66..7ba1d4bc999 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -114,7 +114,7 @@ extern IndexBuildResult *ginbuild(Relation heap, Relation index,
 								  struct IndexInfo *indexInfo);
 extern void ginbuildempty(Relation index);
 extern bool gininsert(Relation index, Datum *values, bool *isnull,
-					  ItemPointer ht_ctid, Relation heapRel,
+					  Datum tupleid, Relation heapRel,
 					  IndexUniqueCheck checkUnique,
 					  bool indexUnchanged,
 					  struct IndexInfo *indexInfo);
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 3edc740a3f3..0cd19757208 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -401,7 +401,7 @@ typedef struct GiSTOptions
 /* gist.c */
 extern void gistbuildempty(Relation index);
 extern bool gistinsert(Relation r, Datum *values, bool *isnull,
-					   ItemPointer ht_ctid, Relation heapRel,
+					   Datum tupleid, Relation heapRel,
 					   IndexUniqueCheck checkUnique,
 					   bool indexUnchanged,
 					   struct IndexInfo *indexInfo);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 9e035270a16..14fb8e4ce1e 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -364,7 +364,7 @@ extern IndexBuildResult *hashbuild(Relation heap, Relation index,
 								   struct IndexInfo *indexInfo);
 extern void hashbuildempty(Relation index);
 extern bool hashinsert(Relation rel, Datum *values, bool *isnull,
-					   ItemPointer ht_ctid, Relation heapRel,
+					   Datum tupleid, Relation heapRel,
 					   IndexUniqueCheck checkUnique,
 					   bool indexUnchanged,
 					   struct IndexInfo *indexInfo);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index c7278219b24..72b4f8e7634 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -242,19 +242,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
 							  int ntuples, CommandId cid, int options,
 							  BulkInsertState bistate);
 extern TM_Result heap_delete(Relation relation, ItemPointer tid,
-							 CommandId cid, Snapshot crosscheck, bool wait,
-							 struct TM_FailureData *tmfd, bool changingPart);
+							 CommandId cid, Snapshot crosscheck, int options,
+							 struct TM_FailureData *tmfd, bool changingPart,
+							 TupleTableSlot *oldSlot);
 extern void heap_finish_speculative(Relation relation, ItemPointer tid);
 extern void heap_abort_speculative(Relation relation, ItemPointer tid);
 extern TM_Result heap_update(Relation relation, ItemPointer otid,
 							 HeapTuple newtup,
-							 CommandId cid, Snapshot crosscheck, bool wait,
+							 CommandId cid, Snapshot crosscheck, int options,
 							 struct TM_FailureData *tmfd, LockTupleMode *lockmode,
-							 TU_UpdateIndexes *update_indexes);
-extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
-								 CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
-								 bool follow_updates,
-								 Buffer *buffer, struct TM_FailureData *tmfd);
+							 TU_UpdateIndexes *update_indexes,
+							 TupleTableSlot *oldSlot);
+extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
+								 TupleTableSlot *slot,
+								 CommandId cid, LockTupleMode mode,
+								 LockWaitPolicy wait_policy, bool follow_updates,
+								 struct TM_FailureData *tmfd);
 
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 9020abebc92..3f36ea455aa 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1128,7 +1128,7 @@ typedef struct BTOptions
  */
 extern void btbuildempty(Relation index);
 extern bool btinsert(Relation rel, Datum *values, bool *isnull,
-					 ItemPointer ht_ctid, Relation heapRel,
+					 Datum tupleid, Relation heapRel,
 					 IndexUniqueCheck checkUnique,
 					 bool indexUnchanged,
 					 struct IndexInfo *indexInfo);
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index 1d5bfa62ffc..4812bc4481d 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -21,6 +21,7 @@
 
 #include "access/amapi.h"
 #include "access/htup.h"
+#include "access/tableam.h"
 #include "access/tupdesc.h"
 #include "nodes/pg_list.h"
 #include "storage/lock.h"
@@ -224,6 +225,7 @@ extern Datum transformRelOptions(Datum oldOptions, List *defList,
 								 bool acceptOidsOff, bool isReset);
 extern List *untransformRelOptions(Datum options);
 extern bytea *extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
+								const TableAmRoutine *tableam,
 								amoptions_function amoptions);
 extern void *build_reloptions(Datum reloptions, bool validate,
 							  relopt_kind kind,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index d03360eac04..ea0913ce6f2 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -122,6 +122,7 @@ typedef struct IndexScanDescData
 	struct ScanKeyData *keyData;	/* array of index qualifier descriptors */
 	struct ScanKeyData *orderByData;	/* array of ordering op descriptors */
 	bool		xs_want_itup;	/* caller requests index tuples */
+	bool		xs_want_rowid;	/* caller requests row IDs instead of TIDs */
 	bool		xs_temp_snap;	/* unregister snapshot at scan end? */
 
 	/* signaling to index AM about killing index tuples */
@@ -145,6 +146,7 @@ typedef struct IndexScanDescData
 	struct TupleDescData *xs_hitupdesc; /* rowtype descriptor of xs_hitup */
 
 	ItemPointerData xs_heaptid; /* result */
+	NullableDatum	xs_rowid; /* result if xs_want_rowid */
 	bool		xs_heap_continue;	/* T if must keep walking, potential
 									 * further results */
 	IndexFetchTableData *xs_heapfetch;
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index fe31d32dbe9..e44d3561abf 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -197,7 +197,7 @@ extern IndexBuildResult *spgbuild(Relation heap, Relation index,
 								  struct IndexInfo *indexInfo);
 extern void spgbuildempty(Relation index);
 extern bool spginsert(Relation index, Datum *values, bool *isnull,
-					  ItemPointer ht_ctid, Relation heapRel,
+					  Datum tupleid, Relation heapRel,
 					  IndexUniqueCheck checkUnique,
 					  bool indexUnchanged,
 					  struct IndexInfo *indexInfo);
diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h
index 8f08682750b..d717a7cafec 100644
--- a/src/include/access/sysattr.h
+++ b/src/include/access/sysattr.h
@@ -24,6 +24,7 @@
 #define MaxTransactionIdAttributeNumber			(-4)
 #define MaxCommandIdAttributeNumber				(-5)
 #define TableOidAttributeNumber					(-6)
-#define FirstLowInvalidHeapAttributeNumber		(-7)
+#define RowIdAttributeNumber					(-7)
+#define FirstLowInvalidHeapAttributeNumber		(-8)
 
 #endif							/* SYSATTR_H */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 5e195fd292f..62ffc14e8f8 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -17,10 +17,14 @@
 #ifndef TABLEAM_H
 #define TABLEAM_H
 
+#include "access/amapi.h"
 #include "access/relscan.h"
 #include "access/sdir.h"
 #include "access/xact.h"
 #include "executor/tuptable.h"
+#include "nodes/execnodes.h"
+#include "storage/bufmgr.h"
+#include "utils/guc.h"
 #include "utils/rel.h"
 #include "utils/snapshot.h"
 
@@ -39,6 +43,16 @@ struct TBMIterateResult;
 struct VacuumParams;
 struct ValidateIndexState;
 
+typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
+									  HeapTuple *rows, int targrows,
+									  double *totalrows,
+									  double *totaldeadrows);
+
+/* in commands/analyze.c */
+extern int acquire_sample_rows(Relation onerel, int elevel,
+							   HeapTuple *rows, int targrows,
+							   double *totalrows, double *totaldeadrows);
+
 /*
  * Bitmask values for the flags argument to the scan_begin callback.
  */
@@ -259,6 +273,11 @@ typedef struct TM_IndexDeleteOp
 /* Follow update chain and lock latest version of tuple */
 #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION		(1 << 1)
 
+/* "options" flag bits for table_tuple_update and table_tuple_delete */
+#define TABLE_MODIFY_WAIT			0x0001
+#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
+#define TABLE_MODIFY_LOCK_UPDATED	0x0004
+
 
 /* Typedef for callback function for table_index_build_scan */
 typedef void (*IndexBuildCallback) (Relation index,
@@ -295,6 +314,9 @@ typedef struct TableAmRoutine
 	 */
 	const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
 
+	/* Which row identifier does this AM use (TID or a row-ID datum)? */
+	RowRefType	(*get_row_ref_type) (Relation rel);
+
+	/* Release the AM-private cache in rel->rd_amcache, if any */
+	void		(*free_rd_amcache) (Relation rel);
 
 	/* ------------------------------------------------------------------------
 	 * Table scan callbacks.
@@ -447,7 +469,7 @@ typedef struct TableAmRoutine
 	 * future searches.
 	 */
 	bool		(*index_fetch_tuple) (struct IndexFetchTableData *scan,
-									  ItemPointer tid,
+									  Datum tupleid,
 									  Snapshot snapshot,
 									  TupleTableSlot *slot,
 									  bool *call_again, bool *all_dead);
@@ -464,7 +486,7 @@ typedef struct TableAmRoutine
 	 * test, returns true, false otherwise.
 	 */
 	bool		(*tuple_fetch_row_version) (Relation rel,
-											ItemPointer tid,
+											Datum tupleid,
 											Snapshot snapshot,
 											TupleTableSlot *slot);
 
@@ -500,23 +522,19 @@ typedef struct TableAmRoutine
 	 */
 
 	/* see table_tuple_insert() for reference about parameters */
-	void		(*tuple_insert) (Relation rel, TupleTableSlot *slot,
+	TupleTableSlot *(*tuple_insert) (Relation rel, TupleTableSlot *slot,
 								 CommandId cid, int options,
 								 struct BulkInsertStateData *bistate);
 
-	/* see table_tuple_insert_speculative() for reference about parameters */
-	void		(*tuple_insert_speculative) (Relation rel,
-											 TupleTableSlot *slot,
-											 CommandId cid,
-											 int options,
-											 struct BulkInsertStateData *bistate,
-											 uint32 specToken);
-
-	/* see table_tuple_complete_speculative() for reference about parameters */
-	void		(*tuple_complete_speculative) (Relation rel,
-											   TupleTableSlot *slot,
-											   uint32 specToken,
-											   bool succeeded);
+	/* see table_tuple_insert_with_arbiter() for reference about parameters */
+	TupleTableSlot *(*tuple_insert_with_arbiter) (ResultRelInfo *resultRelInfo,
+								 TupleTableSlot *slot,
+								 CommandId cid, int options,
+								 struct BulkInsertStateData *bistate,
+								 List *arbiterIndexes,
+								 EState *estate,
+								 LockTupleMode lockmode,
+								 TupleTableSlot *lockedSlot,
+								 TupleTableSlot *tempSlot);
 
 	/* see table_multi_insert() for reference about parameters */
 	void		(*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
@@ -524,29 +542,31 @@ typedef struct TableAmRoutine
 
 	/* see table_tuple_delete() for reference about parameters */
 	TM_Result	(*tuple_delete) (Relation rel,
-								 ItemPointer tid,
+								 Datum tupleid,
 								 CommandId cid,
 								 Snapshot snapshot,
 								 Snapshot crosscheck,
-								 bool wait,
+								 int options,
 								 TM_FailureData *tmfd,
-								 bool changingPart);
+								 bool changingPart,
+								 TupleTableSlot *oldSlot);
 
 	/* see table_tuple_update() for reference about parameters */
 	TM_Result	(*tuple_update) (Relation rel,
-								 ItemPointer otid,
+								 Datum tupleid,
 								 TupleTableSlot *slot,
 								 CommandId cid,
 								 Snapshot snapshot,
 								 Snapshot crosscheck,
-								 bool wait,
+								 int options,
 								 TM_FailureData *tmfd,
 								 LockTupleMode *lockmode,
-								 TU_UpdateIndexes *update_indexes);
+								 TU_UpdateIndexes *update_indexes,
+								 TupleTableSlot *oldSlot);
 
 	/* see table_tuple_lock() for reference about parameters */
 	TM_Result	(*tuple_lock) (Relation rel,
-							   ItemPointer tid,
+							   Datum tupleid,
 							   Snapshot snapshot,
 							   TupleTableSlot *slot,
 							   CommandId cid,
@@ -866,6 +886,14 @@ typedef struct TableAmRoutine
 										   struct SampleScanState *scanstate,
 										   TupleTableSlot *slot);
 
+	/* Check if tuple in the slot belongs to the current transaction */
+	bool		(*tuple_is_current) (Relation rel, TupleTableSlot *slot);
+
+	/* Choose the ANALYZE sampling function for this AM (optional) */
+	void		(*analyze_table) (Relation relation,
+								  AcquireSampleRowsFunc *func,
+								  BlockNumber *totalpages);
+
+	/* Parse and validate reloptions for a relation of this AM */
+	bytea	   *(*reloptions) (char relkind, Datum reloptions, bool validate);
 } TableAmRoutine;
 
 
@@ -1234,7 +1262,7 @@ table_index_fetch_end(struct IndexFetchTableData *scan)
  */
 static inline bool
 table_index_fetch_tuple(struct IndexFetchTableData *scan,
-						ItemPointer tid,
+						Datum tupleid,
 						Snapshot snapshot,
 						TupleTableSlot *slot,
 						bool *call_again, bool *all_dead)
@@ -1247,7 +1275,7 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan,
 	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
 		elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");
 
-	return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
+	return scan->rel->rd_tableam->index_fetch_tuple(scan, tupleid, snapshot,
 													slot, call_again,
 													all_dead);
 }
@@ -1281,7 +1309,7 @@ extern bool table_index_fetch_tuple_check(Relation rel,
  */
 static inline bool
 table_tuple_fetch_row_version(Relation rel,
-							  ItemPointer tid,
+							  Datum tupleid,
 							  Snapshot snapshot,
 							  TupleTableSlot *slot)
 {
@@ -1293,7 +1321,7 @@ table_tuple_fetch_row_version(Relation rel,
 	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
 		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
 
-	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
+	return rel->rd_tableam->tuple_fetch_row_version(rel, tupleid, snapshot, slot);
 }
 
 /*
@@ -1393,45 +1421,32 @@ table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
  * insertion. But note that any toasting of fields within the slot is NOT
  * reflected in the slots contents.
  */
-static inline void
+static inline TupleTableSlot *
 table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
 				   int options, struct BulkInsertStateData *bistate)
 {
-	rel->rd_tableam->tuple_insert(rel, slot, cid, options,
-								  bistate);
+	return rel->rd_tableam->tuple_insert(rel, slot, cid, options, bistate);
 }
 
-/*
- * Perform a "speculative insertion". These can be backed out afterwards
- * without aborting the whole transaction.  Other sessions can wait for the
- * speculative insertion to be confirmed, turning it into a regular tuple, or
- * aborted, as if it never existed.  Speculatively inserted tuples behave as
- * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
- *
- * A transaction having performed a speculative insertion has to either abort,
- * or finish the speculative insertion with
- * table_tuple_complete_speculative(succeeded = ...).
- */
-static inline void
-table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
-							   CommandId cid, int options,
-							   struct BulkInsertStateData *bistate,
-							   uint32 specToken)
+/*
+ * Insert a tuple, letting the table AM check the arbiter indexes for
+ * conflicts (the table AM level replacement for speculative insertion,
+ * used by INSERT ... ON CONFLICT).
+ */
+static inline TupleTableSlot *
+table_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo,
+								TupleTableSlot *slot,
+								CommandId cid, int options,
+								struct BulkInsertStateData *bistate,
+								List *arbiterIndexes,
+								EState *estate,
+								LockTupleMode lockmode,
+								TupleTableSlot *lockedSlot,
+								TupleTableSlot *tempSlot)
 {
-	rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
-											  bistate, specToken);
-}
-
-/*
- * Complete "speculative insertion" started in the same transaction. If
- * succeeded is true, the tuple is fully inserted, if false, it's removed.
- */
-static inline void
-table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
-								 uint32 specToken, bool succeeded)
-{
-	rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
-												succeeded);
+	Relation	rel = resultRelInfo->ri_RelationDesc;
+
+	return rel->rd_tableam->tuple_insert_with_arbiter(resultRelInfo,
+													  slot, cid, options,
+													  bistate, arbiterIndexes,
+													  estate,
+													  lockmode, lockedSlot,
+													  tempSlot);
 }
 
 /*
@@ -1457,7 +1472,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
 }
 
 /*
- * Delete a tuple.
+ * Delete a tuple (and optionally lock the last tuple version).
  *
  * NB: do not call this directly unless prepared to deal with
  * concurrent-update conditions.  Use simple_table_tuple_delete instead.
@@ -1468,11 +1483,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
  *	cid - delete command ID (used for visibility test, and stored into
  *		cmax if successful)
  *	crosscheck - if not InvalidSnapshot, also check tuple against this
- *	wait - true if should wait for any conflicting update to commit/abort
+ *	options:
+ *		If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ *		If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ *		fetched into oldSlot when the deletion is successful.
+ *		If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ *		concurrently updated, then the last tuple version is locked and fetched
+ *		into oldSlot.
+ *
  * Output parameters:
  *	tmfd - filled in failure cases (see below)
  *	changingPart - true iff the tuple is being moved to another partition
  *		table due to an update of the partition key. Otherwise, false.
+ *	oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ *		TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ *		is specified.
  *
  * Normal, successful return value is TM_Ok, which means we did actually
  * delete it.  Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1483,17 +1508,19 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
  * TM_FailureData for additional info.
  */
 static inline TM_Result
-table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
-				   Snapshot snapshot, Snapshot crosscheck, bool wait,
-				   TM_FailureData *tmfd, bool changingPart)
+table_tuple_delete(Relation rel, Datum tupleid, CommandId cid,
+				   Snapshot snapshot, Snapshot crosscheck, int options,
+				   TM_FailureData *tmfd, bool changingPart,
+				   TupleTableSlot *oldSlot)
 {
-	return rel->rd_tableam->tuple_delete(rel, tid, cid,
+	return rel->rd_tableam->tuple_delete(rel, tupleid, cid,
 										 snapshot, crosscheck,
-										 wait, tmfd, changingPart);
+										 options, tmfd, changingPart,
+										 oldSlot);
 }
 
 /*
- * Update a tuple.
+ * Update a tuple (and optionally lock the last tuple version).
  *
  * NB: do not call this directly unless you are prepared to deal with
  * concurrent-update conditions.  Use simple_table_tuple_update instead.
@@ -1505,13 +1532,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
  *	cid - update command ID (used for visibility test, and stored into
  *		cmax/cmin if successful)
  *	crosscheck - if not InvalidSnapshot, also check old tuple against this
- *	wait - true if should wait for any conflicting update to commit/abort
+ *	options:
+ *		If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
+ *		If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
+ *		fetched into oldSlot when the update is successful.
+ *		If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
+ *		concurrently updated, then the last tuple version is locked and fetched
+ *		into oldSlot.
+ *
  * Output parameters:
  *	tmfd - filled in failure cases (see below)
  *	lockmode - filled with lock mode acquired on tuple
  *  update_indexes - in success cases this is set to true if new index entries
  *		are required for this tuple
- *
+ *	oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
+ *		TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
+ *		is specified.
+ *
  * Normal, successful return value is TM_Ok, which means we did actually
  * update it.  Failure return codes are TM_SelfModified, TM_Updated, and
  * TM_BeingModified (the last only possible if wait == false).
@@ -1527,15 +1564,17 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
  * for additional info.
  */
 static inline TM_Result
-table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
+table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot,
 				   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
-				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
-				   TU_UpdateIndexes *update_indexes)
+				   int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
+				   TU_UpdateIndexes *update_indexes,
+				   TupleTableSlot *oldSlot)
 {
-	return rel->rd_tableam->tuple_update(rel, otid, slot,
+	return rel->rd_tableam->tuple_update(rel, tupleid, slot,
 										 cid, snapshot, crosscheck,
-										 wait, tmfd,
-										 lockmode, update_indexes);
+										 options, tmfd,
+										 lockmode, update_indexes,
+										 oldSlot);
 }
 
 /*
@@ -1572,12 +1611,12 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
  * comments for struct TM_FailureData for additional info.
  */
 static inline TM_Result
-table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
+table_tuple_lock(Relation rel, Datum tupleid, Snapshot snapshot,
 				 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 				 LockWaitPolicy wait_policy, uint8 flags,
 				 TM_FailureData *tmfd)
 {
-	return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
+	return rel->rd_tableam->tuple_lock(rel, tupleid, snapshot, slot,
 									   cid, mode, wait_policy,
 									   flags, tmfd);
 }
@@ -2043,6 +2082,11 @@ table_scan_sample_next_tuple(TableScanDesc scan,
 														   slot);
 }
 
+static inline bool
+table_tuple_is_current(Relation rel, TupleTableSlot *slot)
+{
+	return rel->rd_tableam->tuple_is_current(rel, slot);
+}
 
 /* ----------------------------------------------------------------------------
  * Functions to make modifications a bit simpler.
@@ -2050,11 +2094,13 @@ table_scan_sample_next_tuple(TableScanDesc scan,
  */
 
 extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
-extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
-									  Snapshot snapshot);
-extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
+extern void simple_table_tuple_delete(Relation rel, Datum tupleid,
+									  Snapshot snapshot,
+									  TupleTableSlot *oldSlot);
+extern void simple_table_tuple_update(Relation rel, Datum tupleid,
 									  TupleTableSlot *slot, Snapshot snapshot,
-									  TU_UpdateIndexes *update_indexes);
+									  TU_UpdateIndexes *update_indexes,
+									  TupleTableSlot *oldSlot);
 
 
 /* ----------------------------------------------------------------------------
@@ -2095,6 +2141,60 @@ extern void table_block_relation_estimate_size(Relation rel,
  */
 
 extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
+extern const TableAmRoutine *GetTableAmRoutineByAmOid(Oid amoid);
 extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
 
+/*
+ * Return the row reference type used by the relation, defaulting to TID
+ * when the relation has no table AM.
+ */
+static inline RowRefType
+table_get_row_ref_type(Relation rel)
+{
+	if (rel->rd_tableam)
+		return rel->rd_tableam->get_row_ref_type(rel);
+	else
+		return ROW_REF_TID;
+}
+
+/*
+ * Free rel->rd_amcache, delegating to the table AM callback when provided;
+ * otherwise just pfree the cached chunk.
+ */
+static inline void
+table_free_rd_amcache(Relation rel)
+{
+	if (rel->rd_tableam)
+	{
+		rel->rd_tableam->free_rd_amcache(rel);
+	}
+	else
+	{
+		if (rel->rd_amcache)
+			pfree(rel->rd_amcache);
+		rel->rd_amcache = NULL;
+	}
+}
+
+/*
+ * Let the table AM pick the ANALYZE sampling function; fall back to the
+ * heap implementation when analyze_table is not provided.
+ */
+static inline void
+table_analyze(Relation relation, AcquireSampleRowsFunc *func,
+			  BlockNumber *totalpages)
+{
+	if (relation->rd_tableam->analyze_table)
+	{
+		relation->rd_tableam->analyze_table(relation, func, totalpages);
+	}
+	else
+	{
+		*func = acquire_sample_rows;
+		*totalpages = RelationGetNumberOfBlocks(relation);
+	}
+}
+
+static inline bytea *
+table_reloptions(Relation rel, char relkind,
+				 Datum reloptions, bool validate)
+{
+	return rel->rd_tableam->reloptions(relkind, reloptions, validate);
+}
+
+static inline bytea *
+tableam_reloptions(const TableAmRoutine *tableam, char relkind,
+				   Datum reloptions, bool validate)
+{
+	return tableam->reloptions(relkind, reloptions, validate);
+}
+
 #endif							/* TABLEAM_H */
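To illustrate the new option flags, a sketch of a caller that deletes a row and keeps the old version; the helper name is invented, and it assumes an open relation, a valid tupleid, and a slot created with table_slot_create():

#include "postgres.h"

#include "access/tableam.h"
#include "access/xact.h"
#include "utils/snapmgr.h"

/* delete 'tupleid' from 'rel', fetching the deleted version into oldSlot */
static TM_Result
delete_fetching_old(Relation rel, Datum tupleid, TupleTableSlot *oldSlot)
{
	TM_FailureData tmfd;

	/* wait out concurrent updaters, then capture the removed tuple */
	return table_tuple_delete(rel, tupleid,
							  GetCurrentCommandId(true),
							  GetActiveSnapshot(), InvalidSnapshot,
							  TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE,
							  &tmfd, false /* changingPart */, oldSlot);
}

On TM_Ok, oldSlot holds the deleted tuple version, per the table_tuple_delete() commentary above.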
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index f5af6d30556..ed931c770ec 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -15,7 +15,9 @@
 #define TRANSAM_H
 
 #include "access/xlogdefs.h"
-
+#ifndef FRONTEND
+#include "port/atomics.h"
+#endif
 
 /* ----------------
  *		Special transaction ID values
@@ -196,6 +198,22 @@ FullTransactionIdAdvance(FullTransactionId *dest)
 #define FirstUnpinnedObjectId	12000
 #define FirstNormalObjectId		16384
 
+#define COMMITSEQNO_INPROGRESS	UINT64CONST(0x0)
+#define COMMITSEQNO_NON_DELETED	UINT64CONST(0x1)
+#define COMMITSEQNO_ABORTED		UINT64CONST(0x2)
+#define COMMITSEQNO_FROZEN		UINT64CONST(0x3)
+#define COMMITSEQNO_COMMITTING	UINT64CONST(0x4)
+#define COMMITSEQNO_FIRST_NORMAL UINT64CONST(0x5)
+#define COMMITSEQNO_MAX_NORMAL UINT64CONST(0x7FFFFFFFFFFFFFFF)
+
+#define COMMITSEQNO_IS_INPROGRESS(csn) ((csn) == COMMITSEQNO_INPROGRESS || (csn) == COMMITSEQNO_NON_DELETED)
+#define COMMITSEQNO_IS_NON_DELETED(csn) ((csn) == COMMITSEQNO_NON_DELETED)
+#define COMMITSEQNO_IS_ABORTED(csn) ((csn) == COMMITSEQNO_ABORTED)
+#define COMMITSEQNO_IS_FROZEN(csn) ((csn) == COMMITSEQNO_FROZEN)
+#define COMMITSEQNO_IS_NORMAL(csn) ((csn) >= COMMITSEQNO_FIRST_NORMAL)
+#define COMMITSEQNO_IS_COMMITTING(csn) ((csn) == COMMITSEQNO_COMMITTING)
+#define COMMITSEQNO_IS_COMMITTED(csn) ((csn) >= COMMITSEQNO_FROZEN)
+
 /*
  * VariableCache is a data structure in shared memory that is used to track
  * OID and XID assignment state.  For largely historical reasons, there is
@@ -252,6 +270,11 @@ typedef struct VariableCacheData
 	 */
 	TransactionId oldestClogXid;	/* oldest it's safe to look up in clog */
 
+#ifndef FRONTEND
+	pg_atomic_uint64 nextCommitSeqNo;
+#else
+	CommitSeqNo nextCommitSeqNo;
+#endif
 } VariableCacheData;
 
 typedef VariableCacheData *VariableCache;
@@ -294,6 +317,7 @@ extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
 extern bool ForceTransactionIdLimitUpdate(void);
 extern Oid	GetNewObjectId(void);
 extern void StopGeneratingPinnedObjectIds(void);
+extern CommitSeqNo GetCurrentCSN(void);
 
 #ifdef USE_ASSERT_CHECKING
 extern void AssertTransactionIdInAllowableRange(TransactionId xid);
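A small sketch (not from the patch) of how the new macros partition the CSN space; note that COMMITSEQNO_NON_DELETED is classified as a flavor of in-progress by COMMITSEQNO_IS_INPROGRESS():

#include "postgres.h"

#include "access/transam.h"

static const char *
csn_state(CommitSeqNo csn)
{
	if (COMMITSEQNO_IS_INPROGRESS(csn))
		return "in progress";
	if (COMMITSEQNO_IS_ABORTED(csn))
		return "aborted";
	if (COMMITSEQNO_IS_COMMITTING(csn))
		return "committing";
	if (COMMITSEQNO_IS_FROZEN(csn))
		return "frozen";
	Assert(COMMITSEQNO_IS_NORMAL(csn));
	return "committed";			/* a normal commit sequence number */
}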
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 7d3b9446e62..e8200d55720 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -527,4 +527,7 @@ extern void EnterParallelMode(void);
 extern void ExitParallelMode(void);
 extern bool IsInParallelMode(void);
 
+typedef void (*xact_redo_hook_type) (TransactionId xid, XLogRecPtr lsn);
+extern PGDLLIMPORT xact_redo_hook_type xact_redo_hook;
+
 #endif							/* XACT_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 48ca8523810..b3b2191e733 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -53,6 +53,7 @@ extern PGDLLIMPORT bool track_wal_io_timing;
 extern PGDLLIMPORT int wal_decode_buffer_size;
 
 extern PGDLLIMPORT int CheckPointSegments;
+extern PGDLLIMPORT CommitSeqNo startupCommitSeqNo;
 
 /* Archive modes */
 typedef enum ArchiveMode
@@ -284,6 +285,7 @@ extern void do_pg_backup_start(const char *backupidstr, bool fast,
 							   StringInfo tblspcmapfile);
 extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
 extern void do_pg_abort_backup(int code, Datum arg);
+extern bool have_backup_in_progress(void);
 extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
 
@@ -299,4 +301,14 @@ extern SessionBackupState get_backup_status(void);
 /* files to signal promotion to primary */
 #define PROMOTE_SIGNAL_FILE		"promote"
 
+typedef void (*CheckPoint_hook_type) (XLogRecPtr checkPointRedo, int flags);
+extern PGDLLIMPORT CheckPoint_hook_type CheckPoint_hook;
+extern PGDLLIMPORT double CheckPointProgress;
+typedef void (*after_checkpoint_cleanup_hook_type)(XLogRecPtr checkPointRedo,
+												   int flags);
+extern PGDLLIMPORT after_checkpoint_cleanup_hook_type
+	after_checkpoint_cleanup_hook;
+
+extern PGDLLIMPORT void (*RedoShutdownHook) (void);
+
 #endif							/* XLOG_H */
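A sketch of how a loadable module might install one of these hooks, following the usual save-and-chain pattern; the module name and log message are invented:

#include "postgres.h"

#include "access/xlog.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

static CheckPoint_hook_type prev_CheckPoint_hook = NULL;

static void
my_checkpoint_hook(XLogRecPtr checkPointRedo, int flags)
{
	/* chain to any previously installed hook first */
	if (prev_CheckPoint_hook)
		prev_CheckPoint_hook(checkPointRedo, flags);

	elog(LOG, "checkpoint observed at %X/%X",
		 LSN_FORMAT_ARGS(checkPointRedo));
}

void
_PG_init(void)
{
	prev_CheckPoint_hook = CheckPoint_hook;
	CheckPoint_hook = my_checkpoint_hook;
}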
diff --git a/src/include/archive/archive_module.h b/src/include/archive/archive_module.h
index 679ce5a6dbd..2921c0a05f8 100644
--- a/src/include/archive/archive_module.h
+++ b/src/include/archive/archive_module.h
@@ -37,13 +37,17 @@ typedef struct ArchiveModuleState
  */
 typedef void (*ArchiveStartupCB) (ArchiveModuleState *state);
 typedef bool (*ArchiveCheckConfiguredCB) (ArchiveModuleState *state);
-typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, const char *path);
+typedef void (*ArchivePreloadFileCB) (ArchiveModuleState *state,
+									  const char *file, const char *path);
+typedef bool (*ArchiveFileCB) (ArchiveModuleState *state,
+							   const char *file, const char *path);
 typedef void (*ArchiveShutdownCB) (ArchiveModuleState *state);
 
 typedef struct ArchiveModuleCallbacks
 {
 	ArchiveStartupCB startup_cb;
 	ArchiveCheckConfiguredCB check_configured_cb;
+	ArchivePreloadFileCB archive_preload_file_cb;
 	ArchiveFileCB archive_file_cb;
 	ArchiveShutdownCB shutdown_cb;
 } ArchiveModuleCallbacks;
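What an archive module using the new preload callback might look like, as a hedged sketch: the PG16-style _PG_archive_module_init entry point is assumed, the my_* names are invented, and the bodies are placeholders. The header does not document when archive_preload_file_cb runs, so the prefetching comment is a guess from the name.

#include "postgres.h"

#include "archive/archive_module.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

static bool
my_check_configured(ArchiveModuleState *state)
{
	return true;
}

static void
my_preload_file(ArchiveModuleState *state, const char *file, const char *path)
{
	/* e.g. kick off an asynchronous read of 'path' ahead of archiving */
}

static bool
my_archive_file(ArchiveModuleState *state, const char *file, const char *path)
{
	/* copy 'path' to the archive; return true once it is safely stored */
	return true;
}

static const ArchiveModuleCallbacks my_callbacks = {
	.check_configured_cb = my_check_configured,
	.archive_preload_file_cb = my_preload_file,
	.archive_file_cb = my_archive_file,
};

const ArchiveModuleCallbacks *
_PG_archive_module_init(void)
{
	return &my_callbacks;
}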
diff --git a/src/include/c.h b/src/include/c.h
index f69d739be57..024d376e9fa 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -621,7 +621,7 @@ typedef double float8;
 
 /*
  * Oid, RegProcedure, TransactionId, SubTransactionId, MultiXactId,
- * CommandId
+ * CommandId, CommitSeqNo
  */
 
 /* typedef Oid is in postgres_ext.h */
@@ -652,6 +652,8 @@ typedef uint32 CommandId;
 #define FirstCommandId	((CommandId) 0)
 #define InvalidCommandId	(~(CommandId)0)
 
+typedef uint64 CommitSeqNo;
+
 
 /* ----------------
  *		Variable-length datatypes all share the 'struct varlena' header.
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index ffd5e9dc82d..4cf8df01077 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -140,6 +140,8 @@ typedef enum ObjectClass
 #define PERFORM_DELETION_SKIP_EXTENSIONS	0x0010	/* keep extensions */
 #define PERFORM_DELETION_CONCURRENT_LOCK	0x0020	/* normal drop with
 													 * concurrent lock mode */
+#define PERFORM_DELETION_OF_RELATION		0x0040	/* used for orioledb
+													 * extension */
 
 
 /* in dependency.c */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index c8532fb97c8..3fa15391d83 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -211,4 +211,6 @@ itemptr_decode(ItemPointer itemptr, int64 encoded)
 	ItemPointerSet(itemptr, block, offset);
 }
 
+extern void index_update_stats(Relation rel, bool hasindex, double reltuples);
+
 #endif							/* INDEX_H */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 478203ed4c4..b8b4dbfc2a3 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -41,6 +41,10 @@ extern char *makeObjectName(const char *name1, const char *name2,
 extern char *ChooseRelationName(const char *name1, const char *name2,
 								const char *label, Oid namespaceid,
 								bool isconstraint);
+extern List *ChooseIndexColumnNames(List *indexElems);
+extern char *ChooseIndexName(const char *tabname, Oid namespaceId,
+							 List *colnames, List *exclusionOpNames,
+							 bool primary, bool isconstraint);
 extern bool CheckIndexCompatible(Oid oldId,
 								 const char *accessMethodName,
 								 List *attributeList,
@@ -158,4 +162,7 @@ extern int	defGetTypeLength(DefElem *def);
 extern List *defGetStringList(DefElem *def);
 extern void errorConflictingDefElem(DefElem *defel, ParseState *pstate) pg_attribute_noreturn();
 
+typedef Oid (*GetDefaultOpClass_hook_type)(Oid type_id, Oid am_id);
+extern PGDLLIMPORT GetDefaultOpClass_hook_type GetDefaultOpClass_hook;
+
 #endif							/* DEFREM_H */
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 3d3e632a0cc..ae8b2b63de9 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -93,6 +93,14 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into,
 						   ParamListInfo params, QueryEnvironment *queryEnv,
 						   const instr_time *planduration,
 						   const BufferUsage *bufusage);
+extern void ExplainNode(PlanState *planstate, List *ancestors,
+						const char *relationship, const char *plan_name,
+						ExplainState *es);
+extern void show_scan_qual(List *qual, const char *qlabel,
+						   PlanState *planstate, List *ancestors,
+						   ExplainState *es);
+extern void show_instrumentation_count(const char *qlabel, int which,
+									   PlanState *planstate, ExplainState *es);
 
 extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);
 extern void ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc);
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 430e3ca7ddf..15e1fbe7700 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -209,15 +209,15 @@ extern void ExecASDeleteTriggers(EState *estate,
 extern bool ExecBRDeleteTriggers(EState *estate,
 								 EPQState *epqstate,
 								 ResultRelInfo *relinfo,
-								 ItemPointer tupleid,
+								 Datum tupleid,
 								 HeapTuple fdw_trigtuple,
 								 TupleTableSlot **epqslot,
 								 TM_Result *tmresult,
 								 TM_FailureData *tmfd);
 extern void ExecARDeleteTriggers(EState *estate,
 								 ResultRelInfo *relinfo,
-								 ItemPointer tupleid,
 								 HeapTuple fdw_trigtuple,
+								 TupleTableSlot *slot,
 								 TransitionCaptureState *transition_capture,
 								 bool is_crosspart_update);
 extern bool ExecIRDeleteTriggers(EState *estate,
@@ -231,7 +231,7 @@ extern void ExecASUpdateTriggers(EState *estate,
 extern bool ExecBRUpdateTriggers(EState *estate,
 								 EPQState *epqstate,
 								 ResultRelInfo *relinfo,
-								 ItemPointer tupleid,
+								 Datum tupleid,
 								 HeapTuple fdw_trigtuple,
 								 TupleTableSlot *newslot,
 								 TM_Result *tmresult,
@@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate,
 								 ResultRelInfo *relinfo,
 								 ResultRelInfo *src_partinfo,
 								 ResultRelInfo *dst_partinfo,
-								 ItemPointer tupleid,
 								 HeapTuple fdw_trigtuple,
+								 TupleTableSlot *oldslot,
 								 TupleTableSlot *newslot,
 								 List *recheckIndexes,
 								 TransitionCaptureState *transition_capture,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 39fbd5f10a5..3a8ee4fbf05 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -376,6 +376,9 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
 extern void analyze_rel(Oid relid, RangeVar *relation,
 						VacuumParams *params, List *va_cols, bool in_outer_xact,
 						BufferAccessStrategy bstrategy);
+extern int acquire_sample_rows(Relation onerel, int elevel,
+							   HeapTuple *rows, int targrows,
+							   double *totalrows, double *totaldeadrows);
 extern bool std_typanalyze(VacAttrStats *stats);
 
 /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
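
Exporting acquire_sample_rows() lets an extension reuse the stock heap sampler for its own ANALYZE path. A sketch of an analyze callback wiring it up, matching the AcquireSampleRowsFunc signature that the fdwapi.h hunk below relocates to tableam.h; the callback name is hypothetical:

    static bool
    my_analyze_table(Relation relation, AcquireSampleRowsFunc *func,
                     BlockNumber *totalpages)
    {
        /* delegate sampling to the core implementation */
        *func = acquire_sample_rows;
        *totalpages = RelationGetNumberOfBlocks(relation);
        return true;
    }
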
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index ac02247947e..2cc92d66f93 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -633,6 +633,16 @@ extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 								   bool noDupErr,
 								   bool *specConflict, List *arbiterIndexes,
 								   bool onlySummarizing);
+extern List *ExecUpdateIndexTuples(ResultRelInfo *resultRelInfo,
+								   TupleTableSlot *slot,
+								   TupleTableSlot *oldSlot,
+								   EState *estate,
+								   bool noDupErr,
+								   bool *specConflict, List *arbiterIndexes,
+								   bool onlySummarizing);
+extern void ExecDeleteIndexTuples(ResultRelInfo *resultRelInfo,
+								  TupleTableSlot *slot,
+								  EState *estate);
 extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,
 									  TupleTableSlot *slot,
 									  EState *estate, ItemPointer conflictTid,
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 996c62e3055..50a2494c019 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -13,6 +13,7 @@
 #define FDWAPI_H
 
 #include "access/parallel.h"
+#include "access/tableam.h"
 #include "nodes/execnodes.h"
 #include "nodes/pathnodes.h"
 
@@ -148,11 +149,6 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
 typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
 											  struct ExplainState *es);
 
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
-									  HeapTuple *rows, int targrows,
-									  double *totalrows,
-									  double *totaldeadrows);
-
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 											  AcquireSampleRowsFunc *func,
 											  BlockNumber *totalpages);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 49419f14f0d..037ab7dd3da 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -449,6 +449,8 @@ typedef struct ResultRelInfo
 	/* relation descriptor for result relation */
 	Relation	ri_RelationDesc;
 
+	RowRefType	ri_RowRefType;	/* how this result relation's rows are referenced */
+
 	/* # of indices existing on result relation */
 	int			ri_NumIndices;
 
@@ -744,6 +746,7 @@ typedef struct ExecRowMark
 	Index		prti;			/* parent range table index, if child */
 	Index		rowmarkId;		/* unique identifier for resjunk columns */
 	RowMarkType markType;		/* see enum in nodes/plannodes.h */
+	RowRefType	refType;		/* see enum in nodes/primnodes.h */
 	LockClauseStrength strength;	/* LockingClause's strength, or LCS_NONE */
 	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED */
 	bool		ermActive;		/* is this mark relevant for current tuple? */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 9dca3b65287..f51ec29fc92 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1073,6 +1073,7 @@ typedef struct RangeTblEntry
 	int			rellockmode;	/* lock level that query requires on the rel */
 	struct TableSampleClause *tablesample;	/* sampling info, or NULL */
 	Index		perminfoindex;
+	RowRefType	reftype;		/* how rows of this relation are referenced */
 
 	/*
 	 * Fields valid for a subquery RTE (else NULL):
@@ -2823,6 +2824,7 @@ typedef struct CreateAmStmt
 	char	   *amname;			/* access method name */
 	List	   *handler_name;	/* handler function name */
 	char		amtype;			/* type of access method */
+	char	   *tableam_name;	/* table AM name */
 } CreateAmStmt;
 
 /* ----------------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index d64fe6a328b..77130245e8f 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1352,7 +1352,7 @@ typedef enum RowMarkType
  * child relations will also have entries with isParent = true.  The child
  * entries have rti == child rel's RT index and prti == top parent's RT index,
  * and can therefore be recognized as children by the fact that prti != rti.
- * The parent's allMarkTypes field gets the OR of (1<<markType) across all
- * its children (this definition allows children to use different markTypes).
+ * The parent's allRefTypes field gets the OR of (1<<refType) across all
+ * its children (this definition allows children to use different refTypes).
  *
  * The planner also adds resjunk output columns to the plan that carry
@@ -1382,7 +1382,7 @@ typedef struct PlanRowMark
 	Index		prti;			/* range table index of parent relation */
 	Index		rowmarkId;		/* unique identifier for resjunk columns */
 	RowMarkType markType;		/* see enum above */
-	int			allMarkTypes;	/* OR of (1<<markType) for all children */
+	int			allRefTypes;	/* OR of (1<<refType) for all children */
 	LockClauseStrength strength;	/* LockingClause's strength, or LCS_NONE */
 	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED options */
 	bool		isParent;		/* true if this is a "dummy" parent entry */
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index e1aadc39cfb..6fe0b7324ed 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -2038,4 +2038,11 @@ typedef struct OnConflictExpr
 	List	   *exclRelTlist;	/* tlist of the EXCLUDED pseudo relation */
 } OnConflictExpr;
 
+typedef enum RowRefType
+{
+	ROW_REF_TID,				/* row is referenced by TID (ItemPointer) */
+	ROW_REF_ROWID,				/* row is referenced by a table-AM rowid */
+	ROW_REF_COPY				/* whole row value is copied */
+} RowRefType;
+
 #endif							/* PRIMNODES_H */
diff --git a/src/include/nodes/readfuncs.h b/src/include/nodes/readfuncs.h
index cba6f0be75a..c36042d1e2a 100644
--- a/src/include/nodes/readfuncs.h
+++ b/src/include/nodes/readfuncs.h
@@ -27,6 +27,7 @@ extern PGDLLIMPORT bool restore_location_fields;
  * prototypes for functions in read.c (the lisp token parser)
  */
 extern const char *pg_strtok(int *length);
+extern bool pg_str_hasfield(void);
 extern char *debackslash(const char *token, int length);
 extern void *nodeRead(const char *token, int tok_len);
 
diff --git a/src/include/optimizer/appendinfo.h b/src/include/optimizer/appendinfo.h
index a05f91f77d0..d100b13088e 100644
--- a/src/include/optimizer/appendinfo.h
+++ b/src/include/optimizer/appendinfo.h
@@ -47,4 +47,9 @@ extern void add_row_identity_columns(PlannerInfo *root, Index rtindex,
 									 Relation target_relation);
 extern void distribute_row_identity_vars(PlannerInfo *root);
 
+typedef bool (*UpdateTargetsHook)(PlannerInfo *root, Index rtindex,
+								  RangeTblEntry *target_rte,
+								  Relation target_relation);
+extern UpdateTargetsHook UpdateTargets_hook;
+
 #endif							/* APPENDINFO_H */
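
A sketch of UpdateTargets_hook for a table AM that addresses rows by rowid rather than ctid. The return convention (true = identity columns handled, false = fall back to core's TID handling) is an assumption here, and relation_uses_my_am() is hypothetical:

    static bool
    my_UpdateTargets(PlannerInfo *root, Index rtindex,
                     RangeTblEntry *target_rte, Relation target_relation)
    {
        if (!relation_uses_my_am(target_relation))  /* hypothetical test */
            return false;       /* let core emit the usual ctid junk column */

        /* add_row_identity_var() calls for the AM's rowid column go here */
        return true;
    }
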
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 50bc3b503a6..6e4b570fce0 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -31,6 +31,10 @@ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
 											Index rti,
 											RangeTblEntry *rte);
 extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook;
+typedef bool (*set_plain_rel_pathlist_hook_type)(PlannerInfo *root,
+												 RelOptInfo *rel,
+												 RangeTblEntry *rte);
+extern PGDLLIMPORT set_plain_rel_pathlist_hook_type set_plain_rel_pathlist_hook;
 
 /* Hook for plugins to get control in add_paths_to_joinrel() */
 typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
@@ -67,6 +71,14 @@ extern void generate_partitionwise_join_paths(PlannerInfo *root,
 extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
 #endif
 
+/* Data structure for collecting qual clauses that match an index */
+typedef struct
+{
+	bool		nonempty;		/* True if lists are not all empty */
+	/* Lists of IndexClause nodes, one list per index column */
+	List	   *indexclauses[INDEX_MAX_KEYS];
+} IndexClauseSet;
+
 /*
  * indxpath.c
  *	  routines to generate index paths
@@ -82,6 +94,10 @@ extern bool match_index_to_operand(Node *operand, int indexcol,
 								   IndexOptInfo *index);
 extern void check_index_predicates(PlannerInfo *root, RelOptInfo *rel);
 
+extern void match_restriction_clauses_to_index(PlannerInfo *root,
+											   IndexOptInfo *index,
+											   IndexClauseSet *clauseset);
+
 /*
  * tidpath.h
  *	  routines to generate tid paths
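
The new set_plain_rel_pathlist_hook pairs with the now-public IndexClauseSet and match_restriction_clauses_to_index(): an extension can collect the quals matching each index before building its own paths. A sketch, assuming a false return keeps the standard paths (the patch does not spell the convention out); path construction is elided:

    static bool
    my_set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                              RangeTblEntry *rte)
    {
        ListCell   *lc;

        foreach(lc, rel->indexlist)
        {
            IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
            IndexClauseSet clauseset;

            MemSet(&clauseset, 0, sizeof(clauseset));
            match_restriction_clauses_to_index(root, index, &clauseset);
            if (clauseset.nonempty)
            {
                /* build and add this extension's index paths here */
            }
        }
        return false;           /* assumed: also generate the standard paths */
    }
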
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index eb1c3ccc4bf..812927ddcf4 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -24,6 +24,9 @@ typedef void (*get_relation_info_hook_type) (PlannerInfo *root,
 											 RelOptInfo *rel);
 extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook;
 
+typedef bool (*skip_tree_height_hook_type) (Relation indexRelation);
+extern PGDLLIMPORT skip_tree_height_hook_type skip_tree_height_hook;
+
 
 extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
 							  bool inhparent, RelOptInfo *rel);
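
A sketch of skip_tree_height_hook: an index AM whose relations cannot be probed through the buffer manager opts out of the btree-height estimate in get_relation_info(). MY_INDEX_AM_OID is hypothetical:

    static bool
    my_skip_tree_height(Relation indexRelation)
    {
        /* true = planner skips the _bt_getrootheight()-style probe */
        return indexRelation->rd_rel->relam == MY_INDEX_AM_OID;
    }

    /* in _PG_init(): */
    skip_tree_height_hook = my_skip_tree_height;
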
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 31c188176b7..8ec52018173 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -39,6 +39,11 @@ extern void preprocess_minmax_aggregates(PlannerInfo *root);
  * prototypes for plan/createplan.c
  */
 extern Plan *create_plan(PlannerInfo *root, Path *best_path);
+extern List *order_qual_clauses(PlannerInfo *root, List *clauses);
+extern void fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
+									 List **stripped_indexquals_p,
+									 List **fixed_indexquals_p);
+extern Node *replace_nestloop_params_compat(PlannerInfo *root, Node *expr);
 extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual,
 									 Index scanrelid, List *fdw_exprs, List *fdw_private,
 									 List *fdw_scan_tlist, List *fdw_recheck_quals,
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index fc2e15496dd..b4d615f0844 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -47,7 +47,8 @@ extern PlannerInfo *subquery_planner(PlannerGlobal *glob, Query *parse,
 									 bool hasRecursion, double tuple_fraction);
 
 extern RowMarkType select_rowmark_type(RangeTblEntry *rte,
-									   LockClauseStrength strength);
+									   LockClauseStrength strength,
+									   RowRefType *refType);
 
 extern bool limit_needed(Query *parse);
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index cb380c96e26..4b4218a574c 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -343,9 +343,6 @@
 /* Define to 1 if you have the `pthread_barrier_wait' function. */
 #undef HAVE_PTHREAD_BARRIER_WAIT
 
-/* Define to 1 if you have the `pthread_is_threaded_np' function. */
-#undef HAVE_PTHREAD_IS_THREADED_NP
-
 /* Have PTHREAD_PRIO_INHERIT. */
 #undef HAVE_PTHREAD_PRIO_INHERIT
 
diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h
index 845d4498e65..e5af3247632 100644
--- a/src/include/postmaster/bgworker.h
+++ b/src/include/postmaster/bgworker.h
@@ -66,6 +66,12 @@
  * background workers should not use this class.
  */
 #define BGWORKER_CLASS_PARALLEL					0x0010
+
+/*
+ * Background workers of this class are allowed to keep running during
+ * the shutdown checkpoint.
+ */
+#define BGWORKER_CLASS_SYSTEM					0x0020
 /* add additional bgworker classes here */
 
 
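
Registering a worker of the new class uses the unchanged bgworker API; only the flag bit is new. A sketch with hypothetical names:

    BackgroundWorker worker;

    memset(&worker, 0, sizeof(worker));
    worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_CLASS_SYSTEM;
    worker.bgw_start_time = BgWorkerStart_ConsistentState;
    worker.bgw_restart_time = BGW_NEVER_RESTART;
    snprintf(worker.bgw_name, BGW_MAXLEN, "my system worker");
    snprintf(worker.bgw_library_name, BGW_MAXLEN, "my_extension");
    snprintf(worker.bgw_function_name, BGW_MAXLEN, "my_worker_main");
    RegisterBackgroundWorker(&worker);
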
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index 3b3889c58c0..3d1da495915 100644
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -50,6 +50,7 @@ extern PGDLLIMPORT int postmaster_alive_fds[2];
 
 extern PGDLLIMPORT const char *progname;
 
+extern bool IsFatalError(void);
 extern void PostmasterMain(int argc, char *argv[]) pg_attribute_noreturn();
 extern void ClosePostmasterPorts(bool am_syslogger);
 extern void InitProcessGlobals(void);
@@ -58,6 +59,10 @@ extern int	MaxLivePostmasterChildren(void);
 
 extern bool PostmasterMarkPIDForWorkerNotify(int);
 
+typedef void (*base_init_startup_hook_type)(void);
+
+extern PGDLLIMPORT base_init_startup_hook_type base_init_startup_hook;
+
 #ifdef EXEC_BACKEND
 extern pid_t postmaster_forkexec(int argc, char *argv[]);
 extern void SubPostmasterMain(int argc, char *argv[]) pg_attribute_noreturn();
diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h
index 6a2e4c4526b..95eb25f9f4d 100644
--- a/src/include/postmaster/startup.h
+++ b/src/include/postmaster/startup.h
@@ -23,7 +23,10 @@
 			ereport(LOG, errmsg(msg, secs, (usecs / 10000),  __VA_ARGS__ )); \
 	} while(0)
 
+typedef void (*HandleStartupProcInterrupts_hook_type)(void);
+
 extern PGDLLIMPORT int log_startup_progress_interval;
+extern PGDLLIMPORT HandleStartupProcInterrupts_hook_type HandleStartupProcInterrupts_hook;
 
 extern void HandleStartupProcInterrupts(void);
 extern void StartupProcessMain(void) pg_attribute_noreturn();
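
Installing the startup-process hook follows the usual pattern; when the patched HandleStartupProcInterrupts() actually calls it is defined in startup.c, not here. A sketch:

    static void
    my_startup_interrupt_callback(void)
    {
        /* extra per-interrupt-check work during recovery goes here */
    }

    /* in _PG_init(): */
    HandleStartupProcInterrupts_hook = my_startup_interrupt_callback;
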
diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h
index f49b941b53e..a7b793dae3c 100644
--- a/src/include/replication/snapbuild.h
+++ b/src/include/replication/snapbuild.h
@@ -73,6 +73,7 @@ extern void SnapBuildClearExportedSnapshot(void);
 extern void SnapBuildResetExportedSnapshotState(void);
 
 extern SnapBuildState SnapBuildCurrentState(SnapBuild *builder);
+extern TransactionId SnapBuildNextPhaseAt(SnapBuild *builder);
 extern Snapshot SnapBuildGetOrBuildSnapshot(SnapBuild *builder);
 
 extern bool SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr);
@@ -90,5 +91,7 @@ extern void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
 extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn,
 										 struct xl_running_xacts *running);
 extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn);
+extern void SnapBuildUpdateCSNSnaphot(SnapBuild *builder,
+									  CSNSnapshotData *csnSnapshotData);
 
 #endif							/* SNAPBUILD_H */
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index f67056a82b5..d7b095b1464 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -124,6 +124,7 @@ typedef uint16 LOCKMETHODID;
 /* These identify the known lock methods */
 #define DEFAULT_LOCKMETHOD	1
 #define USER_LOCKMETHOD		2
+#define NO_LOG_LOCKMETHOD	255 /* Skip logging of AccessExclusiveLock */
 
 /*
  * LOCKTAG is the key information needed to look up a LOCK item in the
@@ -550,6 +551,7 @@ extern LockMethod GetLocksMethodTable(const LOCK *lock);
 extern LockMethod GetLockTagsMethodTable(const LOCKTAG *locktag);
 extern uint32 LockTagHashCode(const LOCKTAG *locktag);
 extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2);
+extern bool DoLocalLockExist(const LOCKTAG *locktag);
 extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
 									 LOCKMODE lockmode,
 									 bool sessionLock,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index ef74f326932..f3aa3bde389 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -293,6 +293,7 @@ struct PGPROC
 	bool		fpVXIDLock;		/* are we holding a fast-path VXID lock? */
 	LocalTransactionId fpLocalTransactionId;	/* lxid for fast-path VXID
 												 * lock */
+	CommitSeqNo	lastCommittedCSN;	/* CSN of this backend's last committed xact */
 
 	/*
 	 * Support for lock groups.  Use LockHashPartitionLockByProc on the group
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index d8cae3ce1c5..64db8a3aa8b 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -96,4 +96,6 @@ extern void ProcArraySetReplicationSlotXmin(TransactionId xmin,
 extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
 											TransactionId *catalog_xmin);
 
+extern snapshot_hook_type snapshot_hook;
+
 #endif							/* PROCARRAY_H */
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index 0721e4d2058..ef748bfe1e1 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -110,6 +110,16 @@ typedef struct
 	Oid			relId;			/* relation ID */
 } SharedInvalSnapshotMsg;
 
+#define SHAREDINVALUSERCACHE_ID		(-6)
+
+typedef struct
+{
+	int8		id;				/* type field --- must be first */
+	Oid			arg1;			/* user-specific values */
+	Oid			arg2;
+	Oid			arg3;
+} SharedInvalUserMsg;
+
 typedef union
 {
 	int8		id;				/* type field --- must be first */
@@ -119,6 +129,7 @@ typedef union
 	SharedInvalSmgrMsg sm;
 	SharedInvalRelmapMsg rm;
 	SharedInvalSnapshotMsg sn;
+	SharedInvalUserMsg usr;
 } SharedInvalidationMessage;
 
 
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index bb7d90c7ad6..b97394b4841 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -91,6 +91,7 @@ typedef struct RunningTransactionsData
 	TransactionId nextXid;		/* xid from ShmemVariableCache->nextXid */
 	TransactionId oldestRunningXid; /* *not* oldestXmin */
 	TransactionId latestCompletedXid;	/* so we can set xmax */
+	CommitSeqNo csn;	/* current csn */
 
 	TransactionId *xids;		/* array of (sub)xids still running */
 } RunningTransactionsData;
diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h
index 188e348618a..23dddce8d84 100644
--- a/src/include/storage/standbydefs.h
+++ b/src/include/storage/standbydefs.h
@@ -52,6 +52,7 @@ typedef struct xl_running_xacts
 	TransactionId nextXid;		/* xid from ShmemVariableCache->nextXid */
 	TransactionId oldestRunningXid; /* *not* oldestXmin */
 	TransactionId latestCompletedXid;	/* so we can set xmax */
+	CommitSeqNo csn;	/* current csn */
 
 	TransactionId xids[FLEXIBLE_ARRAY_MEMBER];
 } xl_running_xacts;
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index a32d7222a99..91880e498f7 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -232,5 +232,28 @@ extern void PrepareToInvalidateCacheTuple(Relation relation,
 
 extern void PrintCatCacheLeakWarning(HeapTuple tuple);
 extern void PrintCatCacheListLeakWarning(CatCList *list);
+typedef CatCTup *(*SearchCatCacheInternal_hook_type)(CatCache *cache,
+													 int nkeys,
+													 Datum v1, Datum v2,
+													 Datum v3, Datum v4);
+extern SearchCatCacheInternal_hook_type SearchCatCacheInternal_hook;
+
+typedef CatCList *(*SearchCatCacheList_hook_type)(CatCache *cache,
+												  int nkeys,
+												  Datum v1,
+												  Datum v2,
+												  Datum v3);
+extern SearchCatCacheList_hook_type SearchCatCacheList_hook;
+
+typedef TupleDesc (*SysCacheGetAttr_hook_type)(CatCache *SysCache);
+extern SysCacheGetAttr_hook_type SysCacheGetAttr_hook;
+
+typedef uint32 (*GetCatCacheHashValue_hook_type)(CatCache *cache,
+												 int nkeys,
+												 Datum v1,
+												 Datum v2,
+												 Datum v3,
+												 Datum v4);
+extern GetCatCacheHashValue_hook_type GetCatCacheHashValue_hook;
 
 #endif							/* CATCACHE_H */
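
A sketch of intercepting syscache lookups via SearchCatCacheInternal_hook. That returning NULL falls through to the regular lookup is an assumption, and both helpers are hypothetical:

    static CatCTup *
    my_SearchCatCacheInternal(CatCache *cache, int nkeys,
                              Datum v1, Datum v2, Datum v3, Datum v4)
    {
        if (!my_cache_covers(cache))    /* hypothetical filter */
            return NULL;                /* assumed: fall back to normal path */
        return my_lookup_tuple(cache, nkeys, v1, v2, v3, v4);   /* hypothetical */
    }

    /* in _PG_init(): */
    SearchCatCacheInternal_hook = my_SearchCatCacheInternal;
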
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 0292e88b4f2..5b7deaa286c 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -542,4 +542,10 @@ extern void write_stderr(const char *fmt,...) pg_attribute_printf(1, 2);
  */
 extern void write_stderr_signal_safe(const char *fmt);
 
+typedef void (*CustomErrorCleanupHookType) (void);
+
+extern CustomErrorCleanupHookType CustomErrorCleanupHook;
+
+extern void CustomErrorCleanup(void);
+
 #endif							/* ELOG_H */
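
A sketch of wiring up CustomErrorCleanupHook so CustomErrorCleanup() can release extension-private state the core error path cannot see; my_release_resources() is hypothetical:

    static void
    my_error_cleanup(void)
    {
        my_release_resources();     /* hypothetical */
    }

    /* in _PG_init(): */
    CustomErrorCleanupHook = my_error_cleanup;
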
diff --git a/src/include/utils/fmgrtab.h b/src/include/utils/fmgrtab.h
index 838ffe3bc1c..f7e416653a6 100644
--- a/src/include/utils/fmgrtab.h
+++ b/src/include/utils/fmgrtab.h
@@ -46,4 +46,7 @@ extern PGDLLIMPORT const Oid fmgr_last_builtin_oid; /* highest function OID in
 #define InvalidOidBuiltinMapping PG_UINT16_MAX
 extern PGDLLIMPORT const uint16 fmgr_builtin_oid_index[];
 
+extern const FmgrBuiltin *fmgr_isbuiltin(Oid id);
+extern const FmgrBuiltin *fmgr_lookupByName(const char *name);
+
 #endif							/* FMGRTAB_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 14b4eac0630..1461271bbe6 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -22,6 +22,7 @@ extern PGDLLIMPORT int debug_discard_caches;
 
 typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue);
 typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid);
+typedef void (*UsercacheCallbackFunction) (Datum arg, Oid arg1, Oid arg2, Oid arg3);
 
 
 extern void AcceptInvalidationMessages(void);
@@ -48,6 +49,8 @@ extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
 
 extern void CacheInvalidateRelcacheByRelid(Oid relid);
 
+extern void CacheInvalidateRelcacheByDbidRelid(Oid dbid, Oid relid);
+
 extern void CacheInvalidateSmgr(RelFileLocatorBackend rlocator);
 
 extern void CacheInvalidateRelmap(Oid databaseId);
@@ -59,6 +62,9 @@ extern void CacheRegisterSyscacheCallback(int cacheid,
 extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
 										  Datum arg);
 
+extern void CacheRegisterUsercacheCallback(UsercacheCallbackFunction func,
+										  Datum arg);
+
 extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
 
 extern void InvalidateSystemCaches(void);
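
The new usercache invalidation channel mirrors the relcache one. A sketch of a callback matching the UsercacheCallbackFunction signature, plus its registration:

    static void
    my_usercache_callback(Datum arg, Oid arg1, Oid arg2, Oid arg3)
    {
        /* drop extension-private cache entries keyed by (arg1, arg2, arg3) */
    }

    /* in _PG_init(): */
    CacheRegisterUsercacheCallback(my_usercache_callback, (Datum) 0);
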
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index e2a72435427..2fd19a95cbb 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -102,6 +102,8 @@ extern void make_icu_collator(const char *iculocstr,
 
 extern bool pg_locale_deterministic(pg_locale_t locale);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
+typedef bool (*pg_newlocale_from_collation_hook_type)(void);
+extern pg_newlocale_from_collation_hook_type pg_newlocale_from_collation_hook;
 
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
 extern int	pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 980d37a1947..d05de790428 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -18,6 +18,9 @@
 #include "utils/resowner.h"
 #include "utils/snapshot.h"
 
+#ifndef SNAPSHOT_H
+typedef void (*snapshot_hook_type) (Snapshot snapshot);
+#endif
 
 /*
  * The structure used to map times to TransactionId values for the "snapshot
@@ -120,7 +123,7 @@ extern void PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level);
 extern void PushCopiedSnapshot(Snapshot snapshot);
 extern void UpdateActiveSnapshotCommandId(void);
 extern void PopActiveSnapshot(void);
-extern Snapshot GetActiveSnapshot(void);
+extern PGDLLIMPORT Snapshot GetActiveSnapshot(void);
 extern bool ActiveSnapshotSet(void);
 
 extern Snapshot RegisterSnapshot(Snapshot snapshot);
@@ -178,4 +181,10 @@ extern void SerializeSnapshot(Snapshot snapshot, char *start_address);
 extern Snapshot RestoreSnapshot(char *start_address);
 extern void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc);
 
+typedef void (*reset_xmin_hook_type) (void);
+
+extern snapshot_hook_type snapshot_register_hook;
+extern snapshot_hook_type snapshot_deregister_hook;
+extern reset_xmin_hook_type reset_xmin_hook;
+
 #endif							/* SNAPMGR_H */
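
A sketch of the new snapshot lifetime hooks; their exact call points are defined by the patched snapmgr.c, so this only shows the wiring:

    static void
    my_snapshot_register(Snapshot snapshot)
    {
        /* e.g. note snapshot->csnSnapshotData so undo can be retained */
    }

    static void
    my_snapshot_deregister(Snapshot snapshot)
    {
        /* release whatever my_snapshot_register() retained */
    }

    /* in _PG_init(): */
    snapshot_register_hook = my_snapshot_register;
    snapshot_deregister_hook = my_snapshot_deregister;
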
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 583a667a40a..01093a33315 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -122,6 +122,20 @@ typedef struct SnapshotData *Snapshot;
 
 #define InvalidSnapshot		((Snapshot) NULL)
 
+typedef struct
+{
+	uint64		undoLocation;		/* undo log location retained by this snapshot */
+	uint64		xmin;				/* xmin retained together with undoLocation */
+	pairingheap_node ph_node;		/* node in the retained-locations pairing heap */
+} RetainUndoLocationPHNode;
+
+typedef struct CSNSnapshotData
+{
+	uint64			xmin;			/* xmin this CSN snapshot retains */
+	CommitSeqNo		snapshotcsn;	/* CSN boundary of the snapshot */
+	XLogRecPtr		xlogptr;		/* WAL position the snapshot was taken at */
+} CSNSnapshotData;
+
 /*
  * Struct representing all kind of possible snapshots.
  *
@@ -214,6 +228,12 @@ typedef struct SnapshotData
 	 * transactions completed since the last GetSnapshotData().
 	 */
 	uint64		snapXactCompletionCount;
+
+	RetainUndoLocationPHNode undoRegularLocationPhNode; /* retained undo, regular data */
+	RetainUndoLocationPHNode undoSystemLocationPhNode;	/* retained undo, system data */
+	CSNSnapshotData	csnSnapshotData;	/* CSN-based snapshot state */
 } SnapshotData;
 
+typedef void (*snapshot_hook_type) (Snapshot snapshot);
+
 #endif							/* SNAPSHOT_H */
diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h
index 36424b80b1b..e26d9a65308 100644
--- a/src/include/utils/tuplestore.h
+++ b/src/include/utils/tuplestore.h
@@ -73,6 +73,9 @@ extern bool tuplestore_in_memory(Tuplestorestate *state);
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
 									bool copy, TupleTableSlot *slot);
 
+extern bool tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward,
+										  bool copy, TupleTableSlot *slot);
+
 extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
 
 extern bool tuplestore_skiptuples(Tuplestorestate *state,
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index 95f3a9ee308..77d57927de0 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -206,4 +206,9 @@ extern void SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *,
 
 extern void SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *);
 
+typedef void (*load_typcache_tupdesc_hook_type)(TypeCacheEntry *typentry);
+extern PGDLLIMPORT load_typcache_tupdesc_hook_type load_typcache_tupdesc_hook;
+typedef void (*load_enum_cache_data_hook_type)(TypeCacheEntry *tcache);
+extern PGDLLIMPORT load_enum_cache_data_hook_type load_enum_cache_data_hook;
+
 #endif							/* TYPCACHE_H */
diff --git a/src/include/varatt.h b/src/include/varatt.h
index e34870526ba..bc2b39e89f8 100644
--- a/src/include/varatt.h
+++ b/src/include/varatt.h
@@ -38,6 +38,25 @@ typedef struct varatt_external
 	Oid			va_toastrelid;	/* RelID of TOAST table containing it */
 }			varatt_external;
 
+typedef struct OToastExternal
+{
+	uint16		data_size; /* length of OToastExternal data */
+	int16		attnum; /* attribute number of the toasted column */
+	int32		raw_size; /* original data size */
+	int32		toasted_size; /* compressed original data size */
+	/* for fetching data from TOAST tree */
+	CommitSeqNo	csn;
+	/* for finding TOAST tree */
+	Oid			datoid;
+	Oid			relid;
+	Oid			relnode;
+	/* for storing primary index tuple */
+	uint8		formatFlags; /* primary index tuple flags */
+	char		data[FLEXIBLE_ARRAY_MEMBER]; /* data (primary index tuple) */
+} OToastExternal;
+
+#define ORIOLEDB_EXT_FORMAT_FLAGS_BITS 6
+
 /*
  * These macros define the "saved size" portion of va_extinfo.  Its remaining
  * two high-order bits identify the compression method.
@@ -86,17 +105,21 @@ typedef enum vartag_external
 	VARTAG_INDIRECT = 1,
 	VARTAG_EXPANDED_RO = 2,
 	VARTAG_EXPANDED_RW = 3,
-	VARTAG_ONDISK = 18
+	VARTAG_ONDISK = 18,
+	VARTAG_ORIOLEDB = 34
 } vartag_external;
 
 /* this test relies on the specific tag values above */
 #define VARTAG_IS_EXPANDED(tag) \
 	(((tag) & ~1) == VARTAG_EXPANDED_RO)
 
+#define O_TOAST_EXTERNAL_SZ offsetof(OToastExternal, data)
+
 #define VARTAG_SIZE(tag) \
 	((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \
 	 VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \
 	 (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \
+	 (tag) == VARTAG_ORIOLEDB ? O_TOAST_EXTERNAL_SZ : \
 	 (AssertMacro(false), 0))
 
 /*
@@ -282,11 +305,16 @@ typedef struct
 #define VARDATA_SHORT(PTR)					VARDATA_1B(PTR)
 
 #define VARTAG_EXTERNAL(PTR)				VARTAG_1B_E(PTR)
-#define VARSIZE_EXTERNAL(PTR)				(VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)))
+#define VARSIZE_EXTERNAL(PTR)				(VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)) \
+												+ (VARATT_IS_EXTERNAL_ORIOLEDB(PTR) ? \
+												  *((uint16 *) VARDATA_1B_E(PTR)) \
+												  : 0))
+
 #define VARDATA_EXTERNAL(PTR)				VARDATA_1B_E(PTR)
 
 #define VARATT_IS_COMPRESSED(PTR)			VARATT_IS_4B_C(PTR)
 #define VARATT_IS_EXTERNAL(PTR)				VARATT_IS_1B_E(PTR)
+
 #define VARATT_IS_EXTERNAL_ONDISK(PTR) \
 	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK)
 #define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
@@ -299,6 +327,9 @@ typedef struct
 	(VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
 #define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \
 	(VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
+#define VARATT_IS_EXTERNAL_ORIOLEDB(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ORIOLEDB)
+
 #define VARATT_IS_SHORT(PTR)				VARATT_IS_1B(PTR)
 #define VARATT_IS_EXTENDED(PTR)				(!VARATT_IS_4B_U(PTR))
 
diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build
index 13045cbd6e4..16ce1650e2e 100644
--- a/src/makefiles/meson.build
+++ b/src/makefiles/meson.build
@@ -37,6 +37,7 @@ pgxs_kv = {
   'PACKAGE_VERSION': pg_version,
   'PG_MAJORVERSION': pg_version_major,
   'PG_VERSION_NUM': pg_version_num,
+  'ORIOLEDB_PATCHSET_VERSION': orioledb_patchset_version,
   'configure_input': 'meson',
 
   'vpath_build': 'yes',
diff --git a/src/test/isolation/expected/eval-plan-qual-2.out b/src/test/isolation/expected/eval-plan-qual-2.out
new file mode 100644
index 00000000000..117a3d3be8d
--- /dev/null
+++ b/src/test/isolation/expected/eval-plan-qual-2.out
@@ -0,0 +1,37 @@
+Parsed test spec with 3 sessions
+
+starting permutation: read_u wx2 wb1 c2 c1 read_u read
+step read_u: SELECT * FROM accounts;
+accountid|balance|balance2
+---------+-------+--------
+checking |    600|    1200
+savings  |    600|    1200
+(2 rows)
+
+step wx2: UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance;
+balance
+-------
+   1050
+(1 row)
+
+step wb1: DELETE FROM accounts WHERE balance = 600 RETURNING *; <waiting ...>
+step c2: COMMIT;
+step wb1: <... completed>
+accountid|balance|balance2
+---------+-------+--------
+savings  |    600|    1200
+(1 row)
+
+step c1: COMMIT;
+step read_u: SELECT * FROM accounts;
+accountid|balance|balance2
+---------+-------+--------
+checking |   1050|    2100
+(1 row)
+
+step read: SELECT * FROM accounts ORDER BY accountid;
+accountid|balance|balance2
+---------+-------+--------
+checking |   1050|    2100
+(1 row)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 9b0bb8a29b3..124c170746c 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -36,6 +36,7 @@ test: fk-partitioned-2
 test: fk-snapshot
 test: subxid-overflow
 test: eval-plan-qual
+test: eval-plan-qual-2
 test: eval-plan-qual-trigger
 test: inplace-inval
 test: intra-grant-inplace
diff --git a/src/test/isolation/specs/eval-plan-qual-2.spec b/src/test/isolation/specs/eval-plan-qual-2.spec
new file mode 100644
index 00000000000..30447bef24a
--- /dev/null
+++ b/src/test/isolation/specs/eval-plan-qual-2.spec
@@ -0,0 +1,30 @@
+setup
+{
+ CREATE TABLE accounts (accountid text PRIMARY KEY, balance numeric not null,
+   balance2 numeric GENERATED ALWAYS AS (balance * 2) STORED);
+ INSERT INTO accounts VALUES ('checking', 600), ('savings', 600);
+}
+
+teardown
+{
+ DROP TABLE accounts;
+}
+
+session s1
+setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
+step wb1	{ DELETE FROM accounts WHERE balance = 600 RETURNING *; }
+step c1		{ COMMIT; }
+
+session s2
+setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
+step wx2	{ UPDATE accounts SET balance = balance + 450 WHERE accountid = 'checking' RETURNING balance; }
+step c2		{ COMMIT; }
+
+session s3
+setup		{ BEGIN ISOLATION LEVEL READ COMMITTED; }
+step read	{ SELECT * FROM accounts ORDER BY accountid; }
+step read_u	{ SELECT * FROM accounts; }
+
+teardown	{ COMMIT; }
+
+permutation read_u wx2 wb1 c2 c1 read_u read
diff --git a/src/test/modules/dummy_index_am/dummy_index_am.c b/src/test/modules/dummy_index_am/dummy_index_am.c
index c14e0abe0c6..09c5d20479d 100644
--- a/src/test/modules/dummy_index_am/dummy_index_am.c
+++ b/src/test/modules/dummy_index_am/dummy_index_am.c
@@ -164,7 +164,7 @@ dibuildempty(Relation index)
  */
 static bool
 diinsert(Relation index, Datum *values, bool *isnull,
-		 ItemPointer ht_ctid, Relation heapRel,
+		 Datum tupleid, Relation heapRel,
 		 IndexUniqueCheck checkUnique,
 		 bool indexUnchanged,
 		 IndexInfo *indexInfo)
@@ -302,7 +302,8 @@ dihandler(PG_FUNCTION_ARGS)
 
 	amroutine->ambuild = dibuild;
 	amroutine->ambuildempty = dibuildempty;
-	amroutine->aminsert = diinsert;
+	amroutine->aminsert = NULL;
+	amroutine->aminsertextended = diinsert;
 	amroutine->ambulkdelete = dibulkdelete;
 	amroutine->amvacuumcleanup = divacuumcleanup;
 	amroutine->amcanreturn = NULL;
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index bcbc6d910f1..fb75cf0905f 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -606,7 +606,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
 			continue;
 
 		/* copy datum, so it still lives later */
-		if (VARATT_IS_EXTERNAL_ONDISK(attr))
+		if (VARATT_IS_EXTERNAL_ONDISK(attr) || VARATT_IS_EXTERNAL_ORIOLEDB(attr))
 			attr = detoast_external_attr(attr);
 		else
 		{
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 4791528e140..264bdbdee0f 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3146,6 +3146,7 @@ amgetbitmap_function
 amgettuple_function
 aminitparallelscan_function
 aminsert_function
+aminsert_extended_function
 ammarkpos_function
 amoptions_function
 amparallelrescan_function