diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7f4c0fbf..ff1f90239 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,16 +203,11 @@ if (UMPIRE_ENABLE_TESTS)
   add_subdirectory(tests)
 endif ()
 
-if (UMPIRE_ENABLE_DEVELOPER_BENCHMARKS)
+if (UMPIRE_ENABLE_BENCHMARKS)
   add_subdirectory(benchmarks)
   if ((NOT CMAKE_BUILD_TYPE) OR (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Release"))
     message("-- Warning: CMAKE_BUILD_TYPE not set to Release, benchmark information will not be reliable for this build!")
   endif()
-else()
-  if (UMPIRE_ENABLE_BENCHMARKS)
-    message("-- Warning: Benchmarks will not be built. If you want to build with benchmarks,\n"
-            "   set UMPIRE_ENABLE_DEVELOPER_BENCHMARKS to On.")
-  endif()
 endif ()
 
 if (UMPIRE_ENABLE_EXAMPLES)
diff --git a/Dockerfile b/Dockerfile
index 9a2f159ba..a41bb2169 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,8 +38,8 @@ ENV GTEST_COLOR=1
 COPY . /home/umpire/workspace
 WORKDIR /home/umpire/workspace/build
 RUN cmake -DUMPIRE_ENABLE_DEVELOPER_DEFAULTS=On -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang \
-    -DUMPIRE_ENABLE_C=On -DCMAKE_CXX_FLAGS="-fsanitize=address" -DENABLE_TESTS=On -DUMPIRE_ENABLE_TOOLS=On \
-    -DUMPIRE_ENABLE_ASAN=On -DUMPIRE_ENABLE_SANITIZER_TESTS=On .. && \
+    -DUMPIRE_ENABLE_C=On -DCMAKE_CXX_FLAGS="-fsanitize=address" -DENABLE_TESTS=On -DUMPIRE_ENABLE_TOOLS=On \
+    -DUMPIRE_ENABLE_ASAN=On -DUMPIRE_ENABLE_SANITIZER_TESTS=On .. && \
     make -j 2 && \
     ctest -T test -E operation_tests --output-on-failure
 
@@ -48,7 +48,7 @@ ENV GTEST_COLOR=1
 COPY . /home/umpire/workspace
 WORKDIR /home/umpire/workspace/build
 RUN cmake -DUMPIRE_ENABLE_DEVELOPER_DEFAULTS=On -DCMAKE_CXX_COMPILER=g++ -DENABLE_CUDA=On -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=70 .. && \
-    make -j 16
+    make -j 8
 
 # TODO: switch to ROCM 6
 FROM ghcr.io/llnl/radiuss:hip-5.6.1-ubuntu-20.04 AS hip
diff --git a/Makefile b/Makefile
index 405126486..fabf48bae 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ else
 DebugArgs=
 endif
 
-targets = asan clang10 clang11 clang12 clang13 gcc11 gcc7 gcc8 gcc9 hip hip.debug nvcc10 sycl umap_build
+targets = gcc clang umap_build asan cuda hip sycl intel
 
 $(targets):
 	DOCKER_BUILDKIT=1 docker build --target $@ --no-cache $(DebugArgs) .
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index cdbe9c3c4..f16242a94 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -88,64 +88,71 @@ blt_add_target_compile_flags(
   TO pool_stress_test
   FLAGS ${UMPIRE_DISABLE_DEPRECATED_WARNINGS_FLAG})
 
-if (UMPIRE_ENABLE_BENCHMARKS)
-  set (benchmark_depends gbenchmark umpire)
+set (benchmark_depends gbenchmark umpire)
 
-  if (UMPIRE_ENABLE_OPENMP_TARGET)
-    set (benchmark_depends
-      ${benchmark_depends}
-      openmp)
-  endif()
+if (UMPIRE_ENABLE_OPENMP_TARGET)
+  set (benchmark_depends
+    ${benchmark_depends}
+    openmp)
+endif()
 
-  blt_add_executable(
-    NAME allocator_benchmarks
-    SOURCES allocator_benchmarks.cpp
-    DEPENDS_ON ${benchmark_depends})
+blt_add_executable(
+  NAME allocator_benchmarks
+  SOURCES allocator_benchmarks.cpp
+  DEPENDS_ON ${benchmark_depends})
 
-  blt_add_benchmark(
-    NAME allocator_benchmarks
-    COMMAND allocator_benchmarks)
+blt_add_benchmark(
+  NAME allocator_benchmarks
+  COMMAND allocator_benchmarks)
 
-  blt_add_executable(
-    NAME vendor_allocator_benchmarks
-    SOURCES vendor_allocator_benchmarks.cpp
-    DEPENDS_ON ${benchmark_depends})
+blt_add_executable(
+  NAME vendor_allocator_benchmarks
+  SOURCES vendor_allocator_benchmarks.cpp
+  DEPENDS_ON ${benchmark_depends})
+
+blt_add_benchmark(
+  NAME vendor_allocator_benchmarks
+  COMMAND vendor_allocator_benchmarks)
+
+blt_add_executable(
+  NAME debuglog_benchmarks
+  SOURCES debuglog_benchmarks.cpp
+  DEPENDS_ON ${benchmark_depends})
 
-  blt_add_benchmark(
-    NAME vendor_allocator_benchmarks
-    COMMAND vendor_allocator_benchmarks)
+if (UMPIRE_ENABLE_OPENMP)
+  set (benchmark_depends
+    ${benchmark_depends}
+    openmp)
 
   blt_add_executable(
-    NAME debuglog_benchmarks
-    SOURCES debuglog_benchmarks.cpp
+    NAME file_resource_benchmarks
+    SOURCES file_resource_benchmarks.cpp
     DEPENDS_ON ${benchmark_depends})
+endif()
 
-  if (UMPIRE_ENABLE_OPENMP)
-    set (benchmark_depends
-      ${benchmark_depends}
-      openmp)
+blt_add_executable(
+  NAME copy_benchmarks
+  SOURCES copy_benchmarks.cpp
+  DEPENDS_ON ${benchmark_depends})
 
-    blt_add_executable(
-      NAME file_resource_benchmarks
-      SOURCES file_resource_benchmarks.cpp
-      DEPENDS_ON ${benchmark_depends})
-  endif()
+blt_add_benchmark(
+  NAME copy_benchmarks
+  COMMAND copy_benchmarks)
 
-  blt_add_executable(
-    NAME copy_benchmarks
-    SOURCES copy_benchmarks.cpp
-    DEPENDS_ON ${benchmark_depends})
+blt_add_executable(
+  NAME inspector_benchmarks
+  SOURCES inspector_benchmarks.cpp
+  DEPENDS_ON ${benchmark_depends})
 
-  blt_add_benchmark(
-    NAME copy_benchmarks
-    COMMAND copy_benchmarks)
+blt_add_benchmark(
+  NAME inspector_benchmarks
+  COMMAND inspector_benchmarks)
 
-  blt_add_executable(
-    NAME inspector_benchmarks
-    SOURCES inspector_benchmarks.cpp
-    DEPENDS_ON ${benchmark_depends})
+blt_add_executable(
+  NAME copy_performance_benchmark
+  SOURCES op/copy_performance_benchmark.cpp
+  DEPENDS_ON ${benchmark_depends})
 
-  blt_add_benchmark(
-    NAME inspector_benchmarks
-    COMMAND inspector_benchmarks)
-endif()
+blt_add_benchmark(
+  NAME copy_performance_benchmark
+  COMMAND copy_performance_benchmark)
diff --git a/benchmarks/op/copy_performance_benchmark.cpp b/benchmarks/op/copy_performance_benchmark.cpp
new file mode 100644
index 000000000..690c737f1
--- /dev/null
+++ b/benchmarks/op/copy_performance_benchmark.cpp
@@ -0,0 +1,365 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016-25, Lawrence Livermore National Security, LLC and Umpire
+// project contributors. See the COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (MIT)
+//////////////////////////////////////////////////////////////////////////////
+
+#include <cstring>
+
+#include "benchmark/benchmark.h"
+
+#include "umpire/ResourceManager.hpp"
+#include "umpire/Allocator.hpp"
+#include "umpire/op.hpp"
+
+constexpr int MIN_SIZE = 64;       // 64 bytes
+constexpr int MAX_SIZE = 1048576;  // 1 MB
+constexpr int MULTIPLIER = 2;
+
+//==============================================================================
+// Benchmark 1: Original ResourceManager copy (legacy approach)
+//==============================================================================
+
+static void BM_ResourceManager_Copy(benchmark::State& state, const std::string& src_name, const std::string& dst_name) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator(src_name);
+  auto dst_allocator = rm.getAllocator(dst_name);
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data (rm.memset works for host and device allocations)
+  rm.memset(src_ptr, 0xAA);
+
+  for (auto _ : state) {
+    // Original ResourceManager approach (destination pointer comes first)
+    rm.copy(dst_ptr, src_ptr, size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+
+//==============================================================================
+// Benchmark 2: Runtime-dispatch copy (new operation system v2)
+//==============================================================================
+
+static void BM_RuntimeDispatch_Copy(benchmark::State& state, const std::string& src_name, const std::string& dst_name) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator(src_name);
+  auto dst_allocator = rm.getAllocator(dst_name);
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data (runtime-dispatch memset handles host and device)
+  umpire::memset(src_ptr, 0xBB, size);
+
+  for (auto _ : state) {
+    // Runtime dispatch - auto-detects platform from pointers
+    umpire::copy(src_ptr, dst_ptr, size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+
+//==============================================================================
+// Benchmark 3: Compile-time dispatch copy (zero-overhead direct calls)
+//==============================================================================
+
+// Host to Host
+static void BM_CompileTimeDispatch_Copy_Host_Host(benchmark::State& state) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator("HOST");
+  auto dst_allocator = rm.getAllocator("HOST");
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data
+  std::memset(src_ptr, 0xCC, size);
+
+  for (auto _ : state) {
+    // Compile-time dispatch - explicit platform specification
+    umpire::copy<umpire::resource::host_platform, umpire::resource::host_platform>(
+        static_cast<char*>(src_ptr),
+        static_cast<char*>(dst_ptr),
+        size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+
+#if defined(UMPIRE_ENABLE_CUDA)
+// Host to CUDA Device
+static void BM_CompileTimeDispatch_Copy_Host_Cuda(benchmark::State& state) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator("HOST");
+  auto dst_allocator = rm.getAllocator("DEVICE");
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data
+  std::memset(src_ptr, 0xDD, size);
+
+  for (auto _ : state) {
+    // Compile-time dispatch - explicit platform specification
+    umpire::copy<umpire::resource::host_platform, umpire::resource::cuda_platform>(
+        static_cast<char*>(src_ptr),
+        static_cast<char*>(dst_ptr),
+        size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+
+// CUDA Device to Host
+static void BM_CompileTimeDispatch_Copy_Cuda_Host(benchmark::State& state) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator("DEVICE");
+  auto dst_allocator = rm.getAllocator("HOST");
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data on device
+  unsigned char pattern = 0xEE;
+  umpire::memset(src_ptr, pattern, size);
+
+  for (auto _ : state) {
+    // Compile-time dispatch - explicit platform specification
+    umpire::copy<umpire::resource::cuda_platform, umpire::resource::host_platform>(
+        static_cast<char*>(src_ptr),
+        static_cast<char*>(dst_ptr),
+        size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+
+// CUDA Device to CUDA Device
+static void BM_CompileTimeDispatch_Copy_Cuda_Cuda(benchmark::State& state) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator("DEVICE");
+  auto dst_allocator = rm.getAllocator("DEVICE");
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data on device
+  unsigned char pattern = 0xFF;
+  umpire::memset(src_ptr, pattern, size);
+
+  for (auto _ : state) {
+    // Compile-time dispatch - explicit platform specification
+    umpire::copy<umpire::resource::cuda_platform, umpire::resource::cuda_platform>(
+        static_cast<char*>(src_ptr),
+        static_cast<char*>(dst_ptr),
+        size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+#endif // UMPIRE_ENABLE_CUDA
+
+#if defined(UMPIRE_ENABLE_HIP)
+// Host to HIP Device
+static void BM_CompileTimeDispatch_Copy_Host_Hip(benchmark::State& state) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator("HOST");
+  auto dst_allocator = rm.getAllocator("DEVICE");
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data
+  std::memset(src_ptr, 0x11, size);
+
+  for (auto _ : state) {
+    // Compile-time dispatch - explicit platform specification
+    umpire::copy<umpire::resource::host_platform, umpire::resource::hip_platform>(
+        static_cast<char*>(src_ptr),
+        static_cast<char*>(dst_ptr),
+        size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+
+// HIP Device to Host
+static void BM_CompileTimeDispatch_Copy_Hip_Host(benchmark::State& state) {
+  auto& rm = umpire::ResourceManager::getInstance();
+
+  auto src_allocator = rm.getAllocator("DEVICE");
+  auto dst_allocator = rm.getAllocator("HOST");
+
+  const std::size_t size = state.range(0);
+
+  void* src_ptr = src_allocator.allocate(size);
+  void* dst_ptr = dst_allocator.allocate(size);
+
+  // Initialize source data on device
+  unsigned char pattern = 0x22;
+  umpire::memset(src_ptr, pattern, size);
+
+  for (auto _ : state) {
+    // Compile-time dispatch - explicit platform specification
+    umpire::copy<umpire::resource::hip_platform, umpire::resource::host_platform>(
+        static_cast<char*>(src_ptr),
+        static_cast<char*>(dst_ptr),
+        size);
+    benchmark::DoNotOptimize(dst_ptr);
+    benchmark::ClobberMemory();
+  }
+
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * static_cast<int64_t>(size));
+
+  src_allocator.deallocate(src_ptr);
+  dst_allocator.deallocate(dst_ptr);
+}
+#endif // UMPIRE_ENABLE_HIP
+
+//==============================================================================
+// Benchmark Registration
+//==============================================================================
+
+// Host to Host benchmarks - all three approaches
+BENCHMARK_CAPTURE(BM_ResourceManager_Copy, ResourceManager_Host_Host, std::string("HOST"), std::string("HOST"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_CAPTURE(BM_RuntimeDispatch_Copy, RuntimeDispatch_Host_Host, std::string("HOST"), std::string("HOST"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_CompileTimeDispatch_Copy_Host_Host)
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+#if defined(UMPIRE_ENABLE_CUDA)
+// Host to CUDA Device benchmarks
+BENCHMARK_CAPTURE(BM_ResourceManager_Copy, ResourceManager_Host_Cuda, std::string("HOST"), std::string("DEVICE"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_CAPTURE(BM_RuntimeDispatch_Copy, RuntimeDispatch_Host_Cuda, std::string("HOST"), std::string("DEVICE"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_CompileTimeDispatch_Copy_Host_Cuda)
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+// CUDA Device to Host benchmarks
+BENCHMARK_CAPTURE(BM_ResourceManager_Copy, ResourceManager_Cuda_Host, std::string("DEVICE"), std::string("HOST"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_CAPTURE(BM_RuntimeDispatch_Copy, RuntimeDispatch_Cuda_Host, std::string("DEVICE"), std::string("HOST"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_CompileTimeDispatch_Copy_Cuda_Host)
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+// CUDA Device to CUDA Device benchmarks
+BENCHMARK_CAPTURE(BM_ResourceManager_Copy, ResourceManager_Cuda_Cuda, std::string("DEVICE"), std::string("DEVICE"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_CAPTURE(BM_RuntimeDispatch_Copy, RuntimeDispatch_Cuda_Cuda, std::string("DEVICE"), std::string("DEVICE"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_CompileTimeDispatch_Copy_Cuda_Cuda)
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+#endif // UMPIRE_ENABLE_CUDA
+
+#if defined(UMPIRE_ENABLE_HIP)
+// Host to HIP Device benchmarks
+BENCHMARK_CAPTURE(BM_ResourceManager_Copy, ResourceManager_Host_Hip, std::string("HOST"), std::string("DEVICE"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_CAPTURE(BM_RuntimeDispatch_Copy, RuntimeDispatch_Host_Hip, std::string("HOST"), std::string("DEVICE"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_CompileTimeDispatch_Copy_Host_Hip)
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+// HIP Device to Host benchmarks
+BENCHMARK_CAPTURE(BM_ResourceManager_Copy, ResourceManager_Hip_Host, std::string("DEVICE"), std::string("HOST"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK_CAPTURE(BM_RuntimeDispatch_Copy, RuntimeDispatch_Hip_Host, std::string("DEVICE"), std::string("HOST"))
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_CompileTimeDispatch_Copy_Hip_Host)
+    ->RangeMultiplier(MULTIPLIER)->Range(MIN_SIZE, MAX_SIZE)
+    ->Unit(benchmark::kMicrosecond);
+#endif // UMPIRE_ENABLE_HIP
+
+BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/cmake/SetupUmpireOptions.cmake b/cmake/SetupUmpireOptions.cmake
index 725afbae1..1c7bc461e 100644
--- a/cmake/SetupUmpireOptions.cmake
+++ b/cmake/SetupUmpireOptions.cmake
@@ -25,6 +25,7 @@ option(UMPIRE_ENABLE_NUMA "Build Umpire with NUMA support" Off)
 option(UMPIRE_ENABLE_OPENMP_TARGET "Build Umpire with OPENMP target" Off)
 option(UMPIRE_ENABLE_LOGGING "Build Umpire with Logging enabled" On)
+option(UMPIRE_ENABLE_BOUNDS_CHECKS "Enable bounds checking in memory operations" On)
 option(UMPIRE_ENABLE_SLIC "Build Umpire with SLIC logging" Off)
 option(UMPIRE_ENABLE_BACKTRACE "Build Umpire with allocation backtrace enabled" Off)
 option(UMPIRE_ENABLE_BACKTRACE_SYMBOLS "Build Umpire with symbol support" Off)
@@ -39,6 +40,7 @@ option(UMPIRE_ENABLE_SANITIZER_TESTS "Enable address sanitizer tests" Off)
 option(UMPIRE_ENABLE_DEVICE_ALLOCATOR "Enable Device Allocator" Off)
 option(UMPIRE_ENABLE_SQLITE_EXPERIMENTAL "Build with sqlite event integration (experimental)" Off)
 option(UMPIRE_DISABLE_ALLOCATIONMAP_DEBUG "Disable verbose output from AllocationMap during debug builds" Off)
+option(UMPIRE_RM_USE_NEW_OPS "Enable new template-based memory operations" On)
 set(UMPIRE_FMT_TARGET fmt::fmt-header-only CACHE STRING "Name of fmt target to use")
 
 if (UMPIRE_ENABLE_INACCESSIBILITY_TESTS)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 2a03ddf2f..3c75fcff8 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -55,7 +55,9 @@ if (UMPIRE_ENABLE_HIP)
     NAME multi_device
     SOURCES multi_device.cpp
     DEPENDS_ON umpire blt::hip)
-
+  blt_add_target_compile_flags(
+    TO multi_device
+    FLAGS ${UMPIRE_DISABLE_DEPRECATED_WARNINGS_FLAG})
   list(APPEND umpire_examples multi_device)
 endif()
diff --git a/examples/cookbook/CMakeLists.txt b/examples/cookbook/CMakeLists.txt
index c4589bea5..9e90cb088 100644
--- a/examples/cookbook/CMakeLists.txt
+++ b/examples/cookbook/CMakeLists.txt
@@ -23,6 +23,9 @@ if (UMPIRE_ENABLE_NUMA)
     NAME recipe_move_between_numa
     SOURCES recipe_move_between_numa.cpp
     DEPENDS_ON ${cookbook_depends})
+  blt_add_target_compile_flags(
+    TO recipe_move_between_numa
+    FLAGS ${UMPIRE_DISABLE_DEPRECATED_WARNINGS_FLAG})
   list(APPEND umpire_cookbooks recipe_move_between_numa)
 endif ()
@@ -79,6 +82,9 @@ if (UMPIRE_ENABLE_CUDA)
     NAME recipe_move_to_managed
     SOURCES recipe_move_to_managed.cpp
     DEPENDS_ON ${cookbook_depends})
+  blt_add_target_compile_flags(
+    TO recipe_move_to_managed
+    FLAGS ${UMPIRE_DISABLE_DEPRECATED_WARNINGS_FLAG})
   list(APPEND umpire_cookbooks recipe_move_to_managed)
 
   blt_add_executable(
@@ -246,5 +252,4 @@ blt_add_executable(
 list(APPEND umpire_cookbooks recipe_named_allocation)
 
 install(TARGETS ${umpire_cookbooks} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
-
 umpire_add_code_checks(PREFIX cookbook)
diff --git a/examples/cookbook/recipe_device_ipc.cpp b/examples/cookbook/recipe_device_ipc.cpp
index e37ff8efc..d7ba997f3 100644
--- a/examples/cookbook/recipe_device_ipc.cpp
+++ b/examples/cookbook/recipe_device_ipc.cpp
@@ -12,6 +12,7 @@
 #include "umpire/Umpire.hpp"
 #include "umpire/strategy/DeviceIpcAllocator.hpp"
 #include "umpire/util/MemoryResourceTraits.hpp"
+#include "umpire/op.hpp"
 #if defined(UMPIRE_ENABLE_MPI)
 #include <mpi.h>
 #endif
@@ -41,7 +42,8 @@ int main(int argc, char** argv)
 
   // Allocate device memory - only rank 0 will physically allocate
   // All other ranks will import via IPC
-  const size_t size = 1024 * sizeof(float);
+  constexpr std::size_t num_elements = 1024;
+  const size_t size = num_elements * sizeof(float);
   float* data = static_cast<float*>(ipc_allocator.allocate(size));
 
   std::cout << "Rank " << rank << ": Got device memory at " << data << std::endl;
@@ -59,7 +61,7 @@
     }
 
     // Copy to device
-    rm.copy(data, host_data, size);
+    umpire::copy(host_data, data, num_elements);
 
     host_allocator.deallocate(host_data);
   }
@@ -76,7 +78,7 @@
   // All ranks can now access the data
   // Verify by copying a portion back to host
   float* value = static_cast<float*>(host_allocator.allocate(sizeof(float)));
-  rm.copy(value, data + 1, sizeof(float));
+  umpire::copy(data + 1, value, 1);
 
   std::cout << "Rank " << rank << ": second value is " << *value << std::endl;
diff --git a/examples/cookbook/recipe_move_between_numa.cpp b/examples/cookbook/recipe_move_between_numa.cpp
index 7282386fa..18729c8c9 100644
--- a/examples/cookbook/recipe_move_between_numa.cpp
+++ b/examples/cookbook/recipe_move_between_numa.cpp
@@ -12,6 +12,7 @@
 #include "umpire/util/Macros.hpp"
 #include "umpire/util/error.hpp"
 #include "umpire/util/numa.hpp"
+#include "umpire/op.hpp"
 
 #if defined(UMPIRE_ENABLE_CUDA)
 #include <cuda_runtime_api.h>
@@ -52,7 +53,7 @@ int main(int, char**)
   }
 
   // Touch it
-  rm.memset(dst_ptr, 0);
+  umpire::memset(dst_ptr, 0, alloc_size);
 
   // Verify NUMA node
   if (umpire::numa::get_location(dst_ptr) != host_nodes[1]) {
@@ -82,7 +83,7 @@
 
   // Touch it -- this currently uses the host memset operation (thus, copying
   // the memory back)
-  rm.memset(dst_ptr, 0);
+  umpire::memset(dst_ptr, 0, alloc_size);
 
   // Verify NUMA node
   if (umpire::numa::get_location(dst_ptr) != device_nodes[0]) {
diff --git a/examples/multi_device.cpp b/examples/multi_device.cpp
index 9be296bdb..f39868c7e 100644
--- a/examples/multi_device.cpp
+++ b/examples/multi_device.cpp
@@ -8,6 +8,7 @@
 
 #include "umpire/ResourceManager.hpp"
 #include "umpire/strategy/QuickPool.hpp"
+#include "umpire/op.hpp"
 
 constexpr int BLOCK_SIZE = 256;
 constexpr int NUM_THREADS = 4096;
@@ -67,7 +68,7 @@ int main(int, char**)
   }
 #endif
 
-  rm.copy(b, a);
+  umpire::copy(a, b, NUM_THREADS);
 
   b = static_cast<int*>(rm.move(b, rm.getAllocator("HOST")));
 
   UMPIRE_ASSERT(b[BLOCK_SIZE] == (BLOCK_SIZE * MULTIPLE) && "Error: incorrect value!");
diff --git a/examples/tutorial/CMakeLists.txt b/examples/tutorial/CMakeLists.txt
index 2a19df0f0..198c48336 100644
--- a/examples/tutorial/CMakeLists.txt
+++ b/examples/tutorial/CMakeLists.txt
@@ -88,4 +88,8 @@ endif ()
 
 install(TARGETS ${umpire_tutorials} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
+blt_add_target_compile_flags(
+  TO tut_move
+  FLAGS ${UMPIRE_DISABLE_DEPRECATED_WARNINGS_FLAG})
+
 umpire_add_code_checks(PREFIX tutorial)
diff --git a/examples/tutorial/tut_copy.cpp b/examples/tutorial/tut_copy.cpp
index ac9e79745..e96d60b74 100644
--- a/examples/tutorial/tut_copy.cpp
+++ b/examples/tutorial/tut_copy.cpp
@@ -6,6 +6,7 @@
 //////////////////////////////////////////////////////////////////////////////
 #include "umpire/Allocator.hpp"
 #include "umpire/ResourceManager.hpp"
+#include "umpire/op.hpp"
 
 void copy_data(double* source_data, std::size_t size, const std::string& destination)
 {
@@ -15,7 +16,7 @@
   double* dest_data = static_cast<double*>(dest_allocator.allocate(size * sizeof(double)));
 
   // _sphinx_tag_tut_copy_start
-  rm.copy(dest_data, source_data);
+  umpire::copy(source_data, dest_data, size);
   // _sphinx_tag_tut_copy_end
 
   std::cout << "Copied source data (" << source_data << ") to destination " << destination << " (" << dest_data << ")"
diff --git a/examples/tutorial/tut_memset.cpp b/examples/tutorial/tut_memset.cpp
index be62c7d77..4aae072a6 100644
--- a/examples/tutorial/tut_memset.cpp
+++ b/examples/tutorial/tut_memset.cpp
@@ -6,6 +6,7 @@
 //////////////////////////////////////////////////////////////////////////////
 #include "umpire/Allocator.hpp"
 #include "umpire/ResourceManager.hpp"
+#include "umpire/op.hpp"
 
 int main(int, char**)
 {
@@ -37,7 +38,7 @@
             << std::endl;
 
   // _sphinx_tag_tut_memset_start
-  rm.memset(data, 0);
+  umpire::memset(data, 0, SIZE * sizeof(double));
   // _sphinx_tag_tut_memset_end
 
   std::cout << "Set data from " << destination << " (" << data << ") to 0." << std::endl;
diff --git a/examples/tutorial/tut_reallocate.cpp b/examples/tutorial/tut_reallocate.cpp
index ab5fe3aac..93d1e244a 100644
--- a/examples/tutorial/tut_reallocate.cpp
+++ b/examples/tutorial/tut_reallocate.cpp
@@ -6,6 +6,7 @@
 //////////////////////////////////////////////////////////////////////////////
 #include "umpire/Allocator.hpp"
 #include "umpire/ResourceManager.hpp"
+#include "umpire/op.hpp"
 
 int main(int, char**)
 {
@@ -40,7 +41,7 @@
   std::cout << "Reallocating data (" << data << ") to size " << REALLOCATED_SIZE << "...";
 
   // _sphinx_tag_tut_realloc_start
-  data = static_cast<double*>(rm.reallocate(data, REALLOCATED_SIZE));
+  data = umpire::reallocate(&data, REALLOCATED_SIZE);
   // _sphinx_tag_tut_realloc_end
 
   std::cout << "done. Reallocated data (" << data << ")" << std::endl;
diff --git a/include/umpire/op.hpp b/include/umpire/op.hpp
new file mode 100644
index 000000000..4924ebf1c
--- /dev/null
+++ b/include/umpire/op.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "umpire/config.hpp"
+#include "umpire/op/host.hpp"
+#include "umpire/op/operations.hpp"
+#if defined(UMPIRE_ENABLE_CUDA)
+#include "umpire/op/cuda.hpp"
+#endif
+#if defined(UMPIRE_ENABLE_HIP)
+#include "umpire/op/hip.hpp"
+#endif
+#if defined(UMPIRE_ENABLE_SYCL)
+#include "umpire/op/sycl.hpp"
+#endif
+#if defined(UMPIRE_ENABLE_OPENMP_TARGET)
+#include "umpire/op/openmp_target.hpp"
+#endif
+
+#include "umpire/op/dispatch.hpp"
diff --git a/include/umpire/op/cuda.hpp b/include/umpire/op/cuda.hpp
new file mode 100644
index 000000000..5dfc8cc87
--- /dev/null
+++ b/include/umpire/op/cuda.hpp
@@ -0,0 +1,478 @@
+#pragma once
+
+#include <cuda_runtime_api.h>
+
+#include "umpire/op/detail/utils.hpp"
+#include "umpire/op/operations.hpp"
+#include "umpire/resource/platform.hpp"
+#include "umpire/util/Platform.hpp"
+#include "umpire/util/error.hpp"
+
+namespace umpire {
+namespace op {
+
+// CUDA implementation helpers
+namespace detail {
+
+/**
+ * @brief Get the CUDA memory copy direction kind
+ *
+ * @tparam SRC Source platform
+ * @tparam DST Destination platform
+ */
+template <typename SRC, typename DST>
+struct copy_kind;
+
+// Device to host specialization
+template <>
+struct copy_kind<umpire::resource::cuda_platform, umpire::resource::host_platform> {
+  static constexpr cudaMemcpyKind value = cudaMemcpyDeviceToHost;
+};
+
+// Host to device specialization
+template <>
+struct copy_kind<umpire::resource::host_platform, umpire::resource::cuda_platform> {
+  static constexpr cudaMemcpyKind value = cudaMemcpyHostToDevice;
+};
+
+// Device to device specialization
+template <>
+struct copy_kind<umpire::resource::cuda_platform, umpire::resource::cuda_platform> {
+  static constexpr cudaMemcpyKind value = cudaMemcpyDeviceToDevice;
+};
+
+/**
+ * @brief Check if a CUDA device supports managed memory features
+ *
+ * @param device Device ID to check
+ * @return true if the device supports managed memory
+ * @return false if the device does not support managed memory
+ */
+inline bool supports_managed_memory(int device)
+{
+  cudaDeviceProp properties;
+  cudaError_t error = ::cudaGetDeviceProperties(&properties, device);
+
+  if (error != cudaSuccess) {
+    UMPIRE_ERROR(runtime_error, fmt::format("cudaGetDeviceProperties for device {} failed with error: {}", device,
+                                            cudaGetErrorString(error)));
+  }
+
+  return (properties.managedMemory == 1 && properties.concurrentManagedAccess == 1);
+}
+
+/**
+ * @brief Get CUDA stream from a resource
+ *
+ * @param resource The resource to get the stream from
+ * @return cudaStream_t The CUDA stream
+ */
+inline cudaStream_t get_stream(camp::resources::Resource& resource)
+{
+  auto cuda_resource = resource.try_get<camp::resources::Cuda>();
+  if (!cuda_resource) {
+    UMPIRE_ERROR(resource_error, fmt::format("Expected resources::Cuda, got resources::{}",
+                                             platform_to_string(resource.get_platform())));
+  }
+  return cuda_resource->get_stream();
+}
+
+/**
+ * @brief Apply memory advice to a CUDA managed memory allocation
+ *
+ * @tparam T Type of memory
+ * @param ptr Pointer to memory
+ * @param count Number of elements
+ * @param device Device ID for advice
+ * @param advice Memory advice to apply
+ */
+template <typename T>
+inline void advise(T* ptr, std::size_t count, int device, cudaMemoryAdvise advice)
+{
+  // Skip if device doesn't support managed memory
+  if (!supports_managed_memory(device))
+    return;
+
+  std::size_t size = detail::get_size<T>(count);
+  cudaError_t error = ::cudaMemAdvise(ptr, size, advice, device);
+
+  if (error != cudaSuccess) {
+    UMPIRE_ERROR(runtime_error,
fmt::format("cudaMemAdvise(ptr={}, size={}, advice={}, device={}) failed with error: {}", ptr, size, + static_cast(advice), device, cudaGetErrorString(error))); + } +} + +/** + * @brief Synchronous memory copy implementation + * + * @tparam T Type of memory + * @param src Source pointer + * @param dst Destination pointer + * @param count Number of elements + * @param kind Copy direction kind + */ +template +inline void copy(T* src, T* dst, std::size_t count, cudaMemcpyKind kind) +{ + std::size_t size = detail::get_size(count); + + cudaError_t error = ::cudaMemcpy(dst, src, size, kind); + if (error != cudaSuccess) { + UMPIRE_ERROR(runtime_error, fmt::format("cudaMemcpy(dst={}, src={}, size={}, kind={}) failed with error: {}", reinterpret_cast(dst), + reinterpret_cast(src), size, static_cast(kind), cudaGetErrorString(error))); + } +} + +/** + * @brief Asynchronous memory copy implementation + * + * @tparam T Type of memory + * @param src Source pointer + * @param dst Destination pointer + * @param count Number of elements + * @param resource Resource for asynchronous operation + * @param kind Copy direction kind + * @return Event representing the asynchronous operation + */ +template +inline camp::resources::EventProxy copy_async(T* src, T* dst, std::size_t count, + camp::resources::Resource& resource, + cudaMemcpyKind kind) +{ + auto stream = get_stream(resource); + std::size_t size = detail::get_size(count); + + cudaError_t error = ::cudaMemcpyAsync(dst, src, size, kind, stream); + if (error != cudaSuccess) { + UMPIRE_ERROR(runtime_error, + fmt::format("cudaMemcpyAsync(dst={}, src={}, size={}, kind={}, stream={}) failed with error: {}", dst, + src, size, static_cast(kind), static_cast(stream), cudaGetErrorString(error))); + } + + return camp::resources::EventProxy{resource}; +} + +/** + * @brief Synchronous memory set implementation + * + * @tparam T Type of memory + * @param ptr Pointer to memory + * @param value Value to set + * @param count Number of elements + */ +template +inline void memset(T* ptr, int value, std::size_t count) +{ + std::size_t size = detail::get_size(count); + + cudaError_t error = ::cudaMemset(ptr, value, size); + if (error != cudaSuccess) { + UMPIRE_ERROR(runtime_error, fmt::format("cudaMemset(ptr={}, value={}, size={}) failed with error: {}", reinterpret_cast(ptr), value, + size, cudaGetErrorString(error))); + } +} + +/** + * @brief Asynchronous memory set implementation + * + * @tparam T Type of memory + * @param ptr Pointer to memory + * @param value Value to set + * @param count Number of elements + * @param resource Resource for asynchronous operation + * @return Event representing the asynchronous operation + */ +template +inline camp::resources::EventProxy memset_async(T* ptr, int value, std::size_t count, + camp::resources::Resource& resource) +{ + auto stream = get_stream(resource); + std::size_t size = detail::get_size(count); + + cudaError_t error = ::cudaMemsetAsync(ptr, value, size, stream); + if (error != cudaSuccess) { + UMPIRE_ERROR(runtime_error, + fmt::format("cudaMemsetAsync(ptr={}, value={}, size={}, stream={}) failed with error: {}", ptr, value, + size, static_cast(stream), cudaGetErrorString(error))); + } + + return camp::resources::EventProxy{resource}; +} + +/** + * @brief Synchronous memory prefetch implementation + * + * @tparam T Type of memory + * @param ptr Pointer to memory + * @param device Device to prefetch to + * @param count Number of elements + */ +template +inline void prefetch(T* ptr, int device, std::size_t count) +{ + // Use 
+  int current_device;
+  cudaGetDevice(&current_device);
+  int gpu = (device != cudaCpuDeviceId) ? device : current_device;
+
+  if (supports_managed_memory(gpu)) {
+    std::size_t size = detail::get_size<T>(count);
+    cudaError_t error = ::cudaMemPrefetchAsync(ptr, size, device, nullptr);
+
+    if (error != cudaSuccess) {
+      UMPIRE_ERROR(runtime_error, fmt::format("cudaMemPrefetchAsync(ptr={}, size={}, device={}) failed with error: {}",
+                                              reinterpret_cast<void*>(ptr), size, device, cudaGetErrorString(error)));
+    }
+  }
+}
+
+/**
+ * @brief Asynchronous memory prefetch implementation
+ *
+ * @tparam T Type of memory
+ * @param ptr Pointer to memory
+ * @param device Device to prefetch to
+ * @param count Number of elements
+ * @param resource Resource for asynchronous operation
+ * @return Event representing the asynchronous operation
+ */
+template <typename T>
+inline camp::resources::EventProxy<camp::resources::Resource> prefetch_async(T* ptr, int device, std::size_t count,
+                                                                             camp::resources::Resource& resource)
+{
+  auto stream = get_stream(resource);
+
+  // Use current device for properties if device is CPU
+  int current_device;
+  cudaGetDevice(&current_device);
+  int gpu = (device != cudaCpuDeviceId) ? device : current_device;
+
+  if (supports_managed_memory(gpu)) {
+    std::size_t size = detail::get_size<T>(count);
+    cudaError_t error = ::cudaMemPrefetchAsync(ptr, size, device, stream);
+
+    if (error != cudaSuccess) {
+      UMPIRE_ERROR(runtime_error,
+                   fmt::format("cudaMemPrefetchAsync(ptr={}, size={}, device={}, stream={}) failed with error: {}", ptr,
+                               size, device, static_cast<void*>(stream), cudaGetErrorString(error)));
+    }
+  }
+
+  return camp::resources::EventProxy<camp::resources::Resource>{resource};
+}
+
+} // namespace detail
+
+//------------------------------------------------------------------------------
+// CUDA Operation Template Specializations
+//------------------------------------------------------------------------------
+
+// CUDA-to-CUDA copy operation
+template <>
+struct copy<umpire::resource::cuda_platform, umpire::resource::cuda_platform> {
+  /**
+   * @brief CUDA to CUDA synchronous copy
+   *
+   * @tparam T Type of data being copied
+   * @param src Source pointer
+   * @param dst Destination pointer
+   * @param len Number of elements to copy
+   */
+  template <typename T>
+  static void exec(T* src, T* dst, std::size_t len) noexcept
+  {
+    detail::copy(src, dst, len,
+                 detail::copy_kind<umpire::resource::cuda_platform, umpire::resource::cuda_platform>::value);
+  }
+
+  /**
+   * @brief CUDA to CUDA asynchronous copy
+   *
+   * @tparam T Type of data being copied
+   * @param src Source pointer
+   * @param dst Destination pointer
+   * @param len Number of elements to copy
+   * @param resource Resource for asynchronous operation
+   * @return Event representing the asynchronous operation
+   */
+  template <typename T>
+  static camp::resources::EventProxy<camp::resources::Resource> exec(T* src, T* dst, std::size_t len,
+                                                                     camp::resources::Resource& resource)
+  {
+    return detail::copy_async(src, dst, len, resource,
+                              detail::copy_kind<umpire::resource::cuda_platform, umpire::resource::cuda_platform>::value);
+  }
+};
+
+// CUDA-to-Host copy operation
+template <>
+struct copy<umpire::resource::cuda_platform, umpire::resource::host_platform> {
+  /**
+   * @brief CUDA to Host synchronous copy
+   *
+   * @tparam T Type of data being copied
+   * @param src Source pointer
+   * @param dst Destination pointer
+   * @param len Number of elements to copy
+   */
+  template <typename T>
+  static void exec(T* src, T* dst, std::size_t len) noexcept
+  {
+    detail::copy(src, dst, len,
+                 detail::copy_kind<umpire::resource::cuda_platform, umpire::resource::host_platform>::value);
+  }
+
+  /**
+   * @brief CUDA to Host asynchronous copy
+   *
+   * @tparam T Type of data being copied
+   * @param src Source pointer
+   * @param dst Destination pointer
+   * @param len Number of elements to copy
+   * @param resource Resource for asynchronous operation
+   * @return Event representing the asynchronous operation
+   */
+  template <typename T>
+  static camp::resources::EventProxy<camp::resources::Resource> exec(T* src, T* dst, std::size_t len,
+                                                                     camp::resources::Resource& resource)
+  {
+    return detail::copy_async(src, dst, len, resource,
+                              detail::copy_kind<umpire::resource::cuda_platform, umpire::resource::host_platform>::value);
+  }
+};
+
+// Host-to-CUDA copy operation
+template <>
+struct copy<umpire::resource::host_platform, umpire::resource::cuda_platform> {
+  /**
+   * @brief Host to CUDA synchronous copy
+   *
+   * @tparam T Type of data being copied
+   * @param src Source pointer
+   * @param dst Destination pointer
+   * @param len Number of elements to copy
+   */
+  template <typename T>
+  static void exec(T* src, T* dst, std::size_t len) noexcept
+  {
+    detail::copy(src, dst, len,
+                 detail::copy_kind<umpire::resource::host_platform, umpire::resource::cuda_platform>::value);
+  }
+
+  /**
+   * @brief Host to CUDA asynchronous copy
+   *
+   * @tparam T Type of data being copied
+   * @param src Source pointer
+   * @param dst Destination pointer
+   * @param len Number of elements to copy
+   * @param resource Resource for asynchronous operation
+   * @return Event representing the asynchronous operation
+   */
+  template <typename T>
+  static camp::resources::EventProxy<camp::resources::Resource> exec(T* src, T* dst, std::size_t len,
+                                                                     camp::resources::Resource& resource)
+  {
+    return detail::copy_async(src, dst, len, resource,
+                              detail::copy_kind<umpire::resource::host_platform, umpire::resource::cuda_platform>::value);
+  }
+};
+
+// CUDA memset operation
+template <>
+struct memset<umpire::resource::cuda_platform> {
+  /**
+   * @brief CUDA synchronous memset
+   *
+   * @tparam T Type of memory being set
+   * @param ptr Pointer to memory
+   * @param val Value to set
+   * @param len Number of elements to set
+   */
+  template <typename T>
+  static void exec(T* ptr, int val, std::size_t len) noexcept
+  {
+    detail::memset(ptr, val, len);
+  }
+
+  /**
+   * @brief CUDA asynchronous memset
+   *
+   * @tparam T Type of memory being set
+   * @param ptr Pointer to memory
+   * @param val Value to set
+   * @param len Number of elements to set
+   * @param resource Resource for asynchronous operation
+   * @return Event representing the asynchronous operation
+   */
+  template <typename T>
+  static camp::resources::EventProxy<camp::resources::Resource> exec(T* ptr, int val, std::size_t len,
+                                                                     camp::resources::Resource& resource)
+  {
+    return detail::memset_async(ptr, val, len, resource);
+  }
+};
+
+// CUDA prefetch operation
+template <>
+struct prefetch<umpire::resource::cuda_platform> {
+  /**
+   * @brief CUDA synchronous prefetch
+   *
+   * @tparam T Type of memory being prefetched
+   * @param ptr Pointer to memory
+   * @param device Device to prefetch to
+   * @param len Number of elements to prefetch
+   */
+  template <typename T>
+  static void exec(T* ptr, int device, std::size_t len) noexcept
+  {
+    detail::prefetch(ptr, device, len);
+  }
+
+  /**
+   * @brief CUDA asynchronous prefetch
+   *
+   * @tparam T Type of memory being prefetched
+   * @param ptr Pointer to memory
+   * @param device Device to prefetch to
+   * @param len Number of elements to prefetch
+   * @param resource Resource for asynchronous operation
+   * @return Event representing the asynchronous operation
+   */
+  template <typename T>
+  static camp::resources::EventProxy<camp::resources::Resource> exec(T* ptr, int device, std::size_t len,
+                                                                     camp::resources::Resource& resource)
+  {
+    return detail::prefetch_async(ptr, device, len, resource);
+  }
+};
+
+// Memory advice operations define macro to reduce duplication
+#define DEFINE_CUDA_ADVICE_OP(op_name, advice_flag)                      \
+  template <>                                                            \
+  struct op_name<umpire::resource::cuda_platform> {                     \
+    /**                                                                  \
+     * @brief Apply memory advice operation                              \
+     *                                                                   \
+     * @tparam T Type of memory                                          \
+     * @param ptr Pointer to memory                                      \
+     * @param device Device to apply advice for                          \
+     * @param len Number of elements                                     \
+     */                                                                  \
+    template <typename T>                                                \
+    static inline void exec(T* ptr, int device, std::size_t len) noexcept \
+    {                                                                    \
+      detail::advise(ptr, len, device, advice_flag);                     \
+    }                                                                    \
+  };
+
+DEFINE_CUDA_ADVICE_OP(set_accessed_by, cudaMemAdviseSetAccessedBy)
+DEFINE_CUDA_ADVICE_OP(set_preferred_location, cudaMemAdviseSetPreferredLocation)
+DEFINE_CUDA_ADVICE_OP(set_read_mostly, cudaMemAdviseSetReadMostly)
+DEFINE_CUDA_ADVICE_OP(unset_accessed_by, cudaMemAdviseUnsetAccessedBy)
+DEFINE_CUDA_ADVICE_OP(unset_preferred_location, cudaMemAdviseUnsetPreferredLocation)
+DEFINE_CUDA_ADVICE_OP(unset_read_mostly, cudaMemAdviseUnsetReadMostly)
+
+#undef DEFINE_CUDA_ADVICE_OP
+
+} // namespace op
+} // namespace umpire
diff --git a/include/umpire/op/detail/traits.hpp b/include/umpire/op/detail/traits.hpp
new file mode 100644
index 000000000..6d8c24328
--- /dev/null
+++ b/include/umpire/op/detail/traits.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <type_traits>
+
+#include "umpire/config.hpp"
+
+namespace umpire {
+namespace op {
+namespace detail {
+
+template <typename Platform>
+struct supports_memory_advice : std::false_type {};
+
+#if defined(UMPIRE_ENABLE_CUDA)
+template <>
+struct supports_memory_advice<umpire::resource::cuda_platform> : std::true_type {};
+#endif
+
+#if defined(UMPIRE_ENABLE_HIP)
+template <>
+struct supports_memory_advice<umpire::resource::hip_platform> : std::true_type {};
+#endif
+
+#if defined(UMPIRE_ENABLE_SYCL)
+template <>
+struct supports_memory_advice<umpire::resource::sycl_platform> : std::true_type {};
+#endif
+
+#if defined(UMPIRE_ENABLE_OPENMP_TARGET)
+template <>
+struct supports_memory_advice<umpire::resource::omp_target_platform> : std::true_type {};
+#endif
+
+} // namespace detail
+} // namespace op
+} // namespace umpire
diff --git a/include/umpire/op/detail/utils.hpp b/include/umpire/op/detail/utils.hpp
new file mode 100644
index 000000000..81f34ed6a
--- /dev/null
+++ b/include/umpire/op/detail/utils.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <cstddef>
+#include <type_traits>
+
+#include "camp/resource.hpp"
+
+namespace umpire {
+namespace op {
+namespace detail {
+
+/**
+ * @brief Calculate size in bytes based on element count and type
+ *
+ * @tparam T The pointer type (void* or typed pointer)
+ * @param count Number of elements or bytes (if T is void)
+ * @return std::size_t Size in bytes
+ */
+template <typename T>
+inline std::size_t get_size(std::size_t count) noexcept
+{
+  if constexpr (std::is_same_v<T, void>)
+    return count;
+  else
+    return count * sizeof(T);
+}
+
+/**
+ * @brief Create a default event for platforms without native async support
+ *
+ * @param resource The resource to create the event for
+ * @return camp::resources::EventProxy A completed event
+ */
+inline camp::resources::EventProxy<camp::resources::Resource>
+make_completed_event(camp::resources::Resource& resource) noexcept
+{
+  return camp::resources::EventProxy<camp::resources::Resource>{resource};
+}
+
+/**
+ * @brief Get minimum of two values (used for copy size calculations)
+ *
+ * @tparam T The type of values being compared
+ * @param a First value
+ * @param b Second value
+ * @return constexpr T The smaller of the two values
+ */
+template <typename T>
+constexpr T min(const T& a, const T& b) noexcept
+{
+  return (a < b) ? a : b;
+}
+
+} // namespace detail
+} // namespace op
+} // namespace umpire
diff --git a/include/umpire/op/dispatch.hpp b/include/umpire/op/dispatch.hpp
new file mode 100644
index 000000000..771a0a2a4
--- /dev/null
+++ b/include/umpire/op/dispatch.hpp
@@ -0,0 +1,783 @@
+#pragma once
+
+#include <utility>
+
+#include "umpire/ResourceManager.hpp"
+#include "umpire/config.hpp"
+#include "umpire/op/detail/traits.hpp"
+#include "umpire/resource/platform.hpp"
+
+namespace umpire {
+namespace op {
+namespace detail {
+
+/**
+ * @brief Dispatch an operation to the appropriate platform implementation
+ *
+ * @tparam Op The operation template to dispatch
+ * @tparam Args Argument types for the operation
+ * @param platform The platform to dispatch to
+ * @param args Arguments for the operation
+ * @return Result of the operation
+ */
+template