18 changes: 4 additions & 14 deletions ext/TensorKitCUDAExt/cutensormap.jl
@@ -6,6 +6,9 @@ const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂,
function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t))
end
function TensorMap{T, S, N₁, N₂, DA}(t::TensorMap{T, S, N₁, N₂, HA}) where {T, S, N₁, N₂, DA <: CuArray{T}, HA <: Array{T}}
return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}}
@@ -101,18 +104,6 @@ function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)]
end

function Base.convert(
TT::Type{CuTensorMap{T, S, N₁, N₂}},
t::AbstractTensorMap{<:Any, S, N₁, N₂}
) where {T, S, N₁, N₂}
if typeof(t) === TT
return t
else
tnew = TT(undef, space(t))
return copy!(tnew, t)
end
end

function LinearAlgebra.isposdef(t::CuTensorMap)
domain(t) == codomain(t) ||
throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
@@ -138,10 +129,9 @@ function Base.promote_rule(
return CuTensorMap{T, S, N₁, N₂}
end

TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
TensorKit.promote_storage_rule(::Type{<:CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
CuArray{T, N, CUDA.default_memory}


# CuTensorMap exponentation:
function TensorKit.exp!(t::CuTensorMap)
domain(t) == codomain(t) ||
4 changes: 2 additions & 2 deletions ext/TensorKitCUDAExt/truncation.jl
@@ -10,7 +10,7 @@ function MatrixAlgebraKit.findtruncated(
fill!(v, dim(c))
end

perm = sortperm(parent(values); strategy.by, strategy.rev)
perm = isempty(parent(values)) ? Int64[] : sortperm(parent(values); strategy.by, strategy.rev)
cumulative_dim = cumsum(Base.permute!(parent(dims), perm))

result = similar(values, Bool)
@@ -36,7 +36,7 @@ function MatrixAlgebraKit.findtruncated(
end
end

perm = sortperm(parent(values); by = abs, rev = false)
perm = isempty(parent(values)) ? Int64[] : sortperm(parent(values); by = abs, rev = false)
cumulative_err = cumsum(Base.permute!(parent(ϵᵖ), perm))

result = similar(values, Bool)
15 changes: 10 additions & 5 deletions src/tensors/abstracttensor.jl
@@ -53,9 +53,7 @@ storagetype(t) = storagetype(typeof(t))
function storagetype(::Type{T}) where {T <: AbstractTensorMap}
if T isa Union
# attempt to be slightly more specific by promoting unions
Ma = storagetype(T.a)
Mb = storagetype(T.b)
return promote_storagetype(Ma, Mb)
return promote_storagetype(T.a, T.b)
else
# fallback definition by using scalartype
return similarstoragetype(scalartype(T))
@@ -103,8 +101,15 @@ similarstoragetype(X::Type, ::Type{T}) where {T <: Number} =

# implement on tensors
similarstoragetype(::Type{TT}) where {TT <: AbstractTensorMap} = similarstoragetype(storagetype(TT))
similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} =
similarstoragetype(storagetype(TT), T)
function similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number}
return similarstoragetype(storagetype(TT), T)
end
function similarstoragetype(::Type{<:AbstractTensorMap{T, S, N₁, N₂}}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂}
return similarstoragetype(TA, T)
end
function similarstoragetype(t::AbstractTensorMap{T, S, N₁, N₂}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂}
return similarstoragetype(typeof(t), TA)
end
Comment on lines +107 to +112
Member

Suggested change
function similarstoragetype(::Type{<:AbstractTensorMap{T, S, N₁, N₂}}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂}
return similarstoragetype(TA, T)
end
function similarstoragetype(t::AbstractTensorMap{T, S, N₁, N₂}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂}
return similarstoragetype(typeof(t), TA)
end

I think my comment got lost in the github UI, but these methods seem a bit strange to me. Since similarstoragetype(t, T::Type{<:Number}) is supposed to give you a storagetype that is compatible with t but has the scalartype T, I would infer that similarstoragetype(t, A::Type{<:DenseVector}) should give you a storagetype that is compatible with the storage type A, which is simply A itself. Am I missing something here?
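For concreteness, the behaviour I would expect is just a pass-through; a minimal sketch of what I mean (not the actual implementation, only illustrative):

using TensorKit

# Sketch only: if the second argument is already a concrete storage type,
# it is by definition the storage type we asked for, so just return it.
TensorKit.similarstoragetype(::Type{<:AbstractTensorMap}, ::Type{TA}) where {TA <: DenseVector} = TA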

Member Author

Yes, because the problem here is again the AbstractTensorMap, which has a bad default storagetype of Vector. This is again working around the other design decision about JordanMPOTensor and could be gotten rid of once that is changed.
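For context, a rough sketch of how that default bites (hypothetical illustration, parameters chosen arbitrarily):

using TensorKit

# Anything typed as a bare AbstractTensorMap falls through to the
# scalartype-based fallback shown above and reports host storage,
# regardless of where the concrete tensor's data actually lives:
TensorKit.storagetype(AbstractTensorMap{Float64, ComplexSpace, 2, 2})
# -> Vector{Float64}, even when the underlying data is a CuVector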

Member

But who is calling this function?
Where do you end up with similarstoragetype(AbstractTensorMap{...}, CuVector{T}), and if you want this to return CuVector{T}, why are you calling this function in the first place? Alternatively, if this has to go through the promote_storagetype tree, are we okay with hardcoding that CuVector{T} + Vector{T} promotes to CuVector{T}? I understand your point of view, but I'm not sure I want to make these decisions based only on the fact that this works around a bad design choice in upstream packages.

For example, I'm assuming this is a result of JordanMPOTensor having a Union eltype, which then becomes an AbstractTensorMap eltype upon adding/permuting/...
Should we not instead try to make this result in the correct TensorMap eltype, and tackle this code instead: https://github.com/QuantumKitHub/BlockTensorKit.jl/blob/5a6612600c705aaa8cef478ebcede6c8d26a32fe/src/tensors/abstractblocktensor/abstractarray.jl#L286-L305
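To make that trade-off explicit, the hardcoded rule would look roughly like this (sketch only, mirroring the promote_storage_rule definitions added in this PR; both argument orders spelled out in case the promotion tree only tries one):

using TensorKit, CUDA

# Sketch: committing to "mixed host/device storage promotes to the device".
TensorKit.promote_storage_rule(::Type{<:CuVector{T}}, ::Type{Vector{T}}) where {T} =
    CuVector{T, CUDA.default_memory}
TensorKit.promote_storage_rule(::Type{Vector{T}}, ::Type{<:CuVector{T}}) where {T} =
    CuVector{T, CUDA.default_memory}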



# implement on arrays
similarstoragetype(::Type{A}) where {A <: DenseVector{<:Number}} = A
2 changes: 0 additions & 2 deletions src/tensors/adjoint.jl
@@ -50,8 +50,6 @@ Base.@propagate_inbounds function subblock(t::AdjointTensorMap, (f₁, f₂)::Tu
return permutedims(conj(data), (domainind(tp)..., codomainind(tp)...))
end

to_cpu(t::AdjointTensorMap) = adjoint(to_cpu(adjoint(t)))

# Show
#------
function Base.showarg(io::IO, t::AdjointTensorMap, toplevel::Bool)
1 change: 0 additions & 1 deletion src/tensors/braidingtensor.jl
@@ -190,7 +190,6 @@ has_shared_permute(t::BraidingTensor, ::Index2Tuple) = false
function add_transform!(
tdst::AbstractTensorMap,
tsrc::BraidingTensor, (p₁, p₂)::Index2Tuple,
fusiontreetransform,
α::Number, β::Number, backend::AbstractBackend...
)
return add_transform!(
2 changes: 1 addition & 1 deletion src/tensors/diagonal.jl
@@ -280,7 +280,7 @@ end
# ----------------
function TO.tensoradd_type(TC, A::DiagonalTensorMap, ::Index2Tuple{1, 1}, ::Bool)
M = similarstoragetype(A, TC)
return DiagonalTensorMap{TC, spacetype(A), M}
return DiagonalTensorMap{scalartype(M), spacetype(A), M}
Member

I'm a bit torn by this; does it ever happen that the two don't match? I think we would like similarstoragetype(A, TC) -> Type{<:DenseVector{TC}} to always hold, and it might actually be nicer if this explicitly errors instead of silently giving a different result?


I just realized that this is probably because of trying to pass TC <: Type{<:DenseVector}, which I am not a big fan of. I think that is kind of misusing the TensorOperations interface slightly?
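Concretely, the kind of guard I have in mind, as a sketch in the context of diagonal.jl (where TO is the TensorOperations alias used throughout the file):

# Sketch only: keep TC as a scalar type and fail loudly if something else
# (e.g. a storage type) gets passed through the TensorOperations interface.
function TO.tensoradd_type(TC, A::DiagonalTensorMap, ::Index2Tuple{1, 1}, ::Bool)
    TC isa Type && TC <: Number ||
        throw(ArgumentError("expected a scalar type, got $TC"))
    M = similarstoragetype(A, TC)
    return DiagonalTensorMap{TC, spacetype(A), M}
end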

end

function TO.tensorcontract_type(
2 changes: 2 additions & 0 deletions src/tensors/indexmanipulations.jl
@@ -17,6 +17,8 @@ for (operation, manipulation) in (
$promote_op(::Type{T}, ::Type{I}) where {T <: Number, I <: Sector} =
sectorscalartype(I) <: Integer ? T :
sectorscalartype(I) <: Real ? float(T) : complex(T)
$promote_op(::Type{TA}, ::Type{I}) where {TA <: DenseVector, I <: Sector} =
similarstoragetype(TA, $promote_op(eltype(TA), I))
# TODO: currently the manipulations all use sectorscalartype, change to:
# $manipulation_scalartype(I) <: Integer ? T :
# $manipulation_scalartype(I) <: Real ? float(T) : complex(T)
9 changes: 3 additions & 6 deletions src/tensors/tensor.jl
@@ -21,7 +21,6 @@ struct TensorMap{T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} <: Abstrac
end
return TensorMap{T, S, N₁, N₂, A}(data, space)
end

# constructors from data
function TensorMap{T, S, N₁, N₂, A}(
data::A, space::TensorMapSpace{S, N₁, N₂}
@@ -34,6 +33,9 @@ struct TensorMap{T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} <: Abstrac
return new{T, S, N₁, N₂, A}(data, space)
end
end
# constructors from another TensorMap -- no-op
TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂, A}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = t
Member

Suggested change
TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂, A}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = t

I would actually be in favor of only having the second implementation, since there is a subtle semantic difference that I would like to retain:

In general (although this is very poorly documented), the difference between a constructor and a converter is that a converter is meant to be as lossless as possible, and is allowed to take ownership of all parts of the original data. On the other hand, a constructor is typically meant to make an independent copy. You can see this for regular Arrays in the example below:

Where it becomes a bit more vague is of course that you want to have a constructor that takes the different fields and builds up an object from that, without having to create an independent copy (e.g. the inner constructors of a type). This is also where the Julia docs are completely underspecified, so this is more something we should try and figure out ourselves.

I would say that in this case, though, creating an independent copy is probably the safest solution, and it is probably easier to reason about generic code if calling TensorMap{...}(t::TensorMap) always gives you something that does not share data with t, but I am of course open to other opinions :)

julia> a = rand(3)
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> b = Array(a)
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> b[1] = 1
1

julia> a
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> c = convert(Array, a)
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> c[1] = 1
1

julia> a
3-element Vector{Float64}:
 1.0
 0.6265705423537413
 0.6035244268238111

TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = TensorMap(A(t.data), space(t))

"""
Tensor{T, S, N, A<:DenseVector{T}} = TensorMap{T, S, N, 0, A}
@@ -407,11 +409,6 @@ for randf in (:rand, :randn, :randexp, :randisometry)
end
end

# Moving arbitrary TensorMaps to CPU
#-----------------------------
to_cpu(t::TensorMapWithStorage{T, Vector{T}}) where {T} = t # no op
to_cpu(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t)

# Efficient copy constructors
#-----------------------------
Base.copy(t::TensorMap) = typeof(t)(copy(t.data), t.space)
2 changes: 1 addition & 1 deletion src/tensors/tensoroperations.jl
@@ -379,7 +379,7 @@ function blas_contract!(
bstyle = BraidingStyle(sectortype(C))
bstyle isa SymmetricBraiding ||
throw(SectorMismatch("only tensors with symmetric braiding rules can be contracted; try `@planar` instead"))
TC = scalartype(C)
TC = storagetype(C) # without this, Anew below has wrong storagetype
Member

Suggested change
TC = storagetype(C) # without this, Anew below has wrong storagetype
TC = scalartype(C)

I understand that this is a way to make the issues go away, but I don't think this is really the right strategy. If Anew has the wrong storagetype, that means A had the wrong storagetype to begin with, and I would like to make tensorcontract just error in that case, since I don't think we have any support for mixed storagetypes at the moment?

For example, what TensorOperations does with the cuTENSOR backend is that the implementation first sends everything to the GPU: https://github.com/QuantumKitHub/TensorOperations.jl/blob/880d046330a665488f6592a6e6979fdd8af665ec/ext/TensorOperationscuTENSORExt.jl#L80-L91
I think I would be more in favor of having similar behavior, where if you want to run things on a GPU you should either make sure everything is on the GPU from the start, or explicitly select a backend that will copy everything over.

We might also just want to schedule a meeting about this and talk through the implications, since I do agree that there are advantages and disadvantages to making things work more automatically, and I might just be overly dense about this.
For the purpose of getting this PR merged ASAP though, I think it would be nice to just already get the smaller tweaks in, without having to make these decisions?
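Roughly, the pattern I am referring to is the following (a paraphrased sketch, not the literal extension code; the function name and the copy-back step are just illustrative):

using Adapt, CUDA, TensorOperations

# Sketch: a GPU-aware backend moves all operands to the device up front,
# performs the operation there, and copies the result back into the host output.
function gpu_tensoradd!(C, A, pA, conjA, α, β)
    C_d = adapt(CuArray, C)
    A_d = adapt(CuArray, A)
    tensoradd!(C_d, A_d, pA, conjA, α, β)
    return copyto!(C, C_d)
end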

Member Author

The problem is that not having this breaks all of the more complex MPSKit support, which is why I added it in the first place.

Member Author

Is reworking how all of this works right now worth forcing everyone in the group who wants to try GPU + MPSKit to wait (be realistic here) another 6 months?

Member Author

All of the relevant arrays are already on the GPU, but this is currently undetectable because of previously made decisions that obviously never took the concept of "different array storage locations" into account. My strong preference would be:

  • Merge things more or less as is, with a comment and a flag at each location where I had to work around this awful AbstractTensorMap stuff, so it is easy to find later
  • Open an issue and call out the flag
  • Fix the JordanMPOTensor behaviour that is causing it, without yet again forcing everyone who wants to use the GPU stuff to wait ages for an internal change
  • Return to the previously flagged comments and fix things

Then at least we will have a working version to see if things break, and MPSKit can (finally) start moving again.


# check which tensors have to be permuted/copied
copyA = !(TO.isblascontractable(A, pA) && scalartype(A) === TC)
72 changes: 36 additions & 36 deletions test/amd/tensors.jl
@@ -97,7 +97,7 @@ for V in spacelist
for T in (Int, Float32, ComplexF64)
t = @constinferred AMDGPU.rand(T, W)
d = convert(Dict, t)
@test TensorKit.to_cpu(t) == convert(TensorMap, d)
@test adapt(Array, t) == convert(TensorMap, d)
end
end
symmetricbraiding && @timedtestset "Basic linear algebra" begin
@@ -189,10 +189,10 @@ for V in spacelist
t = AMDGPU.rand(T, W)
t2 = @constinferred AMDGPU.rand!(similar(t))
α = rand(T)
@test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2)
@test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t))
@test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t)
@test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t)
@test norm(t, 2) ≈ norm(adapt(Array, t), 2)
@test dot(t2, t) ≈ dot(adapt(Array, t2), adapt(Array, t))
@test adapt(Array, α * t) ≈ α * adapt(Array, t)
@test adapt(Array, t + t) ≈ 2 * adapt(Array, t)
end
end
@timedtestset "Real and imaginary parts" begin
@@ -202,17 +202,17 @@

tr = @constinferred real(t)
@test scalartype(tr) <: Real
@test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr)
@test real(adapt(Array, t)) == adapt(Array, tr)
@test storagetype(tr) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer}

ti = @constinferred imag(t)
@test scalartype(ti) <: Real
@test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti)
@test imag(adapt(Array, t)) == adapt(Array, ti)
@test storagetype(ti) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer}

tc = @inferred complex(t)
@test scalartype(tc) <: Complex
@test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc)
@test complex(adapt(Array, t)) == adapt(Array, tc)
@test storagetype(tc) == ROCVector{complex(T), AMDGPU.Mem.HIPBuffer}

tc2 = @inferred complex(tr, ti)
@@ -275,13 +275,13 @@
p1 = ntuple(n -> p[n], k)
p2 = ntuple(n -> p[k + n], 5 - k)
dt2 = AMDGPU.@allowscalar permute(t, (p1, p2))
ht2 = permute(TensorKit.to_cpu(t), (p1, p2))
@test ht2 == TensorKit.to_cpu(dt2)
ht2 = permute(adapt(Array, t), (p1, p2))
@test ht2 == adapt(Array, dt2)
end

dt3 = AMDGPU.@allowscalar repartition(t, k)
ht3 = repartition(TensorKit.to_cpu(t), k)
@test ht3 == TensorKit.to_cpu(dt3)
ht3 = repartition(adapt(Array, t), k)
@test ht3 == adapt(Array, dt3)
end
end
symmetricbraiding && @timedtestset "Full trace: test self-consistency" begin
@@ -339,10 +339,10 @@
@tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) *
dA2[b, t2, c'] * drhoR[c', c] *
dH[s1, s2, t1, t2]
@tensor hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) *
TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] *
TensorKit.to_cpu(dH)[s1, s2, t1, t2]
@test TensorKit.to_cpu(dHrA12) ≈ hHrA12
@tensor hHrA12[a, s1, s2, c] := adapt(Array, drhoL)[a, a'] * conj(adapt(Array, dA1)[a', t1, b]) *
adapt(Array, dA2)[b, t2, c'] * adapt(Array, drhoR)[c', c] *
adapt(Array, dH)[s1, s2, t1, t2]
@test adapt(Array, dHrA12) ≈ hHrA12
end
end=# # doesn't yet work because of AdjointTensor
BraidingStyle(I) isa HasBraiding && @timedtestset "Index flipping: test flipping inverse" begin
@@ -422,31 +422,31 @@
t1 = AMDGPU.rand(T, W1, W1)
t2 = AMDGPU.rand(T, W2, W2)
t = AMDGPU.rand(T, W1, W2)
ht1 = TensorKit.to_cpu(t1)
ht2 = TensorKit.to_cpu(t2)
ht = TensorKit.to_cpu(t)
@test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht
@test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht
@test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht'
@test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht'
ht1 = adapt(Array, t1)
ht2 = adapt(Array, t2)
ht = adapt(Array, t)
@test adapt(Array, t1 * t) ≈ ht1 * ht
@test adapt(Array, t1' * t) ≈ ht1' * ht
@test adapt(Array, t2 * t') ≈ ht2 * ht'
@test adapt(Array, t2' * t') ≈ ht2' * ht'

#=AMDGPU.@allowscalar begin
@test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1)
@test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht)
@test adapt(Array, inv(t1)) ≈ inv(ht1)
@test adapt(Array, pinv(t)) ≈ pinv(ht)

if T == Float32 || T == ComplexF32
continue
end

@test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht
@test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht
@test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht'
@test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht'
@test adapt(Array, t1 \ t) ≈ ht1 \ ht
@test adapt(Array, t1' \ t) ≈ ht1' \ ht
@test adapt(Array, t2 \ t') ≈ ht2 \ ht'
@test adapt(Array, t2' \ t') ≈ ht2' \ ht'

@test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht
@test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht
@test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht'
@test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht'
@test adapt(Array, t2 / t) ≈ ht2 / ht
@test adapt(Array, t2' / t) ≈ ht2' / ht
@test adapt(Array, t1 / t') ≈ ht1 / ht'
@test adapt(Array, t1' / t') ≈ ht1' / ht'
end=#
end
end
@@ -456,11 +456,11 @@
#=t = project_hermitian!(AMDGPU.randn(T, W, W))
s = dim(W)
@test (@constinferred sqrt(t))^2 ≈ t
@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t))
@test adapt(Array, sqrt(t)) ≈ sqrt(adapt(Array, t))
expt = @constinferred exp(t)
@test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t))
@test adapt(Array, expt) ≈ exp(adapt(Array, t))
@test exp(@constinferred log(project_hermitian!(expt))) ≈ expt
@test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt))
@test adapt(Array, log(project_hermitian!(expt))) ≈ log(adapt(Array, expt))

@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈
id(storagetype(t), W)