fortran-lang · JAi-SATHVIK · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026
diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt
@@ -1,5 +1,9 @@
 set(stats_cppFiles
     ../stdlib_linalg_constants.fypp
+    ../stdlib_sorting.fypp
+    ../stdlib_sorting_ord_sort.fypp
+    ../stdlib_sorting_sort_adjoint.fypp
+    ../stdlib_sorting_sort.fypp
     )
 
 set(stats_fppFiles
@@ -10,12 +14,15 @@ set(stats_fppFiles
     ../stdlib_error.fypp
     ../stdlib_linalg.fypp
     ../stdlib_linalg_diag.fypp
+    ../stdlib_linalg_eigenvalues.fypp
     ../stdlib_linalg_state.fypp
+    ../stdlib_linalg_svd.fypp
     ../stdlib_random.fypp
     ../stdlib_selection.fypp
     ../stdlib_string_type.fypp
     stdlib_stats_corr.fypp
     stdlib_stats_cov.fypp
+    stdlib_stats_pca.fypp
     stdlib_stats_distribution_exponential.fypp
     stdlib_stats_distribution_normal.fypp
     stdlib_stats_distribution_uniform.fypp
@@ -29,4 +36,9 @@ set(stats_fppFiles
     stdlib_stats_var.fypp
 )
 
+set(f90Files
+    ../stdlib_sorting_radix_sort.f90
+    )
+
 configure_stdlib_target(stats "" stats_fppFiles stats_cppFiles)
+target_link_libraries(stats PUBLIC blas lapack $<$<NOT:$<BOOL:${STDLIB_NO_BITSET}>>:bitsets>)
diff --git a/src/stats/stdlib_stats.fypp b/src/stats/stdlib_stats.fypp
@@ -9,10 +9,12 @@ module stdlib_stats
   !! ([Specification](../page/specs/stdlib_stats.html))
   use stdlib_kinds, only: sp, dp, xdp, qp, &
       int8, int16, int32, int64
+  use stdlib_linalg_state, only: linalg_state_type
   implicit none
   private
   ! Public API
   public :: corr, cov, mean, median, moment, var
+  public :: pca, pca_transform, pca_inverse_transform
 
 
   interface corr
@@ -639,4 +641,62 @@ module stdlib_stats
     #:endfor
   end interface moment
 
+
+  #! Note: PCA is limited to single (sp) and double (dp) precision because external
+  #! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions.
+  #! Extended (xdp) and quadruple (qp) precision are not supported for PCA.
+  #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")]
+
+  interface pca
+    !! version: experimental
+    !!
+    !! Principal Component Analysis (PCA)
+    !! ([Specification](../page/specs/stdlib_stats.html#pca))
+    #:for k1, t1 in PCA_KINDS_TYPES
+      module subroutine pca_${k1}$(x, components, singular_values, x_mean, &
+                                  method, overwrite_x, err)
+        ${t1}$, intent(inout), target :: x(:,:)
+        ${t1}$, intent(out) :: components(:,:)
+        real(${k1}$), intent(out) :: singular_values(:)
+        ${t1}$, intent(out), optional :: x_mean(:)
+        character(*), intent(in), optional :: method
+        logical, intent(in), optional :: overwrite_x
+        type(linalg_state_type), intent(out), optional :: err
+      end subroutine pca_${k1}$
+    #:endfor
+  end interface pca
+
+
+  interface pca_transform
+    !! version: experimental
+    !!
+    !! Projects data into the reduced dimensional space
+    !! ([Specification](../page/specs/stdlib_stats.html#pca_transform))
+    #:for k1, t1 in PCA_KINDS_TYPES
+      module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed)
+        ${t1}$, intent(in) :: x(:,:)
+        ${t1}$, intent(in) :: components(:,:)
+        ${t1}$, intent(in), optional :: x_mean(:)
+        ${t1}$, intent(out) :: x_transformed(:,:)
+      end subroutine pca_transform_${k1}$
+    #:endfor
+  end interface pca_transform
+
+
+  interface pca_inverse_transform
+    !! version: experimental
+    !!
+    !! Reconstructs original data from the reduced space
+    !! ([Specification](../page/specs/stdlib_stats.html#pca_inverse_transform))
+    #:for k1, t1 in PCA_KINDS_TYPES
+      module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed)
+        ${t1}$, intent(in) :: x_reduced(:,:)
+        ${t1}$, intent(in) :: components(:,:)
+        ${t1}$, intent(in), optional :: x_mean(:)
+        ${t1}$, intent(out) :: x_reconstructed(:,:)
+      end subroutine pca_inverse_transform_${k1}$
+    #:endfor
+  end interface pca_inverse_transform
+
+
 end module stdlib_stats
diff --git a/src/stats/stdlib_stats_pca.fypp b/src/stats/stdlib_stats_pca.fypp
@@ -0,0 +1,191 @@
+#:include "common.fypp"
+#! Note: PCA is limited to single (sp) and double (dp) precision because external
+#! optimized BLAS/LAPACK libraries (OpenBLAS, MKL) only support these precisions.
+#:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")]
+submodule (stdlib_stats) stdlib_stats_pca
+  use stdlib_kinds, only: sp, dp
+  use stdlib_error, only: error_stop
+  use stdlib_optval, only: optval
+  use stdlib_linalg, only: svd, eigh
+  use stdlib_linalg_constants, only: ilp
+  use stdlib_linalg_blas, only: gemm
+  use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR
+  use stdlib_sorting, only: sort_index
+  implicit none
+
+contains
+
+  #:for k1, t1 in PCA_KINDS_TYPES
+    module subroutine pca_${k1}$(x, components, singular_values, x_mean, &
+                                method, overwrite_x, err)
+      ${t1}$, intent(inout), target :: x(:,:)
+      ${t1}$, intent(out) :: components(:,:)
+      real(${k1}$), intent(out) :: singular_values(:)
+      ${t1}$, intent(out), optional :: x_mean(:)
+      character(*), intent(in), optional :: method
+      logical, intent(in), optional :: overwrite_x
+      type(linalg_state_type), intent(out), optional :: err
+
+      type(linalg_state_type) :: err0
+      integer(ilp) :: n, p, i, j, k, m, n_s
+      ${t1}$, allocatable :: mu(:)
+      character(16) :: method_
+
+      n = size(x, 1, kind=ilp)
+      p = size(x, 2, kind=ilp)
+      k = size(components, 1, kind=ilp)
+
+      method_ = optval(method, "svd")
+
+      ! 1. Calculate mean using intrinsic sum (avoids submodule dependency issues)
+      allocate(mu(p))
+      do j = 1, p
+         mu(j) = sum(x(:, j)) / real(n, ${k1}$)
+      end do
+      if (present(x_mean)) x_mean = mu
+
+      if (method_ == "svd") then
+         ! 2. Center data and call SVD with temporaries for robustness
+         block
+            ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:)
+            n_s = min(n, p)
+            allocate(s_tmp(n_s), vt_tmp(n_s, p))
+
+            if (optval(overwrite_x, .false.)) then
+               do i = 1, n
+                  x(i, :) = x(i, :) - mu
+               end do
+               call svd(x, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0)
+            else
+               block
+                  ${t1}$, allocatable :: x_centered(:,:)
+                  allocate(x_centered(n, p))
+                  do i = 1, n
+                     x_centered(i, :) = x(i, :) - mu
+                  end do
+                  call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err0)
+               end block
+            end if
+
+            if (err0%ok()) then
+               m = min(size(components, 1, kind=ilp), n_s)
+               components(:m, :) = vt_tmp(:m, :)
+               m = min(size(singular_values, 1, kind=ilp), n_s)
+               singular_values(:m) = s_tmp(:m)
+            end if
+         end block
+      else if (method_ == "eig" .or. method_ == "cov") then
+         ! 3. Eigendecomposition of covariance matrix (computed inline)
+         block
+            ${t1}$, allocatable :: c(:,:), vectors(:,:), x_centered(:,:)
+            real(${k1}$), allocatable :: lambda(:), lambda_copy(:)
+            integer(ilp), allocatable :: idx(:)
+            real(${k1}$) :: scale_factor
+
+            allocate(c(p, p), lambda(p), lambda_copy(p), idx(p), vectors(p, p))
+            allocate(x_centered(n, p))
+
+            ! Center data
+            do i = 1, n
+               x_centered(i, :) = x(i, :) - mu
+            end do
+
+            ! Compute covariance matrix: C = X^T * X / (n-1)
+            scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$)
+            do i = 1, p
+               do j = 1, p
+                  c(i, j) = dot_product(x_centered(:, i), x_centered(:, j)) * scale_factor
+               end do
+            end do
+
+            call eigh(c, lambda, vectors=vectors, err=err0)
+
+            if (err0%ok()) then
+               ! Sort eigenvalues in descending order using stdlib_sorting
+               ! sort_index sorts in ascending order, so we negate values
+               lambda_copy = -lambda
+               call sort_index(lambda_copy, idx)
+
+               ! Assign sorted results
+               m = min(size(components, 1, kind=ilp), p)
+               do i = 1, m
+                  components(i, :) = vectors(:, idx(i))
+                  if (lambda(idx(i)) > 0.0_${k1}$) then
+                     singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$))
+                  else
+                     singular_values(i) = 0.0_${k1}$
+                  end if
+               end do
+            end if
+         end block
+      else
+         err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_)
+      end if
+
+      ! Handle error state: return error or stop if err not present
+      call err0%handle(err)
+
+    end subroutine pca_${k1}$
+  #:endfor
+
+
+  #:for k1, t1 in PCA_KINDS_TYPES
+    module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed)
+      ${t1}$, intent(in) :: x(:,:)
+      ${t1}$, intent(in) :: components(:,:)
+      ${t1}$, intent(in), optional :: x_mean(:)
+      ${t1}$, intent(out) :: x_transformed(:,:)
+
+      integer(ilp) :: i, n, p, nc
+      ${t1}$, allocatable :: x_centered(:,:)
+      ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$
+
+      n = size(x, 1, kind=ilp)
+      p = size(x, 2, kind=ilp)
+      nc = size(components, 1, kind=ilp)
+
+      allocate(x_centered(n, p))
+      if (present(x_mean)) then
+         do i = 1, n
+            x_centered(i, :) = x(i, :) - x_mean
+         end do
+      else
+         x_centered = x
+      end if
+
+      ! x_transformed = x_centered * components^T using GEMM
+      ! GEMM: C = alpha * op(A) * op(B) + beta * C
+      ! x_transformed(n, nc) = x_centered(n, p) * components(nc, p)^T
+      call gemm('N', 'T', n, nc, p, alpha, x_centered, n, components, nc, beta, x_transformed, n)
+    end subroutine pca_transform_${k1}$
+  #:endfor
+
+
+  #:for k1, t1 in PCA_KINDS_TYPES
+    module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed)
+      ${t1}$, intent(in) :: x_reduced(:,:)
+      ${t1}$, intent(in) :: components(:,:)
+      ${t1}$, intent(in), optional :: x_mean(:)
+      ${t1}$, intent(out) :: x_reconstructed(:,:)
+
+      integer(ilp) :: i, n, nc, p
+      ${t1}$, parameter :: alpha = 1.0_${k1}$, beta = 0.0_${k1}$
+
+      n = size(x_reduced, 1, kind=ilp)
+      nc = size(x_reduced, 2, kind=ilp)
+      p = size(components, 2, kind=ilp)
+
+      ! x_reconstructed = x_reduced * components using GEMM
+      ! GEMM: C = alpha * op(A) * op(B) + beta * C
+      ! x_reconstructed(n, p) = x_reduced(n, nc) * components(nc, p)
+      call gemm('N', 'N', n, p, nc, alpha, x_reduced, n, components, nc, beta, x_reconstructed, n)
+
+      if (present(x_mean)) then
+         do i = 1, n
+            x_reconstructed(i, :) = x_reconstructed(i, :) + x_mean
+         end do
+      end if
+    end subroutine pca_inverse_transform_${k1}$
+  #:endfor
+
+end submodule stdlib_stats_pca
diff --git a/test/stats/CMakeLists.txt b/test/stats/CMakeLists.txt
@@ -8,12 +8,14 @@ set(fppFiles
     test_distribution_uniform.fypp
     test_distribution_normal.fypp
     test_distribution_exponential.fypp
+    test_pca.fypp
 )
 
 fypp_f90("${fyppFlags}" "${fppFiles}" outFiles)
 
 ADDTEST(corr)
 ADDTEST(cov)
+ADDTEST(pca)
 ADDTEST(mean)
 ADDTEST(median)
 ADDTEST(moment)

diff --git a/test/stats/test_pca.fypp b/test/stats/test_pca.fypp
@@ -0,0 +1,55 @@
+#:include "common.fypp"
+#:set PCA_KINDS = ["sp", "dp"]
+program test_pca
+    use stdlib_error, only: check
+    use stdlib_kinds, only: sp, dp
+    use stdlib_stats, only: pca, pca_transform, pca_inverse_transform
+    use stdlib_linalg_state, only: linalg_state_type
+    implicit none
+
+    #:for k1 in PCA_KINDS
+    real(${k1}$), parameter :: ${k1}$tol = 1000 * epsilon(1._${k1}$)
+    #:endfor
+
+    #:for k1 in PCA_KINDS
+    call test_pca_${k1}$()
+    #:endfor
+
+contains
+
+    #:for k1 in PCA_KINDS
+    subroutine test_pca_${k1}$()
+        real(${k1}$) :: x(3, 2), components(2, 2), s(2), mu(2)
+        real(${k1}$) :: x_trans(3, 2), x_inv(3, 2)
+        type(linalg_state_type) :: err
+
+        ! Data: [1, 2], [3, 4], [5, 6]
+        x = reshape([1.0_${k1}$, 3.0_${k1}$, 5.0_${k1}$, 2.0_${k1}$, 4.0_${k1}$, 6.0_${k1}$], [3, 2])
+
+        ! Test SVD method
+        call pca(x, components, s, x_mean=mu, method="svd", err=err)
+        call check(err%ok(), "pca_${k1}$ svd err")
+        call check(all(abs(mu - [3.0_${k1}$, 4.0_${k1}$]) < ${k1}$tol), "pca_${k1}$ svd mean")
+        ! First component should be approx [0.707, 0.707] (or negative)
+        call check(abs(abs(components(1,1)) - 1.0_${k1}$/sqrt(2.0_${k1}$)) < ${k1}$tol, "pca_${k1}$ svd comp1")
+        call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ svd s1")
+        call check(abs(s(2)) < ${k1}$tol, "pca_${k1}$ svd s2")
+
+        ! Test Transform
+        call pca_transform(x, components, mu, x_trans)
+        ! Second dimension should be zero
+        call check(all(abs(x_trans(:, 2)) < ${k1}$tol), "pca_${k1}$ transform")
+
+        ! Test Inverse Transform
+        call pca_inverse_transform(x_trans, components, mu, x_inv)
+        call check(all(abs(x_inv - x) < ${k1}$tol), "pca_${k1}$ inverse")
+
+        ! Test EIG method
+        call pca(x, components, s, method="eig", err=err)
+        call check(err%ok(), "pca_${k1}$ eig err")
+        call check(abs(s(1) - 4.0_${k1}$) < ${k1}$tol, "pca_${k1}$ eig s1")
+
+    end subroutine test_pca_${k1}$
+    #:endfor
+
+end program test_pca