NVIDIA · konakaji · Nov 26, 2025 · Nov 26, 2025 · Nov 26, 2025 · Nov 27, 2025
diff --git a/docs/sphinx/examples/solvers/python/gqe_h2.py b/docs/sphinx/examples/solvers/python/gqe_h2.py
@@ -44,7 +44,7 @@
 import cudaq_solvers as solvers
 from cudaq import spin
 
-from lightning.fabric.loggers import CSVLogger
+from lightning.pytorch.loggers import CSVLogger
 from cudaq_solvers.gqe_algorithm.gqe import get_default_config
 
 # Set deterministic seed and environment variables for deterministic behavior
@@ -171,18 +171,22 @@ def cost(sampled_ops: list[cudaq.SpinOperator], **kwargs):
 
 # Configure GQE
 cfg = get_default_config()
-cfg.use_fabric_logging = False
-logger = CSVLogger("gqe_h2_logs/gqe.csv")
-cfg.fabric_logger = logger
+cfg.use_lightning_logging = True
+logger = CSVLogger(save_dir="gqe_h2_logs", name="gqe")
+cfg.max_iters = 50
+cfg.ngates = 10
+cfg.lightning_logger = logger
 cfg.save_trajectory = False
 cfg.verbose = True
+cfg.enable_checkpointing = True
 
 # Run GQE
-minE, best_ops = solvers.gqe(cost, op_pool, max_iters=25, ngates=10, config=cfg)
+minE, best_ops = solvers.gqe(cost, op_pool, config=cfg)
 
 # Only print results from rank 0 when using MPI
 if not args.mpi or cudaq.mpi.rank() == 0:
-    print(f'Ground Energy = {minE}')
+    print(f'Ground Energy = {minE} (Ha)')
+    print(f'Error = {minE - molecule.energies["fci_energy"]} (Ha)')
     print('Ansatz Ops')
     for idx in best_ops:
         # Get the first (and only) term since these are simple operators

diff --git a/docs/sphinx/examples/solvers/python/gqe_n2.py b/docs/sphinx/examples/solvers/python/gqe_n2.py
@@ -0,0 +1,185 @@
+# ============================================================================ #
+# Copyright (c) 2025 NVIDIA Corporation & Affiliates.                          #
+# All rights reserved.                                                         #
+#                                                                              #
+# This source code and the accompanying materials are made available under     #
+# the terms of the Apache License 2.0 which accompanies this distribution.     #
+# ============================================================================ #
+# [Begin Documentation]
+
+# GQE is an optional component of the CUDA-QX Solvers Library. To install its
+# dependencies, run:
+# pip install cudaq-solvers[gqe]
+#
+# This example demonstrates GQE on the N2 molecule using the utility function
+# get_gqe_pauli_pool() to generate an operator pool based on UCCSD Pauli terms.
+# The pool is automatically generated from UCCSD operators and scaled by
+# different parameter values, making it suitable for variational quantum algorithms.
+#
+# Run this script with
+# python3 gqe_n2.py
+#
+# In order to leverage CUDA-Q MQPU and distribute the work across
+# multiple QPUs (thereby observing a speed-up), run with:
+#
+# mpiexec -np N and vary N to see the speedup...
+# e.g. PMIX_MCA_gds=hash mpiexec -np 2 python3 gqe_n2.py --mpi
+
+import argparse
+import sys
+
+import cudaq
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--mpi', action='store_true')
+args = parser.parse_args()
+
+if args.mpi:
+    try:
+        cudaq.set_target('nvidia', option='mqpu')
+        cudaq.mpi.initialize()
+    except RuntimeError:
+        print(
+            'Warning: NVIDIA GPUs or MPI not available, unable to use CUDA-Q MQPU. Skipping...'
+        )
+        # sys.exit instead of the exit() helper, which only exists when the
+        # site module has been loaded.
+        sys.exit(0)
+else:
+    try:
+        cudaq.set_target('nvidia', option='fp64')
+    except RuntimeError:
+        # Fall back to the CPU simulator when the GPU target cannot be set.
+        cudaq.set_target('qpp-cpu')
+
+import cudaq_solvers as solvers
+
+from lightning.pytorch.loggers import CSVLogger
+from cudaq_solvers.gqe_algorithm.gqe import get_default_config
+from cudaq_solvers.gqe_algorithm.utils import get_gqe_pauli_pool
+
+# Set deterministic seed and environment variables for deterministic behavior
+# Disable this section for non-deterministic behavior
+import os
+
+import torch
+
+os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
+torch.manual_seed(3047)
+torch.use_deterministic_algorithms(True)
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+
+# Create the molecular Hamiltonian
+geometry = [('N', (0., 0., 0.)), ('N', (0., 0., 1.1))]
+molecule = solvers.create_molecule(geometry,
+                                   'sto-3g',
+                                   0,
+                                   0,
+                                   nele_cas=6,
+                                   norb_cas=6,
+                                   casci=True)
+
+spin_ham = molecule.hamiltonian
+n_qubits = molecule.n_orbitals * 2
+n_electrons = molecule.n_electrons
+
+# Generate the operator pool using utility function
+params = [
+    0.003125, -0.003125, 0.00625, -0.00625, 0.0125, -0.0125, 0.025, -0.025,
+    0.05, -0.05, 0.1, -0.1
+]
+
+op_pool = get_gqe_pauli_pool(n_qubits, n_electrons, params)
+
+
+def term_coefficients(op: cudaq.SpinOperator) -> list[complex]:
+    """Return the coefficient of every term in ``op``, in iteration order."""
+    return [term.evaluate_coefficient() for term in op]
+
+
+def term_words(op: cudaq.SpinOperator) -> list[cudaq.pauli_word]:
+    """Return ``term.get_pauli_word(n_qubits)`` for every term in ``op``."""
+    return [term.get_pauli_word(n_qubits) for term in op]
+
+
+# Kernel that applies the selected operators: X on the first n_electrons
+# qubits, then one exp_pauli per (coefficient, Pauli word) pair.
+@cudaq.kernel
+def kernel(n_qubits: int, n_electrons: int, coeffs: list[float],
+           words: list[cudaq.pauli_word]):
+    q = cudaq.qvector(n_qubits)
+
+    for i in range(n_electrons):
+        x(q[i])
+
+    for i in range(len(coeffs)):
+        exp_pauli(coeffs[i], q, words[i])
+
+
+def cost(sampled_ops: list[cudaq.SpinOperator], **kwargs):
+    """Evaluate the energy of the circuit built from ``sampled_ops``.
+
+    Args:
+        sampled_ops: Operators whose terms ``kernel`` applies in order.
+        **kwargs: With ``--mpi``, must contain ``qpu_id`` for the async call.
+
+    Returns:
+        Without ``--mpi``, the expectation value of ``spin_ham``. With
+        ``--mpi``, a ``(handle, getter)`` pair where ``getter`` calls
+        ``.get().expectation()`` on the result it is given.
+    """
+    full_coeffs = []
+    full_words = []
+
+    for op in sampled_ops:
+        # The kernel takes list[float], so only the real part is passed on.
+        full_coeffs.extend(c.real for c in term_coefficients(op))
+        full_words.extend(term_words(op))
+
+    if args.mpi:
+        handle = cudaq.observe_async(kernel,
+                                     spin_ham,
+                                     n_qubits,
+                                     n_electrons,
+                                     full_coeffs,
+                                     full_words,
+                                     qpu_id=kwargs['qpu_id'])
+        return handle, lambda res: res.get().expectation()
+
+    return cudaq.observe(kernel, spin_ham, n_qubits, n_electrons,
+                         full_coeffs, full_words).expectation()
+
+
+# Configure GQE
+cfg = get_default_config()
+cfg.use_lightning_logging = True
+logger = CSVLogger(save_dir="gqe_n2_logs", name="gqe")
+cfg.max_iters = 50  # For full training, set to more than 1000
+cfg.ngates = 60
+cfg.num_samples = 50
+cfg.buffer_size = 50
+cfg.warmup_size = 50
+cfg.batch_size = 50
+
+cfg.scheduler = 'variance'
+cfg.lightning_logger = logger
+cfg.save_trajectory = False
+cfg.verbose = True
+cfg.benchmark_energy = molecule.energies
+
+# Run GQE
+minE, best_ops = solvers.gqe(cost, op_pool, config=cfg)
+
+# Only print results from rank 0 when using MPI
+if not args.mpi or cudaq.mpi.rank() == 0:
+    print(f'Ground Energy = {minE} (Ha)')
+    print(f'Error = {minE - molecule.energies["R-CASCI"]} (Ha)')
+    print('Ansatz Ops')
+    for idx in best_ops:
+        # Get the first (and only) term since these are simple operators
+        term = next(iter(op_pool[idx]))
+        print(term.evaluate_coefficient().real, term.get_pauli_word(n_qubits))
+
+if args.mpi:
+    cudaq.mpi.finalize()
diff --git a/libs/solvers/python/cudaq_solvers/gqe_algorithm/callbacks.py b/libs/solvers/python/cudaq_solvers/gqe_algorithm/callbacks.py
@@ -0,0 +1,164 @@
+# ============================================================================ #
+# Copyright (c) 2025 NVIDIA Corporation & Affiliates.                          #
+# All rights reserved.                                                         #
+#                                                                              #
+# This source code and the accompanying materials are made available under     #
+# the terms of the Apache License 2.0 which accompanies this distribution.     #
+# ============================================================================ #
+
+import json
+import os
+import sys
+
+import torch
+from lightning.pytorch.callbacks import Callback
+
+
+class MinEnergyCallback(Callback):
+    """Callback to track minimum energy found during training.
+
+    Keeps track of the minimum energy value and corresponding operator indices
+    across all training epochs.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Large sentinel; any smaller energy read from the buffer replaces it.
+        self.min_energy = sys.maxsize
+        self.min_indices = None
+        # Running minimum, appended once per epoch that sees a non-empty buffer.
+        self.min_energy_history = []
+
+    def on_train_epoch_end(self, trainer, pl_module):
+        """Update minimum energy after each epoch.
+
+        Scans the last ``pl_module.num_samples`` buffer entries, updates the
+        running minimum, and logs it together with its difference from each
+        entry of ``pl_module.benchmark_energy``.
+
+        Args:
+            trainer: Lightning trainer instance
+            pl_module: The Pipeline module being trained
+        """
+        total = len(pl_module.buffer)
+        if total == 0:
+            return
+
+        # Check recent energies added to buffer
+        entries = pl_module.buffer.buf
+        for i in range(max(0, total - pl_module.num_samples), total):
+            seq, energy = entries[i]
+            if isinstance(energy, torch.Tensor):
+                energy = energy.item()
+            if energy < self.min_energy:
+                self.min_energy = energy
+                self.min_indices = seq
+
+        self.min_energy_history.append(self.min_energy)
+        pl_module.log("best energy",
+                      self.min_energy,
+                      prog_bar=False,
+                      on_epoch=True,
+                      on_step=False)
+
+        # A module without benchmark energies (attribute missing or None) has
+        # nothing to compare against, so only the absolute energy is logged.
+        benchmarks = getattr(pl_module, "benchmark_energy", None) or {}
+        for key, value in benchmarks.items():
+            pl_module.log(f"best energy - {key}",
+                          self.min_energy - value,
+                          prog_bar=False,
+                          on_epoch=True,
+                          on_step=False)
+
+    def get_results(self):
+        """Get the minimum energy and corresponding indices.
+
+        Returns:
+            tuple: (min_energy, min_indices)
+        """
+        return self.min_energy, self.min_indices
+
+
+class TrajectoryCallback(Callback):
+    """Callback to save training trajectory data.
+
+    Records loss, energies, and indices for each training step and saves
+    to a file at the end of training.
+
+    Args:
+        trajectory_file_path: Path to save trajectory data
+    """
+
+    def __init__(self, trajectory_file_path):
+        super().__init__()
+        self.trajectory_file_path = trajectory_file_path
+        self.trajectory_data = []
+
+    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
+        """Record trajectory data after each training batch.
+
+        Batches without a ``loss`` output, or without both ``idx`` and
+        ``energy`` entries, are not recorded.
+
+        Args:
+            trainer: Lightning trainer instance
+            pl_module: The Pipeline module being trained
+            outputs: Training step outputs
+            batch: Current batch data
+            batch_idx: Index of current batch
+        """
+        if outputs is None or 'loss' not in outputs:
+            return
+
+        loss = outputs['loss']
+        if isinstance(loss, torch.Tensor):
+            loss = loss.item()
+
+        # Get indices and energies from batch
+        indices = batch.get('idx', None)
+        energies = batch.get('energy', None)
+        if indices is None or energies is None:
+            return
+
+        # Tensors are converted to plain Python values so that each record can
+        # be serialized with json at the end of training.
+        if isinstance(indices, torch.Tensor):
+            indices = indices.cpu().numpy().tolist()
+        if isinstance(energies, torch.Tensor):
+            energies = energies.cpu().numpy().tolist()
+
+        self.trajectory_data.append({
+            'epoch': trainer.current_epoch,
+            'batch_idx': batch_idx,
+            'loss': loss,
+            'indices': indices,
+            'energies': energies
+        })
+
+    def on_train_end(self, trainer, pl_module):
+        """Save trajectory data to file at end of training.
+
+        Writes one JSON object per line, creating the parent directory when
+        the path has one. An existing file is overwritten after a warning.
+
+        Args:
+            trainer: Lightning trainer instance
+            pl_module: The Pipeline module being trained
+        """
+        # dirname is '' for a bare file name, and os.makedirs('') raises
+        # FileNotFoundError, so only create a directory when one is named.
+        directory = os.path.dirname(self.trajectory_file_path)
+        if directory:
+            os.makedirs(directory, exist_ok=True)
+
+        if os.path.exists(self.trajectory_file_path):
+            print(
+                f"Warning: Overwriting existing trajectory file at {self.trajectory_file_path}"
+            )
+
+        with open(self.trajectory_file_path, 'w') as f:
+            for data in self.trajectory_data:
+                f.write(json.dumps(data) + '\n')
+
+        print(f"Trajectory data saved to {self.trajectory_file_path}")
diff --git a/libs/solvers/python/cudaq_solvers/gqe_algorithm/data.py b/libs/solvers/python/cudaq_solvers/gqe_algorithm/data.py
@@ -0,0 +1,99 @@
+# ============================================================================ #
+# Copyright (c) 2025 NVIDIA Corporation & Affiliates.                          #
+# All rights reserved.                                                         #
+#                                                                              #
+# This source code and the accompanying materials are made available under     #
+# the terms of the Apache License 2.0 which accompanies this distribution.     #
+# ============================================================================ #
+
+from collections import deque
+from torch.utils.data import Dataset
+import sys
+import pickle
+
+
+class ReplayBuffer:
+    """Bounded first-in-first-out store of ``(seq, energy)`` pairs.
+
+    Entries are kept in insertion order. After every ``push`` the buffer
+    holds at most ``min(size, capacity)`` entries; the oldest are evicted
+    first.
+
+    Args:
+        size: Maximum number of entries retained after a ``push``.
+        capacity: ``maxlen`` of the underlying ``deque``.
+    """
+
+    def __init__(self, size=sys.maxsize, capacity=1000000):
+        self.size = size
+        self.buf = deque(maxlen=capacity)
+
+    def push(self, seq, energy):
+        """Append one ``(seq, energy)`` pair, evicting the oldest if needed."""
+        self.buf.append((seq, energy))
+        if len(self.buf) > self.size:
+            self.buf.popleft()
+
+    def save(self, path):
+        """Pickle the underlying ``deque`` to ``path``."""
+        with open(path, "wb") as f:
+            pickle.dump(self.buf, f)
+
+    def load(self, path):
+        """Replace the contents with the entries pickled at ``path``.
+
+        The loaded entries are copied into a fresh ``deque`` that keeps this
+        buffer's ``capacity`` and are trimmed to ``size``, so the limits
+        chosen at construction still hold when the file was written by a
+        larger buffer. The newest entries are the ones kept.
+
+        NOTE(review): ``pickle.load`` can execute arbitrary code from the
+        file; only load buffers written by a trusted ``save`` call.
+        """
+        with open(path, "rb") as f:
+            loaded = pickle.load(f)
+        self.buf = deque(loaded, maxlen=self.buf.maxlen)
+        while len(self.buf) > self.size:
+            self.buf.popleft()
+
+    def __getitem__(self, idx):
+        """Return entry ``idx`` as ``{"idx": seq, "energy": energy}``."""
+        seq, energy = self.buf[idx]
+        return {"idx": seq, "energy": energy}
+
+    def __len__(self):
+        return len(self.buf)
+
+
+class BufferDataset(Dataset):
+    """Map-style dataset that cycles over a ``ReplayBuffer``.
+
+    The dataset reports ``len(buffer) * repetition`` items and maps index
+    ``i`` to buffer entry ``i % len(buffer)``.
+
+    Args:
+        buffer: Buffer to read entries from.
+        repetition: Number of times each buffer entry appears in the dataset.
+    """
+
+    def __init__(self, buffer: ReplayBuffer, repetition):
+        self.buffer = buffer
+        self.repetition = repetition
+
+    def __getitem__(self, idx):
+        """Return the buffer entry for dataset index ``idx``.
+
+        Raises:
+            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
+        """
+        total = len(self.buffer) * self.repetition
+        # Without this bound the modulo below accepts every integer, so plain
+        # iteration (``for item in dataset``) would never stop, and an empty
+        # buffer would fail with ZeroDivisionError instead of IndexError.
+        if not -total <= idx < total:
+            raise IndexError(
+                f"index {idx} is out of range for dataset of length {total}")
+        return self.buffer[idx % len(self.buffer)]
+
+    def __len__(self):
+        return len(self.buffer) * self.repetition