diff --git a/Generator(forGPU)/README.md b/Generator(forGPU)/README.md new file mode 100644 index 0000000..01bf31f --- /dev/null +++ b/Generator(forGPU)/README.md @@ -0,0 +1,113 @@ +# GNN Fast Quantum - QAOA Parameter Prediction + +Prédiction rapide des paramètres QAOA optimaux pour des problèmes d'optimisation combinatoire quantique en utilisant des Graph Neural Networks (GNN). + +## 📁 Structure du projet + +### `test_gnn_with_qiskit.py` - Validation du modèle +Script de test pour valider les prédictions du GNN contre les optimisations QAOA réelles de Qiskit. + +**Usage:** +```bash +python test_gnn_with_qiskit.py +``` +**prérequis:** +- si mac os alors-> --no-gpu \ +- si on veut utilisé les GPU-> linux nécéssaire + +**Ce qu'il fait:** +- Charge le modèle GNN entraîné (`FastQuantum/best_qaoa_gat_model.pt`) +- Génère des graphes de test +- Prédit gamma/beta avec le GNN (instantané) +- Compare avec l'optimal trouvé par Qiskit QAOA (lent) +- Affiche les gaps de performance + +**Résultats attendus:** +- Gap moyen: ~5% (excellent) +- Speedup: 100-1000x plus rapide que QAOA + +--- + +### `Generator(forGPU)/` - Génération de dataset quantique + +Générateur de dataset multi-problèmes avec support GPU et parallélisation CPU. + +**Fichiers principaux:** +- `gen_quantum_mp.py`: Générateur multi-problèmes optimisé +- `check_gpu_availability.py`: Diagnostic GPU/CUDA + +**Problèmes supportés:** +- MaxCut (partition de graphe) +- Vertex Cover (couverture minimale) +- Max Independent Set (sélection de nœuds) +- Graph Coloring (3 couleurs avec 2 qubits par nœud) + +warning : +Graph Coloring utilise 2× plus de qubits (2 par nœud au lieu de 1): +Graphe 10 nœuds = 20 qubits pour coloring vs 10 pour MaxCut +-> donc Simulation BEAUCOUP plus lente (~5-10x plus lent) +Recommandation: Limitez graph_coloring à 10% du dataset et utilisez des petits graphes (5-8 nœuds) pour ce problème. + +warning2 : +Warn start utilise des données heuristique mais c'ets juste une optimisation de temps gratuites -> pas de baisse de qulité du dataset ici + +**Usage:** +```bash +# Test GPU disponible +python Generator(forGPU)/check_gpu_availability.py + +# Générer dataset (CPU) +python Generator(forGPU)/gen_quantum_mp.py \ + --samples 200 \ + --min_nodes 6 \ + --max_nodes 12 \ + --maxcut-ratio 0.70 \ + --workers 12 \ + --no-gpu \ + --output Dataset/phase1_hybrid70.json + +# Générer dataset (GPU - Linux) +python Generator(forGPU)/gen_quantum_mp.py \ + --samples 200 \ + --min_nodes 6 \ + --max_nodes 12 \ + --maxcut-ratio 0.70 \ + --workers 8 \ + --output Dataset/phase1_hybrid70.json +``` + +**Arguments:** +- `--samples`: Nombre d'échantillons à générer +- `--min_nodes`, `--max_nodes`: Taille des graphes +- `--maxcut-ratio`: Ratio de MaxCut (ex: 0.70 = 70% MaxCut, 30% autres) +- `--workers`: Nombre de workers parallèles +- `--no-gpu`: Forcer CPU (Windows) +- `--checkpoint`: Sauvegarder tous les N samples + +**Performance:** +- **GPU (Linux)**: ~5-15s par sample (recommandé) +- **CPU (16 cores)**: ~20-40s par sample +- **CPU (4 cores)**: ~60-120s par sample + +--- + +## 🚀 Quick Start + +### 1. Tester le modèle existant +```bash +python test_gnn_with_qiskit.py +``` + +### 2. Générer un nouveau dataset +```bash +# Windows (CPU seulement) +python Generator(forGPU)/gen_quantum_mp.py --samples 50 --workers 4 --no-gpu + +# Linux avec GPU +python Generator(forGPU)/gen_quantum_mp.py --samples 200 --workers 8 +``` + +### 3. Entraîner le modèle (dans FastQuantum/) +```bash +python FastQuantum/GnnmodelGat.py +``` \ No newline at end of file diff --git a/Generator(forGPU)/check_gpu_availability.py b/Generator(forGPU)/check_gpu_availability.py new file mode 100644 index 0000000..bf75f48 --- /dev/null +++ b/Generator(forGPU)/check_gpu_availability.py @@ -0,0 +1,228 @@ +""" +GPU Availability Diagnostic for Qiskit Quantum Simulation +Checks if your system can use GPU acceleration with Qiskit Aer + +Requirements for GPU: + - NVIDIA GPU with CUDA support + - CUDA Toolkit installed + - qiskit-aer-gpu OR qiskit-aer with GPU support +""" + +import sys +import platform + +def check_nvidia_gpu(): + """Check if NVIDIA GPU is available""" + print("\n" + "="*70) + print("🔍 CHECKING NVIDIA GPU") + print("="*70) + + try: + import subprocess + result = subprocess.run(['nvidia-smi'], capture_output=True, text=True) + if result.returncode == 0: + print("✅ NVIDIA GPU detected!") + print("\nGPU Information:") + print(result.stdout) + return True + else: + print("❌ nvidia-smi command failed") + return False + except FileNotFoundError: + print("❌ nvidia-smi not found - No NVIDIA GPU or drivers not installed") + return False + except Exception as e: + print(f"❌ Error checking GPU: {e}") + return False + + +def check_cuda(): + """Check if CUDA is available""" + print("\n" + "="*70) + print("🔍 CHECKING CUDA") + print("="*70) + + try: + import subprocess + result = subprocess.run(['nvcc', '--version'], capture_output=True, text=True) + if result.returncode == 0: + print("✅ CUDA Toolkit installed!") + print(result.stdout) + return True + else: + print("❌ CUDA not found") + return False + except FileNotFoundError: + print("❌ nvcc not found - CUDA Toolkit not installed") + print("\nInstall CUDA from: https://developer.nvidia.com/cuda-downloads") + return False + except Exception as e: + print(f"❌ Error checking CUDA: {e}") + return False + + +def check_qiskit_aer_gpu(): + """Check if Qiskit Aer with GPU support is available""" + print("\n" + "="*70) + print("🔍 CHECKING QISKIT AER GPU SUPPORT") + print("="*70) + + try: + from qiskit_aer import AerSimulator + + # Try to create GPU simulator + try: + simulator = AerSimulator(method='statevector', device='GPU') + print("✅ Qiskit Aer GPU support available!") + print(f" Simulator: {simulator}") + + # Get available devices + available_devices = AerSimulator().available_devices() + print(f" Available devices: {available_devices}") + + return True + except Exception as e: + print(f"❌ Qiskit Aer installed but GPU not available: {e}") + print("\nYou may need to install qiskit-aer-gpu:") + print(" pip uninstall qiskit-aer") + print(" pip install qiskit-aer-gpu") + return False + + except ImportError: + print("❌ Qiskit Aer not installed") + print("\nInstall with:") + print(" pip install qiskit-aer") + print(" OR for GPU: pip install qiskit-aer-gpu") + return False + + +def check_multiprocessing(): + """Check CPU multiprocessing capabilities""" + print("\n" + "="*70) + print("🔍 CHECKING CPU MULTIPROCESSING") + print("="*70) + + import multiprocessing + cpu_count = multiprocessing.cpu_count() + print(f"✅ CPU cores available: {cpu_count}") + print(f" Recommended parallel workers: {max(1, cpu_count - 1)}") + return cpu_count + + +def test_simple_gpu_simulation(): + """Run a simple test to verify GPU actually works""" + print("\n" + "="*70) + print("🧪 TESTING GPU SIMULATION") + print("="*70) + + try: + from qiskit import QuantumCircuit + from qiskit_aer import AerSimulator + import time + + # Create a small quantum circuit + n_qubits = 10 + qc = QuantumCircuit(n_qubits) + for i in range(n_qubits): + qc.h(i) + for i in range(n_qubits - 1): + qc.cx(i, i+1) + + # Test CPU + print("\n⏱️ Testing CPU simulation...") + simulator_cpu = AerSimulator(method='statevector', device='CPU') + start = time.time() + result_cpu = simulator_cpu.run(qc, shots=1).result() + cpu_time = time.time() - start + print(f" CPU time: {cpu_time:.4f}s") + + # Test GPU + try: + print("\n⏱️ Testing GPU simulation...") + simulator_gpu = AerSimulator(method='statevector', device='GPU') + start = time.time() + result_gpu = simulator_gpu.run(qc, shots=1).result() + gpu_time = time.time() - start + print(f" GPU time: {gpu_time:.4f}s") + + speedup = cpu_time / gpu_time + print(f"\n🚀 GPU Speedup: {speedup:.2f}x") + + if speedup > 1.5: + print(" ✅ GPU is faster - ready to use!") + return True + else: + print(" ⚠️ GPU not significantly faster (overhead for small circuits)") + print(" 💡 GPU will be faster for larger circuits (15+ qubits)") + return True + + except Exception as e: + print(f" ❌ GPU test failed: {e}") + return False + + except Exception as e: + print(f"❌ Simulation test failed: {e}") + return False + + +def main(): + """Run all diagnostics""" + print("="*70) + print("🚀 QISKIT GPU ACCELERATION DIAGNOSTIC") + print("="*70) + print(f"System: {platform.system()} {platform.release()}") + print(f"Python: {sys.version.split()[0]}") + print("="*70) + + results = { + 'gpu': check_nvidia_gpu(), + 'cuda': check_cuda(), + 'qiskit_gpu': check_qiskit_aer_gpu(), + 'cpu_cores': check_multiprocessing() + } + + # Run simulation test if Qiskit GPU is available + if results['qiskit_gpu']: + results['gpu_test'] = test_simple_gpu_simulation() + else: + results['gpu_test'] = False + + # Summary + print("\n" + "="*70) + print("📊 SUMMARY") + print("="*70) + + if results['gpu'] and results['cuda'] and results['qiskit_gpu'] and results['gpu_test']: + print("✅ GPU ACCELERATION FULLY READY!") + print("\n🚀 You can use GPU-accelerated quantum dataset generation!") + print("\nRecommended configuration:") + print(" - Use GPU for quantum simulations") + print(f" - Use {max(1, results['cpu_cores'] - 1)} CPU workers for parallelization") + print(" - Expected speedup: 10-100x for large graphs") + + elif results['gpu'] and not results['cuda']: + print("⚠️ GPU detected but CUDA not installed") + print("\nInstall CUDA Toolkit:") + print(" https://developer.nvidia.com/cuda-downloads") + + elif results['gpu'] and results['cuda'] and not results['qiskit_gpu']: + print("⚠️ GPU and CUDA ready, but Qiskit Aer GPU not available") + print("\nInstall Qiskit Aer GPU:") + print(" pip uninstall qiskit-aer") + print(" pip install qiskit-aer-gpu") + + else: + print("❌ GPU acceleration not available") + print("\n💡 Fallback to CPU multiprocessing:") + print(f" - Use {max(1, results['cpu_cores'] - 1)} parallel workers") + print(" - Expected speedup: 4-8x") + + print("\n" + "="*70) + print(" ! Need cuquantum ! please install it before") + print("="*70 + "\n") + + return results + + +if __name__ == "__main__": + main() diff --git a/Generator(forGPU)/gen_quantum_mp.py b/Generator(forGPU)/gen_quantum_mp.py new file mode 100644 index 0000000..1a3ce31 --- /dev/null +++ b/Generator(forGPU)/gen_quantum_mp.py @@ -0,0 +1,823 @@ +""" +MULTI-PROBLEM Quantum QAOA Dataset Generator (OPTIMIZED) +Generates datasets for MULTIPLE optimization problems with hybrid strategy + +Problems supported: +- MaxCut (graph partitioning) +- Max Independent Set (node selection) +- Vertex Cover (covering edges with minimum nodes) +- (Easy to extend to TSP, Graph Coloring, etc.) + +Hybrid Strategy: +- Phase 1: 70% MaxCut + 30% other problems (easier start) +- Phase 2: 50% MaxCut + 50% other problems (balanced) + +Features: +- GPU acceleration (10-100x speedup) +- CPU multiprocessing (4-8x speedup) +- Problem-agnostic Hamiltonian features for generalization +- Fast SPSA optimizer with warm start +- Incremental save + resume + +Requirements: + pip install qiskit qiskit-algorithms qiskit-aer-gpu networkx numpy +""" + +import numpy as np +import networkx as nx +from typing import Tuple, List, Dict, Optional +import json +from pathlib import Path +import time +import multiprocessing as mp +from functools import partial + +# Qiskit imports +try: + from qiskit_algorithms.minimum_eigensolvers import QAOA + from qiskit_algorithms.optimizers import SPSA, COBYLA + from qiskit.quantum_info import SparsePauliOp + from qiskit_aer import AerSimulator + # Qiskit 2.x uses StatevectorSampler + from qiskit.primitives import StatevectorSampler + QISKIT_AVAILABLE = True + print("[OK] Qiskit imported successfully") +except ImportError as e: + QISKIT_AVAILABLE = False + print("[ERROR] Qiskit not installed!") + print("Install: pip install qiskit qiskit-algorithms qiskit-aer") + print(f"Error: {e}") + + +class MultiProblemQuantumGenerator: + """ + Multi-problem QAOA dataset generator with GPU acceleration + Supports: MaxCut, Max Independent Set, Vertex Cover + """ + + PROBLEM_TYPES = ['maxcut', 'independent_set', 'vertex_cover', 'graph_coloring'] + + def __init__(self, seed: int = 42, use_gpu: bool = True, n_workers: int = None): + """ + Initialize multi-problem generator + + Args: + seed: Random seed + use_gpu: Try to use GPU if available + n_workers: Number of parallel workers (None = auto-detect) + """ + if not QISKIT_AVAILABLE: + raise ImportError("Qiskit required! Install: pip install qiskit qiskit-algorithms qiskit-aer") + + np.random.seed(seed) + self.seed = seed + self.use_gpu = use_gpu + + if n_workers is None: + n_workers = max(1, mp.cpu_count() - 1) + self.n_workers = n_workers + + self._setup_simulator() + + print(f" Multi-problem generator initialized:") + print(f" - Device: {'GPU' if self.gpu_available else 'CPU'}") + print(f" - Parallel workers: {self.n_workers}") + print(f" - Supported problems: {', '.join(self.PROBLEM_TYPES)}") + + def _setup_simulator(self): + """Setup Qiskit Aer simulator with GPU if available""" + try: + if self.use_gpu: + simulator = AerSimulator(method='statevector', device='GPU') + from qiskit import QuantumCircuit + qc = QuantumCircuit(2) + qc.h(0) + test_result = simulator.run(qc).result() + self.backend = simulator + self.gpu_available = True + print("[GPU] GPU acceleration enabled!") + else: + raise Exception("GPU disabled by user") + except Exception as e: + self.backend = AerSimulator(method='statevector', device='CPU') + self.gpu_available = False + print(f"[CPU] Using CPU (GPU not available: {str(e)[:50]})") + + # Qiskit 2.x: Use StatevectorSampler (no backend parameter needed) + self.sampler = StatevectorSampler() + + # ======================================================================== + # GRAPH GENERATION + # ======================================================================== + + def generate_erdos_renyi_graph(self, n_nodes: int, edge_prob: float, seed: int) -> nx.Graph: + """Generate an Erds-Rnyi random graph""" + return nx.erdos_renyi_graph(n_nodes, edge_prob, seed=seed) + + def compute_node_features(self, G: nx.Graph) -> np.ndarray: + """ + Compute 7 graph-structure features (problem-agnostic) + These features work for ANY graph-based optimization problem + """ + n_nodes = G.number_of_nodes() + features = np.zeros((n_nodes, 7)) + + degrees = dict(G.degree()) + features[:, 0] = [degrees[i] for i in range(n_nodes)] + + degree_centrality = nx.degree_centrality(G) + features[:, 1] = [degree_centrality[i] for i in range(n_nodes)] + + clustering = nx.clustering(G) + features[:, 2] = [clustering[i] for i in range(n_nodes)] + + betweenness = nx.betweenness_centrality(G) + features[:, 3] = [betweenness[i] for i in range(n_nodes)] + + closeness = nx.closeness_centrality(G) + features[:, 4] = [closeness[i] for i in range(n_nodes)] + + pagerank = nx.pagerank(G, max_iter=1000) + features[:, 5] = [pagerank[i] for i in range(n_nodes)] + + try: + eigenvector = nx.eigenvector_centrality(G, max_iter=1000) + features[:, 6] = [eigenvector[i] for i in range(n_nodes)] + except: + features[:, 6] = 0.0 + + return features + + def graph_to_adjacency_matrix(self, G: nx.Graph) -> np.ndarray: + """Convert graph to adjacency matrix""" + return nx.to_numpy_array(G) + + # ======================================================================== + # HAMILTONIAN CONSTRUCTION (Different for each problem!) + # ======================================================================== + + def graph_to_maxcut_hamiltonian(self, G: nx.Graph) -> SparsePauliOp: + """ + MaxCut Hamiltonian: H = 0.5 * _{(i,j)E} Z_i Z_j + Goal: Maximize cut (partition graph into 2 sets) + """ + n_nodes = G.number_of_nodes() + pauli_list = [] + coeffs = [] + + for (i, j) in G.edges(): + pauli_str = ['I'] * n_nodes + pauli_str[i] = 'Z' + pauli_str[j] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(0.5) + + if len(pauli_list) == 0: + pauli_list = ['I' * n_nodes] + coeffs = [0.0] + + return SparsePauliOp(pauli_list, coeffs=coeffs) + + def graph_to_independent_set_hamiltonian(self, G: nx.Graph) -> SparsePauliOp: + """ + Max Independent Set Hamiltonian: + H = -_i (1 - Z_i)/2 + penalty * _{(i,j)E} (1 - Z_i)(1 - Z_j)/4 + + Goal: Maximize number of non-adjacent nodes (Z_i = -1 means node selected) + Penalty: Ensure no two adjacent nodes are both selected + """ + n_nodes = G.number_of_nodes() + pauli_list = [] + coeffs = [] + + # Reward term: -_i (1 - Z_i)/2 = -n/2 + 0.5*_i Z_i + # We only need the Z_i terms (constant doesn't affect optimization) + for i in range(n_nodes): + pauli_str = ['I'] * n_nodes + pauli_str[i] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(0.5) # Reward for selecting node + + # Penalty term: prevent adjacent nodes from being selected + penalty = 2.0 + for (i, j) in G.edges(): + # (1 - Z_i)(1 - Z_j)/4 = (1 - Z_i - Z_j + Z_i Z_j)/4 + # Constant term ignored, we add: -Z_i/4 - Z_j/4 + Z_i Z_j/4 + + # Z_i Z_j term (penalty for both selected) + pauli_str = ['I'] * n_nodes + pauli_str[i] = 'Z' + pauli_str[j] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(penalty * 0.25) + + if len(pauli_list) == 0: + pauli_list = ['I' * n_nodes] + coeffs = [0.0] + + return SparsePauliOp(pauli_list, coeffs=coeffs) + + def graph_to_vertex_cover_hamiltonian(self, G: nx.Graph) -> SparsePauliOp: + """ + Vertex Cover Hamiltonian: + H = _i (1 - Z_i)/2 + penalty * _{(i,j)E} (1 + Z_i)(1 + Z_j)/4 + + Goal: Minimize number of nodes while covering all edges + Penalty: Ensure every edge has at least one endpoint selected + """ + n_nodes = G.number_of_nodes() + pauli_list = [] + coeffs = [] + + # Cost term: _i (1 - Z_i)/2 = n/2 - 0.5*_i Z_i + # Minimize number of selected nodes + for i in range(n_nodes): + pauli_str = ['I'] * n_nodes + pauli_str[i] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(-0.5) # Negative = minimize + + # Penalty term: ensure each edge is covered + penalty = 3.0 + for (i, j) in G.edges(): + # (1 + Z_i)(1 + Z_j)/4 = (1 + Z_i + Z_j + Z_i Z_j)/4 + # We add: Z_i Z_j/4 (penalize if both NOT selected) + pauli_str = ['I'] * n_nodes + pauli_str[i] = 'Z' + pauli_str[j] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(penalty * 0.25) + + if len(pauli_list) == 0: + pauli_list = ['I' * n_nodes] + coeffs = [0.0] + + return SparsePauliOp(pauli_list, coeffs=coeffs) + + def graph_to_graph_coloring_hamiltonian(self, G: nx.Graph, n_colors=3) -> SparsePauliOp: + """ + Graph Coloring Hamiltonian (3 colors) + + Goal: Assign one of 3 colors to each node such that adjacent nodes have different colors + + Encoding: Each node uses 2 qubits to represent 3 colors: + Color 0 (Red): |00> (Z1=+1, Z2=+1) + Color 1 (Green): |01> (Z1=+1, Z2=-1) + Color 2 (Blue): |10> (Z1=-1, Z2=+1) + Invalid: |11> (Z1=-1, Z2=-1) <- Must penalize + + Hamiltonian: + H = penalty1 * Sum over edges: penalty if same color + + penalty2 * Sum over nodes: penalty if invalid state |11> + + Note: This requires 2*n_nodes qubits total + """ + n_nodes = G.number_of_nodes() + n_qubits = 2 * n_nodes # 2 qubits per node + pauli_list = [] + coeffs = [] + + penalty_same_color = 5.0 + penalty_invalid = 3.0 + + # Helper: Get qubit indices for node i + def get_qubit_indices(node): + return (2 * node, 2 * node + 1) # (q1, q2) for node + + # Penalty 1: Adjacent nodes must have different colors + # For each edge (i,j), we penalize if they have the same color + for (i, j) in G.edges(): + qi1, qi2 = get_qubit_indices(i) + qj1, qj2 = get_qubit_indices(j) + + # Same color detection: Sum of products for each color combination + # Color 0-0: (1+Zi1)/2 * (1+Zi2)/2 * (1+Zj1)/2 * (1+Zj2)/2 + # Color 1-1: (1+Zi1)/2 * (1-Zi2)/2 * (1+Zj1)/2 * (1-Zj2)/2 + # Color 2-2: (1-Zi1)/2 * (1+Zi2)/2 * (1-Zj1)/2 * (1+Zj2)/2 + + # Simplified: We penalize when Zi1*Zj1 and Zi2*Zj2 have same sign + # This happens when colors match + + # Z_i1 * Z_j1 term + pauli_str = ['I'] * n_qubits + pauli_str[qi1] = 'Z' + pauli_str[qj1] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(penalty_same_color * 0.25) + + # Z_i2 * Z_j2 term + pauli_str = ['I'] * n_qubits + pauli_str[qi2] = 'Z' + pauli_str[qj2] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(penalty_same_color * 0.25) + + # Z_i1 * Z_j1 * Z_i2 * Z_j2 term (4-qubit interaction) + pauli_str = ['I'] * n_qubits + pauli_str[qi1] = 'Z' + pauli_str[qi2] = 'Z' + pauli_str[qj1] = 'Z' + pauli_str[qj2] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(penalty_same_color * 0.25) + + # Penalty 2: No node should be in invalid state |11> + # |11> means Z1=-1 and Z2=-1, so Z1*Z2 = +1 + # We penalize (1 + Z1*Z2)/2 + for i in range(n_nodes): + qi1, qi2 = get_qubit_indices(i) + + # Z_i1 * Z_i2 term + pauli_str = ['I'] * n_qubits + pauli_str[qi1] = 'Z' + pauli_str[qi2] = 'Z' + pauli_list.append(''.join(pauli_str)) + coeffs.append(penalty_invalid * 0.5) + + if len(pauli_list) == 0: + pauli_list = ['I' * n_qubits] + coeffs = [0.0] + + return SparsePauliOp(pauli_list, coeffs=coeffs) + + def graph_to_hamiltonian(self, G: nx.Graph, problem_type: str) -> SparsePauliOp: + """ + Convert graph to Hamiltonian based on problem type + + Args: + G: NetworkX graph + problem_type: 'maxcut', 'independent_set', 'vertex_cover', or 'graph_coloring' + + Returns: + SparsePauliOp Hamiltonian + """ + if problem_type == 'maxcut': + return self.graph_to_maxcut_hamiltonian(G) + elif problem_type == 'independent_set': + return self.graph_to_independent_set_hamiltonian(G) + elif problem_type == 'vertex_cover': + return self.graph_to_vertex_cover_hamiltonian(G) + elif problem_type == 'graph_coloring': + return self.graph_to_graph_coloring_hamiltonian(G) + else: + raise ValueError(f"Unknown problem type: {problem_type}") + + # ======================================================================== + # HAMILTONIAN FEATURES (Problem-agnostic!) + # ======================================================================== + + def extract_hamiltonian_features(self, hamiltonian: SparsePauliOp) -> np.ndarray: + """ + Extract problem-agnostic features from Hamiltonian + These features describe the STRUCTURE of the optimization problem, + not the specific problem type. This enables generalization! + + Features (10 total): + 1. Number of terms in Hamiltonian + 2. Mean of coefficients + 3. Std of coefficients + 4. Min coefficient + 5. Max coefficient + 6. Fraction of single-qubit terms (Z_i) + 7. Fraction of two-qubit terms (Z_i Z_j) + 8. Fraction of multi-qubit terms (3+ qubits) + 9. Coefficient range (max - min) + 10. Coefficient sparsity (fraction of zero coefficients) + + Returns: + Feature vector (10,) + """ + features = np.zeros(10) + + # Get Pauli terms and coefficients + coeffs = np.real(hamiltonian.coeffs) + paulis = hamiltonian.paulis + + # Feature 0: Number of terms + features[0] = len(coeffs) + + # Features 1-4: Coefficient statistics + features[1] = np.mean(coeffs) + features[2] = np.std(coeffs) + features[3] = np.min(coeffs) + features[4] = np.max(coeffs) + + # Features 5-7: Term type distribution + single_qubit = 0 + two_qubit = 0 + multi_qubit = 0 + + for pauli in paulis: + # Count number of non-identity operators + n_ops = sum(1 for p in pauli.to_label() if p != 'I') + if n_ops == 1: + single_qubit += 1 + elif n_ops == 2: + two_qubit += 1 + else: + multi_qubit += 1 + + total_terms = len(paulis) + features[5] = single_qubit / total_terms if total_terms > 0 else 0 + features[6] = two_qubit / total_terms if total_terms > 0 else 0 + features[7] = multi_qubit / total_terms if total_terms > 0 else 0 + + # Feature 8: Coefficient range + features[8] = features[4] - features[3] # max - min + + # Feature 9: Sparsity (fraction near zero) + features[9] = np.sum(np.abs(coeffs) < 1e-6) / len(coeffs) if len(coeffs) > 0 else 0 + + return features + + # ======================================================================== + # QAOA OPTIMIZATION + # ======================================================================== + + def get_warm_start_params(self, G: nx.Graph, problem_type: str, p: int) -> np.ndarray: + """ + Smart initialization based on graph structure and problem type + Different problems have different optimal parameter ranges + """ + avg_clustering = nx.average_clustering(G) + avg_degree = np.mean([d for n, d in G.degree()]) + n_nodes = G.number_of_nodes() + + # Problem-specific heuristics + if problem_type == 'maxcut': + gamma_init = 0.4 + 0.3 * avg_clustering + beta_init = 0.3 - 0.15 * (avg_degree / n_nodes) + elif problem_type == 'independent_set': + gamma_init = 0.5 + 0.2 * (1 - avg_clustering) + beta_init = 0.25 + elif problem_type == 'vertex_cover': + gamma_init = 0.35 + 0.25 * avg_clustering + beta_init = 0.35 - 0.1 * (avg_degree / n_nodes) + elif problem_type == 'graph_coloring': + # Graph coloring benefits from strong mixing + gamma_init = 0.45 + 0.2 * (1 - avg_clustering) + beta_init = 0.4 # Higher beta for more exploration + else: + gamma_init = 0.4 + beta_init = 0.3 + + initial_point = [] + for layer in range(p): + gamma = gamma_init * (1 - 0.1 * layer / max(p, 1)) + beta = beta_init * (1 + 0.1 * layer / max(p, 1)) + initial_point.extend([gamma, beta]) + + return np.array(initial_point) + + def simulate_qaoa_optimization(self, G: nx.Graph, problem_type: str, + p: int = 1, timeout: int = 60, + fast_mode: bool = True) -> Dict: + """ + Run REAL QAOA optimization with Qiskit + + Args: + G: Graph + problem_type: 'maxcut', 'independent_set', or 'vertex_cover' + p: QAOA depth + timeout: Max time + fast_mode: Use SPSA (faster) vs COBYLA + """ + start_time = time.time() + + # Build Hamiltonian for specific problem + hamiltonian = self.graph_to_hamiltonian(G, problem_type) + + # Smart initialization + initial_point = self.get_warm_start_params(G, problem_type, p) + + # Choose optimizer + if fast_mode: + optimizer = SPSA(maxiter=50) + else: + optimizer = COBYLA(maxiter=30) + + try: + # REAL QAOA SIMULATION + qaoa = QAOA( + sampler=self.sampler, + optimizer=optimizer, + reps=p, + initial_point=initial_point + ) + + result = qaoa.compute_minimum_eigenvalue(hamiltonian) + + elapsed = time.time() - start_time + if elapsed > timeout: + raise TimeoutError(f"Exceeded {timeout}s") + + optimal_point = result.optimal_point + gamma_values = optimal_point[:p] + beta_values = optimal_point[p:2*p] + energy = result.optimal_value + success = True + + except Exception as e: + # Fallback heuristic (only if QAOA fails) + avg_clustering = nx.average_clustering(G) + + if problem_type == 'maxcut': + gamma_values = [0.35 + 0.40 * avg_clustering] + beta_values = [0.20 + 0.25 * (1 - avg_clustering)] + elif problem_type == 'independent_set': + gamma_values = [0.5] + beta_values = [0.25] + elif problem_type == 'vertex_cover': + gamma_values = [0.4] + beta_values = [0.3] + + energy = None + success = False + + elapsed = time.time() - start_time + + return { + 'gamma': np.array(gamma_values), + 'beta': np.array(beta_values), + 'energy': float(energy) if energy is not None else None, + 'success': success, + 'optimization_time': elapsed + } + + # ======================================================================== + # SAMPLE GENERATION + # ======================================================================== + + def generate_single_sample(self, sample_id: int, n_nodes: int, + edge_prob: float, problem_type: str, + p: int, seed: int) -> Optional[Dict]: + """Generate a single sample for any problem type""" + try: + G = self.generate_erdos_renyi_graph(n_nodes, edge_prob, seed) + + if not nx.is_connected(G): + return None + + # Graph features (problem-agnostic) + adj_matrix = self.graph_to_adjacency_matrix(G) + node_features = self.compute_node_features(G) + + # Hamiltonian features (problem-agnostic!) + hamiltonian = self.graph_to_hamiltonian(G, problem_type) + hamiltonian_features = self.extract_hamiltonian_features(hamiltonian) + + # QAOA optimization + optimal_params = self.simulate_qaoa_optimization(G, problem_type, p) + + sample = { + 'id': sample_id, + 'problem_type': problem_type, # NEW: Problem identifier + 'n_nodes': n_nodes, + 'n_edges': G.number_of_edges(), + 'adjacency_matrix': adj_matrix.tolist(), + 'node_features': node_features.tolist(), + 'hamiltonian_features': hamiltonian_features.tolist(), # NEW! + 'optimal_gamma': optimal_params['gamma'].tolist(), + 'optimal_beta': optimal_params['beta'].tolist(), + 'qaoa_energy': optimal_params['energy'], + 'quantum_optimized': optimal_params['success'], + 'optimization_time': optimal_params['optimization_time'], + 'graph_type': 'erdos_renyi', + 'edge_probability': edge_prob + } + + return sample + + except Exception as e: + print(f"Sample {sample_id} error: {str(e)[:50]}") + return None + + +def worker_generate_sample(args): + """Worker function for multiprocessing""" + sample_id, n_nodes, edge_prob, problem_type, p, seed, use_gpu = args + + gen = MultiProblemQuantumGenerator(seed=seed, use_gpu=False, n_workers=1) + sample = gen.generate_single_sample(sample_id, n_nodes, edge_prob, problem_type, p, seed) + + if sample: + status = "" if sample['quantum_optimized'] else "" + time_str = f"{sample['optimization_time']:.1f}s" + problem_emoji = {'maxcut': '', 'independent_set': '', 'vertex_cover': ''} + emoji = problem_emoji.get(problem_type, '') + print(f" {status} {emoji} Sample {sample_id} ({problem_type}, n={n_nodes}) - {time_str}") + + return sample + + +def generate_multiproblem_dataset( + total_samples: int, + n_nodes_range: Tuple[int, int], + problem_distribution: Dict[str, float] = None, + p: int = 1, + save_path: str = "Dataset/qaoa_multiproblem.json", + checkpoint_every: int = 10, + n_workers: int = None, + use_gpu: bool = True, + seed: int = 42 +) -> List[Dict]: + """ + Generate multi-problem dataset with hybrid strategy + + Args: + total_samples: Number of samples + n_nodes_range: (min_nodes, max_nodes) + problem_distribution: Dict like {'maxcut': 0.7, 'independent_set': 0.15, 'vertex_cover': 0.15} + If None, defaults to hybrid strategy (70% maxcut) + p: QAOA depth + save_path: Output path + checkpoint_every: Checkpoint interval + n_workers: Parallel workers + use_gpu: GPU acceleration + seed: Random seed + """ + + # Default hybrid distribution: 70% MaxCut, 30% others + if problem_distribution is None: + problem_distribution = { + 'maxcut': 0.70, + 'independent_set': 0.15, + 'vertex_cover': 0.15 + } + + if n_workers is None: + n_workers = max(1, mp.cpu_count() - 1) + + print(f"\n{'='*70}") + print(f" MULTI-PROBLEM QUANTUM QAOA DATASET GENERATION") + print(f"{'='*70}") + print(f"Total samples: {total_samples}") + print(f"Node range: {n_nodes_range[0]}-{n_nodes_range[1]}") + print(f"QAOA depth: {p}") + print(f"Problem distribution:") + for prob, pct in problem_distribution.items(): + print(f" - {prob}: {pct*100:.0f}%") + print(f"Parallel workers: {n_workers}") + print(f"GPU: {'Enabled' if use_gpu else 'Disabled'}") + print(f"{'='*70}\n") + + # Load existing dataset + dataset = [] + save_file = Path(save_path) + if save_file.exists(): + with open(save_path, 'r') as f: + dataset = json.load(f) + print(f" Loaded {len(dataset)} existing samples") + + start_id = len(dataset) + if start_id >= total_samples: + print(" Dataset already complete!") + return dataset + + save_file.parent.mkdir(parents=True, exist_ok=True) + + # Prepare worker arguments with problem distribution + np.random.seed(seed + start_id) + worker_args = [] + + problems = list(problem_distribution.keys()) + probs = list(problem_distribution.values()) + + for i in range(start_id, total_samples + 1000): + n_nodes = np.random.randint(n_nodes_range[0], n_nodes_range[1] + 1) + edge_prob = np.random.uniform(0.3, 0.9) + problem_type = np.random.choice(problems, p=probs) + sample_seed = seed + i + worker_args.append((i, n_nodes, edge_prob, problem_type, p, sample_seed, use_gpu)) + + # Run parallel generation + start_time = time.time() + + print(f" Starting parallel generation with {n_workers} workers...\n") + + with mp.Pool(n_workers) as pool: + chunk_size = checkpoint_every + + for chunk_start in range(0, len(worker_args), chunk_size): + if len(dataset) >= total_samples: + break + + chunk_end = min(chunk_start + chunk_size, len(worker_args)) + chunk_args = worker_args[chunk_start:chunk_end] + + results = pool.map(worker_generate_sample, chunk_args) + + for result in results: + if result is not None and len(dataset) < total_samples: + dataset.append(result) + + # Checkpoint + if len(dataset) >= start_id + checkpoint_every or len(dataset) >= total_samples: + with open(save_path, 'w') as f: + json.dump(dataset, f, indent=2) + + elapsed = time.time() - start_time + progress = len(dataset) - start_id + rate = progress / elapsed if elapsed > 0 else 0 + remaining = total_samples - len(dataset) + eta = remaining / rate if rate > 0 else 0 + + # Count by problem type + problem_counts = {} + for s in dataset: + pt = s['problem_type'] + problem_counts[pt] = problem_counts.get(pt, 0) + 1 + + print(f"\n Checkpoint: {len(dataset)}/{total_samples}") + print(f" Problem breakdown:") + for prob, count in problem_counts.items(): + print(f" {prob}: {count} ({100*count/len(dataset):.1f}%)") + print(f" Elapsed: {elapsed/60:.1f} min") + print(f" Rate: {rate*60:.1f} samples/min") + print(f" ETA: {eta/60:.1f} min\n") + + # Final save + dataset = dataset[:total_samples] + with open(save_path, 'w') as f: + json.dump(dataset, f, indent=2) + + total_time = time.time() - start_time + quantum_success = sum(1 for s in dataset if s.get('quantum_optimized', False)) + + print(f"\n{'='*70}") + print(f" MULTI-PROBLEM DATASET COMPLETE!") + print(f"{'='*70}") + print(f"Total samples: {len(dataset)}") + print(f"Quantum optimized: {quantum_success} ({100*quantum_success/len(dataset):.1f}%)") + + # Final problem distribution + problem_counts = {} + for s in dataset: + pt = s['problem_type'] + problem_counts[pt] = problem_counts.get(pt, 0) + 1 + + print(f"\nFinal problem distribution:") + for prob, count in problem_counts.items(): + print(f" {prob}: {count} ({100*count/len(dataset):.1f}%)") + + print(f"\nTotal time: {total_time/60:.1f} min ({total_time/3600:.2f}h)") + print(f"Average: {total_time/len(dataset):.1f}s per sample") + print(f"Saved to: {save_path}") + print(f"{'='*70}\n") + + return dataset + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='Multi-problem quantum dataset generation') + parser.add_argument('--samples', type=int, default=100, help='Number of samples') + parser.add_argument('--min_nodes', type=int, default=6, help='Min nodes') + parser.add_argument('--max_nodes', type=int, default=12, help='Max nodes') + parser.add_argument('--p', type=int, default=1, help='QAOA depth') + parser.add_argument('--output', type=str, default='Dataset/qaoa_multiproblem_hybrid.json') + parser.add_argument('--checkpoint', type=int, default=10) + parser.add_argument('--workers', type=int, default=None) + parser.add_argument('--no-gpu', action='store_true') + parser.add_argument('--seed', type=int, default=42) + parser.add_argument('--maxcut-ratio', type=float, default=0.70, help='Ratio of MaxCut samples (0.70 = 70 percent)') + + args = parser.parse_args() + + if not QISKIT_AVAILABLE: + print(" Qiskit not installed!") + print("Install: pip install qiskit qiskit-algorithms qiskit-aer") + exit(1) + + # Hybrid distribution + other_ratio = 1.0 - args.maxcut_ratio + problem_dist = { + 'maxcut': args.maxcut_ratio, + 'independent_set': other_ratio / 2, + 'vertex_cover': other_ratio / 2 + } + + print(f"\n Hybrid Strategy:") + print(f" MaxCut: {problem_dist['maxcut']*100:.0f}%") + print(f" Independent Set: {problem_dist['independent_set']*100:.0f}%") + print(f" Vertex Cover: {problem_dist['vertex_cover']*100:.0f}%") + + dataset = generate_multiproblem_dataset( + total_samples=args.samples, + n_nodes_range=(args.min_nodes, args.max_nodes), + problem_distribution=problem_dist, + p=args.p, + save_path=args.output, + checkpoint_every=args.checkpoint, + n_workers=args.workers, + use_gpu=not args.no_gpu, + seed=args.seed + ) + + print(" Multi-problem dataset ready!") + print("\n Features included:") + print(" - 7 graph structure features (problem-agnostic)") + print(" - 10 Hamiltonian features (problem-agnostic)") + print(" - Optimal QAOA parameters (gamma, beta)") + print("\n Your GNN can now generalize across multiple problems!")