From eaab86d204bcd00bd8f71add2185576d83bc41b2 Mon Sep 17 00:00:00 2001
From: Lorenzo Mangani <lorenzo.mangani@gmail.com>
Date: Mon, 16 Mar 2026 01:49:25 +0100
Subject: [PATCH 1/4] Add CUDA fallback handling for non-CUDA builds

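Implement CUDA fallback handling for non-CUDA PyTorch builds: when the
selected device is not CUDA, replace torch.cuda functions with safe no-op
(or MPS-delegating) implementations and log that the patch was applied.
Also disable SageAttention when an MPS device is detected and log a
warning when falling back to CPU.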
---
 backend/ltx2_server.py | 87 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py
index ad1308489..7f2bb87e4 100644
--- a/backend/ltx2_server.py
+++ b/backend/ltx2_server.py
@@ -42,12 +42,94 @@
 logging.basicConfig(level=logging.INFO, handlers=[console_handler])
 logger = logging.getLogger(__name__)
 
+# ============================================================
+# CUDA Fallback Handling (for non-CUDA PyTorch builds on MPS/CPU)
+# ============================================================
+
+def _setup_cuda_fallback() -> None:
+    """
+    Monkey-patch torch.cuda functions to handle cases where PyTorch is not
+    compiled with CUDA support (e.g., running on MPS or CPU).
+    
+    The ltx-pipelines library calls torch.cuda.synchronize() unconditionally,
+    which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds.
+    """
+    # Check if we're on a device that doesn't have full CUDA support
+    device_type = DEVICE.type
+    
+    if device_type == "cuda":
+        # True CUDA - no fallback needed
+        return
+    
+    logger.info(f"Setup CUDA fallback for device type: {device_type}")
+    
+    # Create safe no-op implementations for CUDA functions
+    def safe_cuda_synchronize() -> None:
+        """No-op synchronize for non-CUDA devices."""
+        if device_type == "mps":
+            try:
+                torch.mps.synchronize()
+            except Exception:
+                pass
+    
+    def safe_cuda_empty_cache() -> None:
+        """No-op empty_cache for non-CUDA devices."""
+        if device_type == "mps":
+            try:
+                torch.mps.empty_cache()
+            except Exception:
+                pass
+    
+    def safe_cuda_memory_reserved() -> int:
+        """Return 0 for memory reserved on non-CUDA devices."""
+        return 0
+    
+    def safe_cuda_memory_allocated() -> int:
+        """Return 0 for memory allocated on non-CUDA devices."""
+        return 0
+    
+    def safe_cuda_get_device_name(device: object = None) -> str:
+        """Return device name for non-CUDA devices."""
+        if device_type == "mps" and hasattr(torch, 'mps'):
+            return "Apple Silicon MPS"
+        return "CPU"
+    
+    def safe_cuda_get_device_capability(device: object = None) -> tuple[int, int]:
+        """Return (0, 0) for non-CUDA devices."""
+        return (0, 0)
+    
+    # Patch torch.cuda module
+    if not hasattr(torch.cuda, "_ltx_original_synchronize"):
+        # Store original functions if they exist
+        try:
+            torch.cuda._ltx_original_synchronize = torch.cuda.synchronize  # type: ignore[attr-defined]
+        except AttributeError:
+            pass
+    
+    # Replace with safe implementations
+    torch.cuda.synchronize = safe_cuda_synchronize  # type: ignore[assignment]
+    torch.cuda.empty_cache = safe_cuda_empty_cache  # type: ignore[assignment]
+    torch.cuda.memory_reserved = safe_cuda_memory_reserved  # type: ignore[assignment]
+    torch.cuda.memory_allocated = safe_cuda_memory_allocated  # type: ignore[assignment]
+    torch.cuda.get_device_name = safe_cuda_get_device_name  # type: ignore[assignment]
+    torch.cuda.get_device_capability = safe_cuda_get_device_capability  # type: ignore[assignment]
+    
+    logger.info("CUDA fallback patch applied successfully")
+
+
 # ============================================================
 # SageAttention Integration
 # ============================================================
 use_sage_attention = os.environ.get("USE_SAGE_ATTENTION", "1") == "1"
 _sageattention_runtime_fallback_logged = False
 
+# Check for MPS device - SageAttention doesn't support MPS
+_is_mps_device = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+
+if use_sage_attention and _is_mps_device:
+    logger.info("SageAttention disabled - MPS device detected (not supported by SageAttention)")
+    use_sage_attention = False
+
 if use_sage_attention:
     try:
         from sageattention import sageattn  # type: ignore[reportMissingImports]
@@ -107,12 +189,17 @@ def _get_device() -> torch.device:
         return torch.device("cuda")
     if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return torch.device("mps")
+    # Fallback to CPU if no GPU available
+    logger.warning("No CUDA or MPS device available, using CPU")
     return torch.device("cpu")
 
 
 DEVICE = _get_device()
 DTYPE = torch.bfloat16
 
+# Setup CUDA fallback for non-CUDA PyTorch builds (MPS/CPU support)
+_setup_cuda_fallback()
+
 def _resolve_app_data_dir() -> Path:
     env_path = os.environ.get("LTX_APP_DATA_DIR")
     if not env_path:

From 80ca911c7cf077397867eaa4774de13e42faab7f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 01:00:17 +0000
Subject: [PATCH 2/4] Initial plan


From bcd6f8d64d25e6d6a0cb038f8197ebf9fba02074 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 01:02:51 +0000
Subject: [PATCH 3/4] Fix CUDA fallback wrapper signatures and exception
 handling

- Add optional `device` param to safe_cuda_synchronize, safe_cuda_memory_reserved,
  and safe_cuda_memory_allocated to match the real torch.cuda API signatures
- Replace broad, silent `except Exception: pass` with specific
  (RuntimeError, AttributeError) catches and DEBUG-level logging in MPS fallback wrappers
- Update _setup_cuda_fallback docstring to reflect runtime-device-based condition

Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com>
---
 backend/ltx2_server.py | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py
index 7f2bb87e4..ef89fd48d 100644
--- a/backend/ltx2_server.py
+++ b/backend/ltx2_server.py
@@ -48,43 +48,45 @@
 
 def _setup_cuda_fallback() -> None:
     """
-    Monkey-patch torch.cuda functions to handle cases where PyTorch is not
-    compiled with CUDA support (e.g., running on MPS or CPU).
-    
+    Monkey-patch torch.cuda functions to handle cases where the active device
+    is not CUDA (e.g., MPS or CPU).  This is triggered at runtime based on the
+    selected device, so it also applies to CUDA-built PyTorch installations
+    when no CUDA GPU is in use.
+
     The ltx-pipelines library calls torch.cuda.synchronize() unconditionally,
     which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds.
     """
     # Check if we're on a device that doesn't have full CUDA support
     device_type = DEVICE.type
-    
+
     if device_type == "cuda":
         # True CUDA - no fallback needed
         return
-    
+
     logger.info(f"Setup CUDA fallback for device type: {device_type}")
-    
+
     # Create safe no-op implementations for CUDA functions
-    def safe_cuda_synchronize() -> None:
-        """No-op synchronize for non-CUDA devices."""
+    def safe_cuda_synchronize(device: object = None) -> None:
+        """No-op synchronize for non-CUDA devices; delegates to MPS when available."""
         if device_type == "mps":
             try:
                 torch.mps.synchronize()
-            except Exception:
-                pass
-    
+            except (RuntimeError, AttributeError) as exc:
+                logger.debug("MPS synchronize fallback failed: %s", exc)
+
     def safe_cuda_empty_cache() -> None:
-        """No-op empty_cache for non-CUDA devices."""
+        """No-op empty_cache for non-CUDA devices; delegates to MPS when available."""
         if device_type == "mps":
             try:
                 torch.mps.empty_cache()
-            except Exception:
-                pass
-    
-    def safe_cuda_memory_reserved() -> int:
+            except (RuntimeError, AttributeError) as exc:
+                logger.debug("MPS empty_cache fallback failed: %s", exc)
+
+    def safe_cuda_memory_reserved(device: object = None) -> int:
         """Return 0 for memory reserved on non-CUDA devices."""
         return 0
-    
-    def safe_cuda_memory_allocated() -> int:
+
+    def safe_cuda_memory_allocated(device: object = None) -> int:
         """Return 0 for memory allocated on non-CUDA devices."""
         return 0
     

From 468dc932d85b81cf12f97361b78d1b51399954b4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 01:07:44 +0000
Subject: [PATCH 4/4] Gate CUDA fallback on torch.version.cuda is None

Only apply torch.cuda.* monkey-patches when PyTorch was compiled without
CUDA support (torch.version.cuda is None). CUDA-capable builds running on
CPU due to driver issues now surface real errors instead of silently no-oping.
Update docstring to explain the intent precisely.

Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com>
---
 backend/ltx2_server.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py
index ef89fd48d..6c44ec8f7 100644
--- a/backend/ltx2_server.py
+++ b/backend/ltx2_server.py
@@ -48,22 +48,23 @@
 
 def _setup_cuda_fallback() -> None:
     """
-    Monkey-patch torch.cuda functions to handle cases where the active device
-    is not CUDA (e.g., MPS or CPU).  This is triggered at runtime based on the
-    selected device, so it also applies to CUDA-built PyTorch installations
-    when no CUDA GPU is in use.
+    Monkey-patch torch.cuda functions when PyTorch was compiled without a
+    CUDA or ROCm backend (i.e., torch.version.cuda and torch.version.hip are
+    both None, as on MPS-only or CPU-only wheels).
 
     The ltx-pipelines library calls torch.cuda.synchronize() unconditionally,
-    which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds.
+    which raises "Torch not compiled with CUDA enabled" on non-CUDA builds.
+    This patch is intentionally limited to non-CUDA builds so that CUDA-capable
+    installations that happen to be running on CPU (e.g., driver temporarily
+    unavailable) still surface real CUDA misconfiguration errors instead of
+    silently no-oping them.
     """
-    # Check if we're on a device that doesn't have full CUDA support
-    device_type = DEVICE.type
-
-    if device_type == "cuda":
-        # True CUDA - no fallback needed
+    # Skip CUDA builds and ROCm builds (torch.version.cuda is None there, but torch.cuda is live).
+    if torch.version.cuda is not None or getattr(torch.version, "hip", None) is not None:
         return
 
-    logger.info(f"Setup CUDA fallback for device type: {device_type}")
+    device_type = DEVICE.type
+    logger.info(f"Setup CUDA fallback for non-CUDA PyTorch build (device: {device_type})")
 
     # Create safe no-op implementations for CUDA functions
     def safe_cuda_synchronize(device: object = None) -> None: