From eaab86d204bcd00bd8f71add2185576d83bc41b2 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 16 Mar 2026 01:49:25 +0100 Subject: [PATCH 1/4] Add CUDA fallback handling for non-CUDA builds Implement CUDA fallback handling for non-CUDA PyTorch builds, including safe no-op implementations for CUDA functions and logging. --- backend/ltx2_server.py | 87 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py index ad1308489..7f2bb87e4 100644 --- a/backend/ltx2_server.py +++ b/backend/ltx2_server.py @@ -42,12 +42,94 @@ logging.basicConfig(level=logging.INFO, handlers=[console_handler]) logger = logging.getLogger(__name__) +# ============================================================ +# CUDA Fallback Handling (for non-CUDA PyTorch builds on MPS/CPU) +# ============================================================ + +def _setup_cuda_fallback() -> None: + """ + Monkey-patch torch.cuda functions to handle cases where PyTorch is not + compiled with CUDA support (e.g., running on MPS or CPU). + + The ltx-pipelines library calls torch.cuda.synchronize() unconditionally, + which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds. + """ + # Check if we're on a device that doesn't have full CUDA support + device_type = DEVICE.type + + if device_type == "cuda": + # True CUDA - no fallback needed + return + + logger.info(f"Setup CUDA fallback for device type: {device_type}") + + # Create safe no-op implementations for CUDA functions + def safe_cuda_synchronize() -> None: + """No-op synchronize for non-CUDA devices.""" + if device_type == "mps": + try: + torch.mps.synchronize() + except Exception: + pass + + def safe_cuda_empty_cache() -> None: + """No-op empty_cache for non-CUDA devices.""" + if device_type == "mps": + try: + torch.mps.empty_cache() + except Exception: + pass + + def safe_cuda_memory_reserved() -> int: + """Return 0 for memory reserved on non-CUDA devices.""" + return 0 + + def safe_cuda_memory_allocated() -> int: + """Return 0 for memory allocated on non-CUDA devices.""" + return 0 + + def safe_cuda_get_device_name(device: object = None) -> str: + """Return device name for non-CUDA devices.""" + if device_type == "mps" and hasattr(torch, 'mps'): + return "Apple Silicon MPS" + return "CPU" + + def safe_cuda_get_device_capability(device: object = None) -> tuple[int, int]: + """Return (0, 0) for non-CUDA devices.""" + return (0, 0) + + # Patch torch.cuda module + if not hasattr(torch.cuda, "_ltx_original_synchronize"): + # Store original functions if they exist + try: + torch.cuda._ltx_original_synchronize = torch.cuda.synchronize # type: ignore[attr-defined] + except AttributeError: + pass + + # Replace with safe implementations + torch.cuda.synchronize = safe_cuda_synchronize # type: ignore[assignment] + torch.cuda.empty_cache = safe_cuda_empty_cache # type: ignore[assignment] + torch.cuda.memory_reserved = safe_cuda_memory_reserved # type: ignore[assignment] + torch.cuda.memory_allocated = safe_cuda_memory_allocated # type: ignore[assignment] + torch.cuda.get_device_name = safe_cuda_get_device_name # type: ignore[assignment] + torch.cuda.get_device_capability = safe_cuda_get_device_capability # type: ignore[assignment] + + logger.info("CUDA fallback patch applied successfully") + + # ============================================================ # SageAttention Integration # ============================================================ use_sage_attention = os.environ.get("USE_SAGE_ATTENTION", "1") == "1" _sageattention_runtime_fallback_logged = False +# Check for MPS device - SageAttention doesn't support MPS +_is_mps_device = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() + +if use_sage_attention and _is_mps_device: + logger.info("SageAttention disabled - MPS device detected (not supported by SageAttention)") + use_sage_attention = False + if use_sage_attention: try: from sageattention import sageattn # type: ignore[reportMissingImports] @@ -107,12 +189,17 @@ def _get_device() -> torch.device: return torch.device("cuda") if hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): return torch.device("mps") + # Fallback to CPU if no GPU available + logger.warning("No CUDA or MPS device available, using CPU") return torch.device("cpu") DEVICE = _get_device() DTYPE = torch.bfloat16 +# Setup CUDA fallback for non-CUDA PyTorch builds (MPS/CPU support) +_setup_cuda_fallback() + def _resolve_app_data_dir() -> Path: env_path = os.environ.get("LTX_APP_DATA_DIR") if not env_path: From 80ca911c7cf077397867eaa4774de13e42faab7f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:00:17 +0000 Subject: [PATCH 2/4] Initial plan From bcd6f8d64d25e6d6a0cb038f8197ebf9fba02074 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:02:51 +0000 Subject: [PATCH 3/4] Fix CUDA fallback wrapper signatures and exception handling - Add optional `device` param to safe_cuda_synchronize, safe_cuda_memory_reserved, and safe_cuda_memory_allocated to match the real torch.cuda API signatures - Replace bare `except Exception: pass` with specific (RuntimeError, AttributeError) catches and DEBUG-level logging in MPS fallback wrappers - Update _setup_cuda_fallback docstring to reflect runtime-device-based condition Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- backend/ltx2_server.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py index 7f2bb87e4..ef89fd48d 100644 --- a/backend/ltx2_server.py +++ b/backend/ltx2_server.py @@ -48,43 +48,45 @@ def _setup_cuda_fallback() -> None: """ - Monkey-patch torch.cuda functions to handle cases where PyTorch is not - compiled with CUDA support (e.g., running on MPS or CPU). - + Monkey-patch torch.cuda functions to handle cases where the active device + is not CUDA (e.g., MPS or CPU). This is triggered at runtime based on the + selected device, so it also applies to CUDA-built PyTorch installations + when no CUDA GPU is in use. + The ltx-pipelines library calls torch.cuda.synchronize() unconditionally, which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds. """ # Check if we're on a device that doesn't have full CUDA support device_type = DEVICE.type - + if device_type == "cuda": # True CUDA - no fallback needed return - + logger.info(f"Setup CUDA fallback for device type: {device_type}") - + # Create safe no-op implementations for CUDA functions - def safe_cuda_synchronize() -> None: - """No-op synchronize for non-CUDA devices.""" + def safe_cuda_synchronize(device: object = None) -> None: + """No-op synchronize for non-CUDA devices; delegates to MPS when available.""" if device_type == "mps": try: torch.mps.synchronize() - except Exception: - pass - + except (RuntimeError, AttributeError) as exc: + logger.debug("MPS synchronize fallback failed: %s", exc) + def safe_cuda_empty_cache() -> None: - """No-op empty_cache for non-CUDA devices.""" + """No-op empty_cache for non-CUDA devices; delegates to MPS when available.""" if device_type == "mps": try: torch.mps.empty_cache() - except Exception: - pass - - def safe_cuda_memory_reserved() -> int: + except (RuntimeError, AttributeError) as exc: + logger.debug("MPS empty_cache fallback failed: %s", exc) + + def safe_cuda_memory_reserved(device: object = None) -> int: """Return 0 for memory reserved on non-CUDA devices.""" return 0 - - def safe_cuda_memory_allocated() -> int: + + def safe_cuda_memory_allocated(device: object = None) -> int: """Return 0 for memory allocated on non-CUDA devices.""" return 0 From 468dc932d85b81cf12f97361b78d1b51399954b4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 01:07:44 +0000 Subject: [PATCH 4/4] Gate CUDA fallback on torch.version.cuda is None Only apply torch.cuda.* monkey-patches when PyTorch was compiled without CUDA support (torch.version.cuda is None). CUDA-capable builds running on CPU due to driver issues now surface real errors instead of silently no-oping. Update docstring to explain the intent precisely. Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- backend/ltx2_server.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py index ef89fd48d..6c44ec8f7 100644 --- a/backend/ltx2_server.py +++ b/backend/ltx2_server.py @@ -48,22 +48,23 @@ def _setup_cuda_fallback() -> None: """ - Monkey-patch torch.cuda functions to handle cases where the active device - is not CUDA (e.g., MPS or CPU). This is triggered at runtime based on the - selected device, so it also applies to CUDA-built PyTorch installations - when no CUDA GPU is in use. + Monkey-patch torch.cuda functions when PyTorch was not compiled with CUDA + support (i.e., torch.version.cuda is None, as on MPS-only or CPU-only + wheels). The ltx-pipelines library calls torch.cuda.synchronize() unconditionally, - which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds. + which raises "Torch not compiled with CUDA enabled" on non-CUDA builds. + This patch is intentionally limited to non-CUDA builds so that CUDA-capable + installations that happen to be running on CPU (e.g., driver temporarily + unavailable) still surface real CUDA misconfiguration errors instead of + silently no-oping them. """ - # Check if we're on a device that doesn't have full CUDA support - device_type = DEVICE.type - - if device_type == "cuda": - # True CUDA - no fallback needed + # Only patch when PyTorch has no CUDA support compiled in. + if torch.version.cuda is not None: return - logger.info(f"Setup CUDA fallback for device type: {device_type}") + device_type = DEVICE.type + logger.info(f"Setup CUDA fallback for non-CUDA PyTorch build (device: {device_type})") # Create safe no-op implementations for CUDA functions def safe_cuda_synchronize(device: object = None) -> None: