diff --git a/backend/handlers/generation_handler.py b/backend/handlers/generation_handler.py
index 7484929a4..774c64972 100644
--- a/backend/handlers/generation_handler.py
+++ b/backend/handlers/generation_handler.py
@@ -193,25 +193,25 @@ def fail_generation(self, error: str) -> None:
             case "gpu":
                 match self.state.gpu_slot:
                     case GpuSlot(generation=GenerationRunning(id=generation_id)) as gpu_slot:
-                        logger.error("Generation %s failed: %s", generation_id, error)
+                        logger.exception("Generation %s failed: %s", generation_id, error)
                         gpu_slot.generation = GenerationError(id=generation_id, error=error)
                     case _:
-                        logger.error("Generation failed without active running job: %s", error)
+                        logger.exception("Generation failed without active running job: %s", error)
                 return
             case "api":
                 match self.state.api_generation:
                     case GenerationRunning(id=generation_id):
-                        logger.error("Generation %s failed: %s", generation_id, error)
+                        logger.exception("Generation %s failed: %s", generation_id, error)
                         self.state.api_generation = GenerationError(id=generation_id, error=error)
                     case _:
-                        logger.error("Generation failed without active running job: %s", error)
+                        logger.exception("Generation failed without active running job: %s", error)
                 return
             case _:
                 if isinstance(self._gpu_generation(), GenerationCancelled) or isinstance(
                     self.state.api_generation, GenerationCancelled
                 ):
                     return
-                logger.error("Generation failed without active running job: %s", error)
+                logger.exception("Generation failed without active running job: %s", error)
                 return
 
     @with_state_lock
diff --git a/backend/handlers/queue_worker.py b/backend/handlers/queue_worker.py
index e684633e3..fc19237fe 100644
--- a/backend/handlers/queue_worker.py
+++ b/backend/handlers/queue_worker.py
@@ -69,7 +69,7 @@ def _run_job(self, job: QueueJob, executor: JobExecutor, slot: str) -> None:
             result_paths = executor.execute(job)
             self._queue.update_job(job.id, status="complete", progress=100, phase="complete", result_paths=result_paths)
         except Exception as exc:
-            logger.error("Job %s failed: %s", job.id, exc)
+            logger.exception("Job %s failed: %s", job.id, exc)
             self._queue.update_job(job.id, status="error", error=str(exc))
         finally:
             with self._lock:
diff --git a/backend/handlers/video_generation_handler.py b/backend/handlers/video_generation_handler.py
index 562cef526..4b2a40c0a 100644
--- a/backend/handlers/video_generation_handler.py
+++ b/backend/handlers/video_generation_handler.py
@@ -223,6 +223,7 @@ def get_9_16_size(res: str) -> tuple[int, int]:
             logger.info("Generation cancelled by user")
             return GenerateVideoResponse(status="cancelled")
 
+        logger.exception("[i2v] Generation failed with exception: %s", e)
         raise HTTPError(500, str(e)) from e
 
 def generate_video(
diff --git a/backend/ltx2_server.py b/backend/ltx2_server.py
index ad1308489..7f2bb87e4 100644
--- a/backend/ltx2_server.py
+++ b/backend/ltx2_server.py
@@ -42,12 +42,94 @@
 logging.basicConfig(level=logging.INFO, handlers=[console_handler])
 logger = logging.getLogger(__name__)
 
+# ============================================================
+# CUDA Fallback Handling (for non-CUDA PyTorch builds on MPS/CPU)
+# ============================================================
+
+def _setup_cuda_fallback() -> None:
+    """
+    Monkey-patch torch.cuda functions to handle cases where PyTorch is not
+    compiled with CUDA support (e.g., running on MPS or CPU).
+
+    The ltx-pipelines library calls torch.cuda.synchronize() unconditionally,
+    which fails with "Torch not compiled with CUDA enabled" on non-CUDA builds.
+    """
+    # Check if we're on a device that doesn't have full CUDA support
+    device_type = DEVICE.type
+
+    if device_type == "cuda":
+        # True CUDA - no fallback needed
+        return
+
+    logger.info(f"Setting up CUDA fallback for device type: {device_type}")
+
+    # Safe fallback implementations for the CUDA functions the pipelines call
+    def safe_cuda_synchronize() -> None:
+        """Synchronize MPS when available; no-op on CPU."""
+        if device_type == "mps":
+            try:
+                torch.mps.synchronize()
+            except Exception:
+                pass
+
+    def safe_cuda_empty_cache() -> None:
+        """Flush the MPS cache when available; no-op on CPU."""
+        if device_type == "mps":
+            try:
+                torch.mps.empty_cache()
+            except Exception:
+                pass
+
+    def safe_cuda_memory_reserved() -> int:
+        """Return 0 for memory reserved on non-CUDA devices."""
+        return 0
+
+    def safe_cuda_memory_allocated() -> int:
+        """Return 0 for memory allocated on non-CUDA devices."""
+        return 0
+
+    def safe_cuda_get_device_name(device: object = None) -> str:
+        """Return a descriptive device name for non-CUDA devices."""
+        if device_type == "mps" and hasattr(torch, "mps"):
+            return "Apple Silicon MPS"
+        return "CPU"
+
+    def safe_cuda_get_device_capability(device: object = None) -> tuple[int, int]:
+        """Return (0, 0) for non-CUDA devices."""
+        return (0, 0)
+
+    # Patch the torch.cuda module
+    if not hasattr(torch.cuda, "_ltx_original_synchronize"):
+        # Store the original function if it exists, so the patch is idempotent
+        try:
+            torch.cuda._ltx_original_synchronize = torch.cuda.synchronize  # type: ignore[attr-defined]
+        except AttributeError:
+            pass
+
+    # Replace with the safe implementations
+    torch.cuda.synchronize = safe_cuda_synchronize  # type: ignore[assignment]
+    torch.cuda.empty_cache = safe_cuda_empty_cache  # type: ignore[assignment]
+    torch.cuda.memory_reserved = safe_cuda_memory_reserved  # type: ignore[assignment]
+    torch.cuda.memory_allocated = safe_cuda_memory_allocated  # type: ignore[assignment]
+    torch.cuda.get_device_name = safe_cuda_get_device_name  # type: ignore[assignment]
+    torch.cuda.get_device_capability = safe_cuda_get_device_capability  # type: ignore[assignment]
+
+    logger.info("CUDA fallback patch applied successfully")
+
+
 # ============================================================
 # SageAttention Integration
 # ============================================================
 use_sage_attention = os.environ.get("USE_SAGE_ATTENTION", "1") == "1"
 _sageattention_runtime_fallback_logged = False
 
+# SageAttention requires CUDA kernels; disable it on MPS
+_is_mps_device = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+
+if use_sage_attention and _is_mps_device:
+    logger.info("SageAttention disabled - MPS device detected (not supported by SageAttention)")
+    use_sage_attention = False
+
 if use_sage_attention:
     try:
         from sageattention import sageattn  # type: ignore[reportMissingImports]
@@ -107,12 +189,17 @@ def _get_device() -> torch.device:
         return torch.device("cuda")
     if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return torch.device("mps")
+    # Fall back to CPU if no GPU is available
+    logger.warning("No CUDA or MPS device available, using CPU")
    return torch.device("cpu")
 
 
 DEVICE = _get_device()
 DTYPE = torch.bfloat16
+
+# Set up the CUDA fallback for non-CUDA PyTorch builds (MPS/CPU support)
+_setup_cuda_fallback()
 
 def _resolve_app_data_dir() -> Path:
     env_path = os.environ.get("LTX_APP_DATA_DIR")
     if not env_path: