Skip to content

Commit eff7d18

Browse files
committed
waive a ray test
Signed-off-by: Yan Chunwei <[email protected]>
1 parent dff32c1 commit eff7d18

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

tests/unittest/llmapi/test_llm_pytorch.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ def _check_llama_7b_multi_lora_evict_load_new_adapters(
360360

361361

362362
@skip_gpu_memory_less_than_40gb
363+
@skip_ray # https://nvbugs/5682551
363364
def test_llama_7b_multi_lora_evict_and_reload_lora_gpu_cache():
364365
"""Test eviction and re-loading a previously evicted adapter from the LoRA GPU cache, within a single
365366
llm.generate call, that's repeated twice.
@@ -454,6 +455,7 @@ def test_llama_7b_peft_cache_config_affects_peft_cache_size():
454455
cuda_graph_config=None)
455456

456457

458+
@skip_ray # https://nvbugs/5682551
457459
@skip_gpu_memory_less_than_40gb
458460
def test_llama_7b_lora_config_overrides_peft_cache_config():
459461
"""Tests that cache size args in lora_config LLM arg override the cache size

0 commit comments

Comments (0)