@@ -203,8 +203,8 @@ def initialize(self, **kwargs) -> None:
         # check this when updating unsloth in the future.
         enforce_eager = self.config.training.use_unsloth is True
 
+        model = self.config.training.pretrained_model
         vllm_kwargs = dict(
-            model=self.config.training.pretrained_model,
             gpu_memory_utilization=max_vram,
             enable_lora=True,
             max_lora_rank=self.config.training.lora_r,
@@ -214,16 +214,16 @@ def initialize(self, **kwargs) -> None:
         # attention_config was added in vLLM 0.12+ but is not present in NGC
         # container builds (e.g. nvcr.io/nvidia/vllm:26.02-py3 ships 0.15.1 without it).
         # Fall back to VLLM_ATTENTION_BACKEND env var if the kwarg is not accepted.
-        with heartbeat("Model loading", logger_name=__name__, model=self.config.training.pretrained_model):
+        with heartbeat("Model loading", logger_name=__name__, model=model):
             if attention_config is not None:
                 try:
-                    self.llm = vLLM(**vllm_kwargs, attention_config=attention_config)
+                    self.llm = vLLM(model, **vllm_kwargs, attention_config=attention_config)
                 except TypeError:
                     if attn_backend not in (None, "auto"):
                         os.environ["VLLM_ATTENTION_BACKEND"] = attn_backend
-                    self.llm = vLLM(**vllm_kwargs)
+                    self.llm = vLLM(model, **vllm_kwargs)
             else:
-                self.llm = vLLM(**vllm_kwargs)
+                self.llm = vLLM(model, **vllm_kwargs)
 
     def _build_structured_output_params(self) -> StructuredOutputsParams | None:
         """Build structured output parameters based on generation config.