diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index 0853b3aec5cd..aff5a50b93d3 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -237,6 +237,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         else:
             dit_config["vec_in_dim"] = None
 
+        dit_config["num_heads"] = dit_config["hidden_size"] // sum(dit_config["axes_dim"])
+
         dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.')
         dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.')
         if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
diff --git a/comfy/sd.py b/comfy/sd.py
index 5a7221620290..b689c0dfcbab 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -1059,9 +1059,9 @@ def detect_te_model(sd):
         return TEModel.JINA_CLIP_2
     if "encoder.block.23.layer.1.DenseReluDense.wi_1.weight" in sd:
         weight = sd["encoder.block.23.layer.1.DenseReluDense.wi_1.weight"]
-        if weight.shape[-1] == 4096:
+        if weight.shape[0] == 10240:
             return TEModel.T5_XXL
-        elif weight.shape[-1] == 2048:
+        elif weight.shape[0] == 5120:
             return TEModel.T5_XL
     if 'encoder.block.23.layer.1.DenseReluDense.wi.weight' in sd:
         return TEModel.T5_XXL_OLD
diff --git a/comfy/text_encoders/cosmos.py b/comfy/text_encoders/cosmos.py
index 448381fa9f9c..f4b40ac68c33 100644
--- a/comfy/text_encoders/cosmos.py
+++ b/comfy/text_encoders/cosmos.py
@@ -36,7 +36,7 @@ def __init__(self, device="cpu", dtype=None, model_options={}):
             if t5_quantization_metadata is not None:
                 model_options = model_options.copy()
                 model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata
-            if dtype is None:
+            if dtype_t5 is not None:
                 dtype = dtype_t5
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return CosmosTEModel_
diff --git a/comfy/text_encoders/genmo.py b/comfy/text_encoders/genmo.py
index 5daea81355c1..2d7a3fbce80a 100644
--- a/comfy/text_encoders/genmo.py
+++ b/comfy/text_encoders/genmo.py
@@ -32,7 +32,7 @@ def __init__(self, device="cpu", dtype=None, model_options={}):
             if t5_quantization_metadata is not None:
                 model_options = model_options.copy()
                 model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata
-            if dtype is None:
+            if dtype_t5 is not None:
                 dtype = dtype_t5
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return MochiTEModel_
diff --git a/comfy/text_encoders/pixart_t5.py b/comfy/text_encoders/pixart_t5.py
index e5e5f18bed0c..51c6e50c7be4 100644
--- a/comfy/text_encoders/pixart_t5.py
+++ b/comfy/text_encoders/pixart_t5.py
@@ -36,7 +36,7 @@ def __init__(self, device="cpu", dtype=None, model_options={}):
             if t5_quantization_metadata is not None:
                 model_options = model_options.copy()
                 model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata
-            if dtype is None:
+            if dtype_t5 is not None:
                 dtype = dtype_t5
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return PixArtTEModel_