Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions tests/unittest/_torch/modeling/test_modeling_qwen2_5vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import unittest
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path
from typing import List

import torch
Expand Down Expand Up @@ -106,20 +107,24 @@ def __repr__(self) -> str:
class TestQwen2_5_VL(unittest.TestCase):

def get_test_inputs(self, modality: str):

test_data_root = Path(
os.path.join(llm_models_root(), "multimodals", "test_data"))

if modality == "image":
return ["Describe the natural environment in the image."], \
["https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/seashore.png"]
[str(test_data_root / "seashore.png")]
elif modality == "multiple_image":
return ["Describe the difference between the two images."], \
["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
"https://huggingface.co/datasets/Sayali9141/traffic_signal_images/resolve/main/61.jpg"]
[str(test_data_root / "inpaint.png"),
str(test_data_root / "61.jpg")]
elif modality == "video":
return ["Tell me what you see in the video briefly."], \
["https://huggingface.co/datasets/Efficient-Large-Model/VILA-inference-demos/resolve/main/OAI-sora-tokyo-walk.mp4"]
[str(test_data_root / "OAI-sora-tokyo-walk.mp4")]
elif modality == "mixture_text_image":
return ["Describe the scene in the image briefly.",
"Who invented the internet?"], \
["https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png",
[str(test_data_root / "inpaint.png"),
""]
elif modality == "text":
return ["Who invented the internet?"], []
Expand Down