
Commit 5e7a35b

[DICP][Ascend] Add some lightllm op tests. (#792)
1 parent 7546e7a commit 5e7a35b

File tree

5 files changed: +195 -0 lines changed

dicp/test/ascend_scripts/ops/static.ini

Lines changed: 4 additions & 0 deletions
@@ -35,6 +35,10 @@ python_files =
     test_index.py
     test_le.py
     ; test_lift_fresh_copy.py
+    test_lightllm_copy_with_offset.py
+    test_lightllm_incre_attention.py
+    test_lightllm_prompt_attention.py
+    test_lightllm_rotary_emb.py
     ; test_log.py
     test_logical_or.py
     test_lt.py
test_lightllm_copy_with_offset.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
import pytest

from dicp.vendor.AscendGraph import ext_ops
from ..common.utils import (
    torch,
    dynamo,
    parse_args,
    compile_model,
    get_device,
    Size,
    update_dynamo_config,
)


class OpModule(torch.nn.Module):
    def forward(self, out, k, start_dim, end_dim):
        res = torch.ops.lightllm.copy_with_offset.default(out, k, start_dim, end_dim)
        return res


model = OpModule()
args = parse_args()
compiled_model = compile_model(model, args.backend, args.dynamic)


class TestLightllmCopyWithOffset():
    @pytest.mark.parametrize("dtype", [torch.float32])
    @pytest.mark.parametrize("sizes", [Size(((8, 8, 16), (6, 8, 16)), ((8, 8, 16), (6, 8, 16))), Size(((8, 16, 32), (6, 16, 32)), ((8, 16, 32), (6, 16, 32)))])
    @pytest.mark.parametrize("compiled_model", compiled_model)
    def test_lightllm_copy_with_offset(self, sizes, dtype, compiled_model):
        device = get_device()
        size = sizes.dynamic if compiled_model.dynamic else sizes.static
        input1 = torch.randn(size[0], dtype=dtype)
        input2 = torch.randn(size[1], dtype=dtype)
        start_dim = 0
        end_dim = 6

        dicp_input1 = input1.to(device)
        dicp_input2 = input2.to(device)

        output = model(input1, input2, start_dim, end_dim)
        dynamo.reset()
        update_dynamo_config(compiled_model.dynamic)
        dicp_output = compiled_model.model(dicp_input1, dicp_input2, start_dim, end_dim)

        assert torch.allclose(output, dicp_output.cpu(), rtol=1e-02, atol=1e-02, equal_nan=True)
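
The lightllm op itself is not defined in this diff. As an orientation aid only, here is a minimal eager sketch of what the test shapes suggest `copy_with_offset` does (writing a (6, 8, 16) source into an (8, 8, 16) destination between start_dim=0 and end_dim=6 along the first dimension). The helper name `copy_with_offset_reference` and its semantics are assumptions for illustration, not the lightllm implementation:

import torch

def copy_with_offset_reference(out, k, start_dim, end_dim):
    # Hypothetical reference: copy k into out[start_dim:end_dim] along dim 0.
    # Inferred from the test's shapes; not taken from the lightllm kernel.
    out = out.clone()
    out[start_dim:end_dim] = k
    return out

# Shapes mirror the first parametrized case in the test above.
out = torch.randn(8, 8, 16)
k = torch.randn(6, 8, 16)
res = copy_with_offset_reference(out, k, 0, 6)
assert res.shape == (8, 8, 16)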
test_lightllm_incre_attention.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
import pytest

from dicp.vendor.AscendGraph import ext_ops
from ..common.utils import (
    torch,
    dynamo,
    parse_args,
    compile_model,
    get_device,
    Size,
    update_dynamo_config,
)


class OpModule(torch.nn.Module):
    def forward(self, q, k, v, int_index_list, max_seq_length):
        res = torch.ops.lightllm.flash_attention_inference.default(q, k, v, int_index_list, max_seq_length)
        return res


model = OpModule()
args = parse_args()
compiled_model = compile_model(model, args.backend, args.dynamic)


class TestLightllmIncreAttention():
    @pytest.mark.parametrize("dtype", [torch.float32])
    @pytest.mark.parametrize("sizes", [Size(((8, 16), (9,)), ((8, 16), (9,))), Size(((8, 32), (9,)), ((8, 32), (9,)))])
    @pytest.mark.parametrize("compiled_model", compiled_model)
    def test_lightllm_incre_attention(self, sizes, dtype, compiled_model):
        device = get_device()
        size = sizes.dynamic if compiled_model.dynamic else sizes.static
        input1 = torch.randn((1,) + size[0], dtype=dtype)
        input2 = torch.randn(size[1] + size[0], dtype=dtype)
        input3 = torch.randn(size[1] + size[0], dtype=dtype)
        input4 = list(size[1])
        max_seq_length = size[1][0]

        dicp_input1 = input1.to(device)
        dicp_input2 = input2.to(device)
        dicp_input3 = input3.to(device)
        dicp_input4 = input4

        output = model(input1, input2, input3, input4, max_seq_length)
        dynamo.reset()
        update_dynamo_config(compiled_model.dynamic)
        dicp_output = compiled_model.model(dicp_input1, dicp_input2, dicp_input3, dicp_input4, max_seq_length)

        assert torch.allclose(output, dicp_output.cpu(), rtol=1e-02, atol=1e-02, equal_nan=True)
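
For readers unfamiliar with the op being exercised here, a hedged sketch of single-token (decode) attention that matches the test's tensor layout — q as (batch, num_head, head_dim), k/v as concatenated per-request caches of shape (total_tokens, num_head, head_dim), and a list of per-request cache lengths — could look like the following. This is an illustrative assumption about `flash_attention_inference`'s semantics, not the actual lightllm kernel:

import math
import torch

def flash_attention_inference_reference(q, k, v, kv_lens, max_seq_length):
    # Hypothetical decode-attention reference. max_seq_length is kept only for
    # signature parity with the test; this sketch does not need it.
    outputs = []
    offset = 0
    for i, kv_len in enumerate(kv_lens):
        qi = q[i]                               # (num_head, head_dim)
        ki = k[offset:offset + kv_len]          # (kv_len, num_head, head_dim)
        vi = v[offset:offset + kv_len]
        offset += kv_len
        scale = 1.0 / math.sqrt(qi.shape[-1])
        scores = torch.einsum("hd,khd->hk", qi, ki) * scale   # (num_head, kv_len)
        probs = torch.softmax(scores, dim=-1)
        outputs.append(torch.einsum("hk,khd->hd", probs, vi)) # (num_head, head_dim)
    return torch.stack(outputs)                 # (batch, num_head, head_dim)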
test_lightllm_prompt_attention.py

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
import pytest

from dicp.vendor.AscendGraph import ext_ops
from ..common.utils import (
    torch,
    dynamo,
    parse_args,
    compile_model,
    get_device,
    Size,
    update_dynamo_config,
)


class OpModule(torch.nn.Module):
    def forward(self, q, k, v, seqlen, num_head, head_dim):
        res = torch.ops.lightllm.prompt_attention_inference.default(q, k, v, seqlen, num_head, head_dim)
        return res


model = OpModule()
args = parse_args()
compiled_model = compile_model(model, args.backend, args.dynamic)


class TestLightllmPromptAttention():
    @pytest.mark.parametrize("dtype", [torch.float16])
    @pytest.mark.parametrize("sizes", [Size(((1, 32, 16, 32), (32,)), ((1, 32, 16, 32), (32,))), Size(((1, 32, 16, 64), (32,)), ((1, 32, 16, 64), (32,)))])
    @pytest.mark.parametrize("compiled_model", compiled_model)
    def test_lightllm_prompt_attention(self, sizes, dtype, compiled_model):
        device = get_device()
        size = sizes.dynamic if compiled_model.dynamic else sizes.static
        input1 = torch.randn(size[0], dtype=dtype)
        input2 = torch.randn(size[0], dtype=dtype)
        input3 = torch.randn(size[0], dtype=dtype)
        input4 = torch.tensor(size[1], dtype=torch.int32)
        num_head = size[0][2]
        head_dim = size[0][3]

        dicp_input1 = input1.to(device)
        dicp_input2 = input2.to(device)
        dicp_input3 = input3.to(device)
        dicp_input4 = input4.to(device)

        output = model(input1, input2, input3, input4, num_head, head_dim).view(size[1][0], num_head * head_dim).half()
        dynamo.reset()
        update_dynamo_config(compiled_model.dynamic)
        dicp_output = compiled_model.model(dicp_input1.view(1, -1, num_head * head_dim), dicp_input2.view(1, -1, num_head * head_dim), dicp_input3.view(1, -1, num_head * head_dim), dicp_input4, num_head, head_dim).view(size[1][0], num_head * head_dim)

        assert torch.allclose(output, dicp_output.cpu(), rtol=1e-02, atol=1e-02, equal_nan=True)
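
As background for the shapes above — q/k/v of (batch, seq, num_head, head_dim) in the eager call, flattened to (1, -1, num_head * head_dim) for the dicp path — a hedged sketch of causal prefill (prompt) attention might look like the code below. The function name and layout assumptions are for illustration only, not the lightllm `prompt_attention_inference` kernel:

import math
import torch

def prompt_attention_inference_reference(q, k, v, seqlen, num_head, head_dim):
    # Hypothetical causal prefill-attention reference. Assumes q/k/v are
    # (batch, seq, num_head, head_dim); seqlen is assumed to hold per-request
    # prompt lengths equal to seq and is unused in this sketch.
    batch, seq = q.shape[0], q.shape[1]
    qh = q.permute(0, 2, 1, 3)                                 # (b, h, s, d)
    kh = k.permute(0, 2, 1, 3)
    vh = v.permute(0, 2, 1, 3)
    scale = 1.0 / math.sqrt(head_dim)
    scores = torch.matmul(qh, kh.transpose(-1, -2)) * scale    # (b, h, s, s)
    causal = torch.tril(torch.ones(seq, seq, dtype=torch.bool, device=q.device))
    scores = scores.masked_fill(~causal, float("-inf"))
    probs = torch.softmax(scores, dim=-1)
    out = torch.matmul(probs, vh)                              # (b, h, s, d)
    return out.permute(0, 2, 1, 3).reshape(batch, seq, num_head * head_dim)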
test_lightllm_rotary_emb.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
import pytest

from dicp.vendor.AscendGraph import ext_ops
from ..common.utils import (
    torch,
    dynamo,
    parse_args,
    compile_model,
    get_device,
    Size,
    update_dynamo_config,
)


class OpModule(torch.nn.Module):
    def forward(self, x, cos, sin):
        res = torch.ops.lightllm.rotary_emb.default(x, cos, sin)
        return res


model = OpModule()
args = parse_args()
compiled_model = compile_model(model, args.backend, args.dynamic)


class TestLightllmRotaryEmb():
    @pytest.mark.parametrize("dtype", [torch.float32])
    @pytest.mark.parametrize("sizes", [Size(((2, 32, 64), (2, 32), (2, 32)), ((2, 32, 64), (2, 32), (2, 32))), Size(((2, 32, 128), (2, 64), (2, 64)), ((2, 32, 128), (2, 64), (2, 64)))])
    @pytest.mark.parametrize("compiled_model", compiled_model)
    def test_lightllm_rotary_emb(self, sizes, dtype, compiled_model):
        device = get_device()
        size = sizes.dynamic if compiled_model.dynamic else sizes.static
        input1 = torch.randn(size[0], dtype=dtype)
        input2 = torch.randn(size[1], dtype=dtype)
        input3 = torch.randn(size[2], dtype=dtype)

        dicp_input1 = input1.to(device)
        dicp_input2 = input2.to(device)
        dicp_input3 = input3.to(device)

        output = model(input1, input2, input3)
        dynamo.reset()
        update_dynamo_config(compiled_model.dynamic)
        dicp_output = compiled_model.model(dicp_input1, dicp_input2, dicp_input3)

        assert torch.allclose(output, dicp_output.cpu(), rtol=1e-02, atol=1e-02, equal_nan=True)
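
The test shapes — x of (seq_len, num_head, head_dim) with cos/sin of (seq_len, head_dim // 2), e.g. (2, 32, 64) against (2, 32) — match the common "rotate half" form of rotary position embeddings. A hedged sketch of that form is shown below; it is an assumption for illustration, not the lightllm `rotary_emb` implementation:

import torch

def rotary_emb_reference(x, cos, sin):
    # Hypothetical rotary-embedding reference: pair the first and second
    # halves of head_dim and rotate them by the per-position cos/sin values.
    half = x.shape[-1] // 2
    x0, x1 = x[..., :half], x[..., half:]
    cos = cos.unsqueeze(1)        # (seq_len, 1, half) broadcasts over heads
    sin = sin.unsqueeze(1)
    out0 = x0 * cos - x1 * sin
    out1 = x0 * sin + x1 * cos
    return torch.cat((out0, out1), dim=-1)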
