
Commit d07a41f

[Refactor] Phase Out LLVM Dependency by Making it Optional (#247)
* remove llvm build
* [Refactor] Update kernel compilation and profiling in examples
  - Replaced `tilelang.lower` with `tilelang.compile` in multiple example scripts to streamline kernel compilation.
  - Updated profiling calls to utilize the new `get_profiler` method, enhancing performance measurement consistency.
  - Adjusted assertions and benchmarking methods to align with the new profiling structure across various examples, ensuring correctness and clarity in performance evaluations.
* lint fix
* License Update
* [Refactor] Improve code formatting and documentation in CUDA header and HIP runtime files
  - Adjusted formatting in `cuda.h` for better readability, including alignment of comments and struct fields.
  - Cleaned up whitespace and improved comment clarity in `rt_mod_hip.cc` to enhance code maintainability.
* [Refactor] Enhance formatting and clarity in CUDA header and HIP runtime files
  - Improved comment alignment and readability in `cuda.h`.
  - Cleaned up whitespace and formatting in `rt_mod_hip.cc` to enhance maintainability.
* lint fix
* lint fix
* lint fix
* lint fix
* fix
* License update
* [Enhancement] Update JITKernel to use artifact for kernel source
  - Assigned the generated artifact to `self.artifact` for better management.
  - Updated kernel source references to use `artifact.kernel_source` for consistency in execution backend handling.
* lint fix
* Add @tilelang.testing.requires_llvm decorator to vectorization tests
* Enhance setup.py and env.py for library management
  - Added functionality to remove original files after copying in CMakeBuild.
  - Updated TVM_LIBRARY_PATH in env.py to include the PyPI build library path for better integration.
* Refactor TVM_LIBRARY_PATH assignment for improved readability in env.py
* Refactor CMakeBuild file handling in setup.py
  - Added a check to ensure the target library directory exists before copying .so files.
  - Improved the logic for creating the target directory and copying files to enhance robustness.
* bugfix
* Rename BuildTLDebug to BuildTileLangCUDAWithoutCompile and update registration. Add @tilelang.testing.requires_llvm decorator to multiple tests for LLVM requirement.
* lint fix
* Enhance TileLang code generation by adding support for device code generation without compilation. Updated `host_codegen` and `device_codegen` functions to include new transformations and registration for `tilelang_hip_without_compile`. Refactored JIT kernel adapters to accommodate host and device modules, improving overall integration and flexibility.
* lint fix
* Add support for C target in device code generation
  - Updated `device_codegen_without_compile` to include handling for the C target by registering the `tilelang_cpp` function.
* [Enhancement] Implement auto-clear cache feature based on environment variable
  - Added TILELANG_CLEAR_CACHE environment variable to control cache clearing.
  - Updated CI workflow to set TILELANG_CLEAR_CACHE during testing.
  - Modified cache initialization to clear cache if TILELANG_CLEAR_CACHE is set to true.
* [Refactor] Update kernel invocation and import paths in tests and cache
  - Changed kernel invocation in `test_tilelang_kernel_dequantize_gemm.py` to return the result.
  - Updated import statements in `test_tilelang_kernel_int4_gemm_mma.py` to use `bitblas` instead of `tilelang`.
  - Refactored paths for artifact and parameters in `kernel_cache.py` for better maintainability.
* [Refactor] Clean up whitespace and improve code formatting in kernel_cache.py
  - Removed unnecessary blank lines and adjusted spacing for better readability in the KernelCache class.
  - Enhanced overall code formatting to align with project standards.
* [Enhancement] Add bfloat16 test case and improve kernel caching logic
  - Introduced a new test case for bfloat16 matrix multiplication in `test_tilelang_kernel_gemm_mma_intrinsic.py`.
  - Updated `KernelCache` to handle multiple kernel source files and improve error handling during saving and loading.
  - Refactored `JITKernel` to support instantiation from a database, enhancing flexibility in kernel management.
  - Adjusted `CtypesKernelAdapter` and `CythonKernelAdapter` to utilize the new kernel loading mechanism from the database.
  - Improved code formatting and readability across several files.
* lint fix
* Update bfloat16 matrix multiplication test case to use larger dimensions for improved coverage
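The `KernelCache` changes above (multiple kernel source files per entry, error handling during save and load) follow a pattern that can be sketched in plain Python. This is a hypothetical illustration only, not tilelang's actual `KernelCache` API; the names `save_sources` and `load_sources` are made up for the example:

```python
import hashlib
import os


def save_sources(cache_root: str, key: str, sources: dict) -> str:
    """Persist one or more kernel source files under a hashed cache entry.

    `sources` maps filename -> source text. Errors are swallowed so a
    broken cache directory never fails the build (caching is best-effort).
    """
    entry = os.path.join(cache_root, hashlib.sha256(key.encode()).hexdigest()[:16])
    try:
        os.makedirs(entry, exist_ok=True)
        for name, text in sources.items():
            with open(os.path.join(entry, name), "w") as f:
                f.write(text)
    except OSError:
        pass  # best-effort: a failed save is treated as a cache miss later
    return entry


def load_sources(cache_root: str, key: str):
    """Return the cached sources for `key` as a dict, or None on a miss."""
    entry = os.path.join(cache_root, hashlib.sha256(key.encode()).hexdigest()[:16])
    if not os.path.isdir(entry):
        return None
    out = {}
    for name in os.listdir(entry):
        with open(os.path.join(entry, name)) as f:
            out[name] = f.read()
    return out
```

Keying entries by a hash of the kernel configuration lets one directory hold several artifacts (e.g. device source plus host wrapper), which matches the commit's move from a single kernel source file to multiple per entry.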
1 parent 6972a3a commit d07a41f

File tree

231 files changed: +999 / −750 lines


.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions

```diff
@@ -67,4 +67,5 @@ jobs:
       run: |
         source tilelang_ci/bin/activate
         cd testing/python
+        export TILELANG_CLEAR_CACHE=1
         python -m pytest
```
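The behavior this CI change enables, clearing the kernel cache whenever `TILELANG_CLEAR_CACHE` is set, can be sketched in plain Python. The helper below is a hypothetical illustration of the pattern, not tilelang's actual cache-initialization code (`maybe_clear_cache` and its `cache_dir` argument are made-up names):

```python
import os
import shutil


def maybe_clear_cache(cache_dir: str) -> bool:
    """Remove and recreate the kernel cache directory if the
    TILELANG_CLEAR_CACHE environment variable is truthy.

    The env var acts as an opt-in switch so CI runs (which export
    TILELANG_CLEAR_CACHE=1 before pytest) always start from a cold cache.
    """
    if os.environ.get("TILELANG_CLEAR_CACHE", "0").lower() in ("1", "true", "on"):
        shutil.rmtree(cache_dir, ignore_errors=True)
        os.makedirs(cache_dir, exist_ok=True)
        return True
    return False
```

Gating the wipe on an environment variable keeps local development fast (warm cache by default) while guaranteeing that CI never tests against stale compiled kernels.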

3rdparty/tvm

Submodule tvm updated from 2654ce8 to c1c2a08

CMakeLists.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright(c) Microsoft Corporation.
+# Copyright(c) Tile-AI Corporation.
 # Licensed under the MIT License.
 # Learn a lot from the MLC-LLM Project
 # https://github.com/mlc-ai/mlc-llm/blob/main/CMakeLists.txt
```

README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -87,7 +87,7 @@ Or install locally:
 sudo apt-get update
 sudo apt-get install -y python3-setuptools gcc libtinfo-dev zlib1g-dev build-essential cmake libedit-dev libxml2-dev
 
-pip install .  # with -e option if you want to install in editable mode
+pip install -e . -v  # remove -e option if you don't want to install in editable mode, -v for verbose output
 ```
 
 ### Method 2: Build from Source
````

benchmark/blocksparse_attention/benchmark_library_dense_fmha.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft Corporation.
+# Copyright (c) Tile-AI Corporation.
 # Licensed under the MIT License.
 # ruff: noqa
 import torch
```

benchmark/blocksparse_attention/benchmark_tilelang_block_sparse_fmha.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft Corporation.
+# Copyright (c) Tile-AI Corporation.
 # Licensed under the MIT License.
 # ruff: noqa
 import math
```

benchmark/blocksparse_attention/benchmark_torch_block_sparse_fmha.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft Corporation.
+# Copyright (c) Tile-AI Corporation.
 # Licensed under the MIT License.
 # ruff: noqa
 import math
```

benchmark/blocksparse_attention/benchmark_triton_block_sparse_fmha.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft Corporation.
+# Copyright (c) Tile-AI Corporation.
 # Licensed under the MIT License.
 # ruff: noqa
 import math
```

benchmark/matmul/benchmark_matmul_intrinsic.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft Corporation.
+# Copyright (c) Tile-AI Corporation.
 # Licensed under the MIT License.
 
 import argparse
```

examples/blocksparse_attention/block_sparse_attn_triton.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) Microsoft Corporation.
+# Copyright (c) Tile-AI Corporation.
 # Licensed under the MIT License.
 # ruff: noqa: E712
 import math
```

examples/convolution/example_convolution.py

Lines changed: 7 additions & 6 deletions

```diff
@@ -1,6 +1,7 @@
+# Copyright (c) Tile-AI Corporation.
+# Licensed under the MIT License.
 import torch
 import tilelang
-from tilelang import Profiler
 from tilelang.autotuner import *
 import tilelang.language as T
 import itertools
@@ -145,14 +146,14 @@ def ref_program(A, B, stride, padding, dilation):
         N, C, H, W, F, K, S, D, P, tune=args.tune)(
             block_M=256, block_N=128, block_K=64, num_stages=4, threads=256)
     ref_program = partial(ref_program, stride=S, padding=P, dilation=D)
-    mod, params = tilelang.lower(program)
-    mod = Profiler(mod, params, [2], tilelang.TensorSupplyType.Normal)
-    mod.assert_allclose(ref_program, rtol=0.01, atol=0.01)
+    kernel = tilelang.compile(program, out_idx=[2])
+    profiler = kernel.get_profiler(tilelang.TensorSupplyType.Normal)
+    profiler.assert_allclose(ref_program, rtol=0.01, atol=0.01)
     print("All checks pass.")
-    latency = mod.do_bench(ref_program, warmup=500)
+    latency = profiler.do_bench(ref_program, warmup=500)
     print("Ref: {:.2f} ms".format(latency))
     print("Ref: {:.2f} TFlops".format(total_flops / latency * 1e-9))
-    latency = mod.do_bench(mod.func, warmup=500)
+    latency = profiler.do_bench(warmup=500)
     print("Tile-lang: {:.2f} ms".format(latency))
     print("Tile-lang: {:.2f} TFlops".format(total_flops / latency * 1e-9))
 else:
```

examples/deepseek_deepgemm/example_deepgemm_fp8_2xAcc.py

Lines changed: 5 additions & 6 deletions

```diff
@@ -148,8 +148,8 @@ def calc_diff(x, y):
 
 def assert_tl_gemm_correctness(M, N, K, in_dtype, out_dtype, accum_dtype):
     gemm = tl_gemm(M, N, K, in_dtype, out_dtype, accum_dtype)
-    mod, params = TL.lower(gemm)
-    src_code = mod.imported_modules[0].get_source()
+    kernel = TL.compile(gemm, out_idx=[])
+    src_code = kernel.get_kernel_source()
 
     # src_code is the generated cuda source
     assert src_code is not None
@@ -165,16 +165,15 @@ def assert_tl_gemm_correctness(M, N, K, in_dtype, out_dtype, accum_dtype):
 
     C = torch.zeros(M, N, device="cuda", dtype=out_dtype)
 
-    mod = TL.Profiler(mod, params, [], TL.TensorSupplyType.Integer)
-
-    mod(A_fp8, B_fp8, C, A_scale, B_scale)
+    kernel(A_fp8, B_fp8, C, A_scale, B_scale)
     # Get Reference Result
     ref_c = ref_deepgemm_fp8(A_fp8, B_fp8, A_scale, B_scale, out_dtype)
     diff = calc_diff(C, ref_c)
     print(f"diff: {diff}")
     assert diff < 1e-3
 
-    latency = mod.do_bench(mod.func, warmup=25)
+    profiler = kernel.get_profiler()
+    latency = profiler.do_bench(warmup=25)
     # Ensure that the latency is not None
     assert latency is not None
     print(f"latency: {latency} ms")
```
