Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
95 commits
Select commit Hold shift + click to select a range
6e9fb85
Perf支持调Gems
jiahuiwen-baai Dec 23, 2024
ca432fb
test
jiahuiwen-baai Dec 24, 2024
bec5e58
test
jiahuiwen-baai Dec 24, 2024
828de19
test
jiahuiwen-baai Dec 24, 2024
2d5abd6
test
jiahuiwen-baai Dec 24, 2024
e63dbc3
rename
jiahuiwen-baai Dec 25, 2024
02c5177
add log
jiahuiwen-baai Dec 25, 2024
a150002
add log
jiahuiwen-baai Dec 25, 2024
d796265
test
jiahuiwen-baai Dec 25, 2024
d9331bb
test
jiahuiwen-baai Dec 26, 2024
66669b0
test
jiahuiwen-baai Dec 26, 2024
21cc84a
add log
jiahuiwen-baai Dec 30, 2024
bc1bd1a
test
jiahuiwen-baai Dec 30, 2024
af942f0
test
jiahuiwen-baai Dec 30, 2024
393aee1
test
jiahuiwen-baai Dec 30, 2024
fe48eb1
'test'
jiahuiwen-baai Dec 30, 2024
272752d
test
jiahuiwen-baai Dec 30, 2024
ab8c55b
test
jiahuiwen-baai Dec 31, 2024
4dbb506
Changes
jiahuiwen-baai Dec 31, 2024
893de0f
Merge branch 'main' of https://github.com/FlagOpen/FlagPerf into jhw-…
jiahuiwen-baai Jan 3, 2025
930331c
Merge pull request #1 from FlagOpen/main
jiahuiwen-baai Jan 3, 2025
eab2e12
test
jiahuiwen-baai Jan 3, 2025
7b1a3db
test
jiahuiwen-baai Jan 6, 2025
f54ca51
test
jiahuiwen-baai Jan 6, 2025
825db4a
test
jiahuiwen-baai Jan 6, 2025
fa8a58f
test
jiahuiwen-baai Jan 6, 2025
7c2b934
test + log
jiahuiwen-baai Jan 6, 2025
c8ac5b1
test
jiahuiwen-baai Jan 6, 2025
e696a2c
test
jiahuiwen-baai Jan 6, 2025
e55b090
test
jiahuiwen-baai Jan 6, 2025
a0845f0
test
jiahuiwen-baai Jan 6, 2025
e50fd81
test add log
jiahuiwen-baai Jan 6, 2025
c298033
test
jiahuiwen-baai Jan 6, 2025
a1d8c9c
test
jiahuiwen-baai Jan 6, 2025
c8f2676
test
jiahuiwen-baai Jan 6, 2025
b511b7e
test
jiahuiwen-baai Jan 7, 2025
09bc435
test
jiahuiwen-baai Jan 7, 2025
8d78640
test
jiahuiwen-baai Jan 7, 2025
8879d86
test
jiahuiwen-baai Jan 7, 2025
25e32fe
test
jiahuiwen-baai Jan 7, 2025
501a80b
test
jiahuiwen-baai Jan 7, 2025
a96cf88
test
jiahuiwen-baai Jan 7, 2025
234647f
test
jiahuiwen-baai Jan 7, 2025
a220710
test
jiahuiwen-baai Jan 7, 2025
0171e42
test
jiahuiwen-baai Jan 7, 2025
4aafdfe
test
jiahuiwen-baai Jan 7, 2025
8a46ca0
Changes
jiahuiwen-baai Jan 8, 2025
f09bff7
test
jiahuiwen-baai Jan 8, 2025
83c3d2a
test
jiahuiwen-baai Jan 8, 2025
4e996dc
test
githublic Jan 8, 2025
08d3a9d
test
jiahuiwen-baai Jan 9, 2025
ff84256
test
jiahuiwen-baai Jan 9, 2025
1cb0f0a
test
jiahuiwen-baai Jan 9, 2025
dc086cf
test
jiahuiwen-baai Jan 9, 2025
872ae8a
test
jiahuiwen-baai Jan 10, 2025
c9f0727
test
jiahuiwen-baai Jan 10, 2025
6a83f4d
test
jiahuiwen-baai Jan 13, 2025
1e12413
test
jiahuiwen-baai Jan 13, 2025
9e7f5ac
test
jiahuiwen-baai Jan 13, 2025
a85addf
test
jiahuiwen-baai Jan 14, 2025
50a49a4
test
jiahuiwen-baai Jan 14, 2025
fea2644
test
jiahuiwen-baai Jan 14, 2025
60bc23c
test
jiahuiwen-baai Jan 15, 2025
587aea5
test
jiahuiwen-baai Jan 15, 2025
fd09e05
test
jiahuiwen-baai Jan 15, 2025
8db26c0
test
jiahuiwen-baai Jan 23, 2025
0bfd71a
test
jiahuiwen-baai Jan 23, 2025
733bae9
test
jiahuiwen-baai Jan 23, 2025
ae6acde
test
jiahuiwen-baai Jan 23, 2025
c3d8359
test
jiahuiwen-baai Jan 23, 2025
48a05da
test
jiahuiwen-baai Jan 23, 2025
720d28a
test
jiahuiwen-baai Jan 23, 2025
9edbf2d
test
jiahuiwen-baai Jan 23, 2025
e992b49
Merge pull request #2 from FlagOpen/main
jiahuiwen-baai Feb 14, 2025
9fd6765
update
jiahuiwen-baai Feb 14, 2025
10909b7
delate venv
jiahuiwen-baai Feb 14, 2025
0131d25
update
jiahuiwen-baai Feb 17, 2025
3f86f43
Merge pull request #3 from jiahuiwen-baai/jhw-baai
jiahuiwen-baai Feb 19, 2025
a37b848
test
jiahuiwen-baai Apr 9, 2025
1acf9b3
test
jiahuiwen-baai Apr 9, 2025
de362cc
test
jiahuiwen-baai Apr 10, 2025
8943d49
调试
jiahuiwen-baai Apr 14, 2025
7ec5985
调试
jiahuiwen-baai Apr 15, 2025
c0a45c3
调试
jiahuiwen-baai Apr 17, 2025
bf12dfc
调试
jiahuiwen-baai Apr 17, 2025
0117aea
调试
jiahuiwen-baai Apr 17, 2025
a210031
调试
jiahuiwen-baai Apr 17, 2025
4533581
调试
jiahuiwen-baai Apr 17, 2025
330b618
调试
jiahuiwen-baai Apr 18, 2025
867f1f1
test
jiahuiwen-baai Apr 18, 2025
f666596
test
jiahuiwen-baai Apr 18, 2025
c4dd250
test
jiahuiwen-baai Apr 18, 2025
0f0a719
Merge pull request #4 from jiahuiwen-baai/jhw-baai
jiahuiwen-baai Apr 18, 2025
628cde9
test
jiahuiwen-baai Apr 21, 2025
ff24eee
Merge pull request #5 from jiahuiwen-baai/jhw-baai
jiahuiwen-baai Apr 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion operation/benchmarks/drivers/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import time
from loguru import logger
from triton.testing import do_bench as kernel_bench
import os
import subprocess


# test operation correctness
def do_correctness(operation):
flaggems_dir = os.getenv("FLAGGEMS_WORK_DIR", "/")
gems_repo = subprocess.check_output(
["find", flaggems_dir, "-type", "d", "-name", "FlagGems"], text=True).strip()

p = subprocess.Popen(
f"cd {os.path.join(gems_repo, 'tests')} && python3 test_named_ops.py --name {operation} --device cpu ",
shell=True
Expand All @@ -22,8 +23,44 @@ def do_correctness(operation):

return p.returncode


# test operation performance
def do_performance(mode, warmup, result_log_dir):
    """Run the FlagGems benchmark suite and copy its log into result_log_dir.

    Args:
        mode: timing mode forwarded to pytest (e.g. "cpu").
        warmup: warmup iteration count forwarded to pytest (string).
        result_log_dir: directory that receives the benchmark log as
            "result.log.txt".

    Returns:
        Tuple (benchmark return code, log-copy return code); (0, 0) on success.
    """
    # Locate the FlagGems checkout below FLAGGEMS_WORK_DIR.
    flaggems_dir = os.getenv("FLAGGEMS_WORK_DIR", "/")
    gems_repo = subprocess.check_output(
        ["find", flaggems_dir, "-type", "d", "-name", "FlagGems"], text=True).strip()
    bench_dir = os.path.join(gems_repo, 'benchmark')
    log_name = f"result--level_core--mode_{mode}--warmup_{warmup}--record_log.log"

    # Delete the log left over from a previous run first, so that old and new
    # results cannot be mixed up.  Errors (file absent) are ignored.
    del_file = os.path.join(bench_dir, log_name)
    del_process = subprocess.Popen(["rm", del_file],
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    del_process.communicate()

    # Run the full core-level benchmark over all operators.  (Single-operator
    # runs would add e.g. "-m mm"; single-file runs name the test file.)
    p = subprocess.Popen(
        f"cd {bench_dir} && pytest --level core --mode {mode} --warmup {warmup} --record log",
        shell=True
    )
    p.wait()

    # Copy the full-run log produced by the benchmark harness to the caller's
    # result directory under a fixed name.
    log_dir = os.path.join(gems_repo, "benchmark", log_name)
    cp_subprocess = subprocess.run(["cp", f"{log_dir}", f"{result_log_dir}/result.log.txt"],
                                   check=True)
    return p.returncode, cp_subprocess.returncode


grad_outputs = None


def do(exec_func, exec_args, bp=False):
global grad_outputs
if bp:
Expand Down
103 changes: 103 additions & 0 deletions operation/benchmarks/drivers/parse_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Copyright (c) 2024 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License")
# !/usr/bin/env python3
# -*- coding: UTF-8 -*-

import json
import os
from collections import defaultdict
from loguru import logger


def parse_log_file(spectflops, mode, warmup, log_dir, result_log_path):
    """Parse a benchmark log and write/merge the metrics into result.json.

    If result.json already exists (e.g. a previous pass ran with a different
    mode/warmup), the new metrics are merged into the existing per-case dicts;
    otherwise a fresh file is written.

    Args:
        spectflops: peak TFLOPS of the chip/dataformat, as a string.
        mode: "cpu" or "cuda"; selects which metric names are recorded.
        warmup: warmup count as a string; "0" selects no-warmup metrics.
        log_dir: directory containing "result.log.txt".
        result_log_path: directory for "result.json".
    """
    log_file = os.path.join(log_dir, "result.log.txt")
    save_log_path = os.path.join(result_log_path, "result.json")
    if os.path.isfile(save_log_path):
        with open(save_log_path, 'r+', encoding='utf-8') as file_r:
            try:
                # Wrap the loaded dict in a defaultdict: get_result_data does
                # res[key].update(...), which would KeyError on any case key
                # not present in the previous run.
                res = defaultdict(dict, json.loads(file_r.read()))
                result_data = get_result_data(log_file, res, spectflops, mode, warmup)
                # Rewrite the file in place with the merged results.
                file_r.seek(0)
                file_r.write(json.dumps(result_data, ensure_ascii=False))
                file_r.truncate()
            except json.JSONDecodeError:
                logger.error("result.json exists but contains no valid JSON; skipping merge")
    else:
        with open(save_log_path, 'w') as file_w:
            res = defaultdict(dict)
            result_data = get_result_data(log_file, res, spectflops, mode, warmup)
            file_w.write(json.dumps(result_data, ensure_ascii=False))


""" 参数说明
# 时延:1 无预热时延 Latency-No warmup:no_warmup_latency,2 预热时延 Latency-Warmup:warmup_latency
# 吞吐率:3 Raw-Throughput原始吞吐:raw_throughput, 4 Core-Throughput是核心吞吐:core_throughput
# 算力:5 实际算力开销:ctflops, 6 实际算力利用率:cfu, 7 实际算力开销-内核时间:ktflops, 8 实际算力利用率-内核时间:kfu
"""
def get_result_data(log_file, res, spectflops, mode, warmup):
with open(log_file, 'r') as file_r:
lines = file_r.readlines()
for line in lines:
if line.startswith("[INFO]"):
json_data = line[6:].strip()
try:
data = json.loads(json_data)
op_name = data.get("op_name")
dtype = data.get("dtype")
results = data.get("result")
for result in results:
shape_detail = result.get("shape_detail")
latency_base = result.get("latency_base")
if mode == "cpu" and warmup == "0":
no_warmup_latency = result.get("latency")
parse_data = {
"op_name": op_name,
"dtype": dtype,
"shape_detail": shape_detail,
"latency_base_cpu_nowarm": latency_base,
"no_warmup_latency": no_warmup_latency
}
res[f"{op_name}_{dtype}_{shape_detail}"].update(parse_data)
elif mode == "cpu" and warmup != "0":
warmup_latency = result.get("latency")
raw_throughput = 1 / float(warmup_latency)
ctflops = result.get("tflops")
if ctflops is None:
cfu = None
else:
cfu = round(100.0 * float(ctflops) / 1e12 / float(spectflops), 2)
parse_data = {
"op_name": op_name,
"dtype": dtype,
"shape_detail": shape_detail,
"latency_base_cpu_warm": latency_base,
"warmup_latency": warmup_latency,
"raw_throughput": raw_throughput,
"ctflops": ctflops,
"cfu": cfu
}
res[f"{op_name}_{dtype}_{shape_detail}"].update(parse_data)
elif mode == "cuda" and warmup != "0":
kerneltime = result.get("latency")
core_throughput = 1 / float(kerneltime)
ktflops = result.get("tflops")
if ktflops is None:
kfu = None
else:
kfu = round(100.0 * float(ktflops) / 1E12 / float(spectflops), 2)
parse_data = {
"op_name": op_name,
"dtype": dtype,
"shape_detail": shape_detail,
"latency_base_cuda_warm": latency_base,
"kerneltime": kerneltime,
"core_throughput": core_throughput,
"ktflops": ktflops,
"kfu": kfu
}
res[f"{op_name}_{dtype}_{shape_detail}"].update(parse_data)
except json.JSONDecodeError as e:
logger.error(f"Error decoding JSON: {e}")
return res
128 changes: 128 additions & 0 deletions operation/benchmarks/opv2/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Copyright (c) 2024 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License")
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import torch
import os
import time
from argparse import ArgumentParser, Namespace
import yaml
import sys
import subprocess

sys.path.append("..")
from drivers.utils import *
from drivers.calculate import *
from drivers.parse_log import *


def parse_args():
    """Build the CLI namespace for one operator benchmark run.

    All options are required strings.  Unrecognized options are collected
    into args.unknown_args instead of aborting the run.
    """
    parser = ArgumentParser(description=" ")
    # (flag, help) pairs — every option shares type=str, required=True.
    options = (
        ("--vendor", "vendor name like nvidia"),
        ("--case_name", "op name like mm"),
        ("--spectflops", "spectflops of current dataformat"),
        ("--dataformat", "like FP32,FP16"),
        ("--oplib", "impl like pytorch/flaggems/cpp"),
        ("--chip", "chip like A100_40_SXM"),
        ("--mode", "mode like cpu"),
        ("--warmup", "warmup"),
        ("--log_dir", "abs log dir"),
        ("--result_log_path", "result log path for FlagPerf/operation/result"),
    )
    for flag, help_text in options:
        parser.add_argument(flag, type=str, required=True, help=help_text)

    args, unknown_args = parser.parse_known_args()
    args.unknown_args = unknown_args
    return args


def main(config):
    """Run correctness and performance checks for one operator case.

    Runs the FlagGems correctness test for config.case_name, then the
    benchmark in config.mode with config.warmup iterations, and parses the
    resulting log into result.json under config.result_log_path.

    Returns:
        Tuple (correctness, performance) of booleans.
    """
    # do_correctness returns the test process's exit code; 0 means pass.
    correctness = do_correctness(config.case_name) == 0

    # do_performance returns (benchmark_rc, log_copy_rc).  The previous code
    # compared the whole tuple to 0, which is always False; unpack instead.
    bench_rc, copy_rc = do_performance(config.mode, config.warmup, config.log_dir)
    performance = bench_rc == 0 and copy_rc == 0

    parse_log_file(config.spectflops, config.mode, config.warmup,
                   config.log_dir, config.result_log_path)
    # NOTE(review): the old torch-based do_test/cal_perf/print_result flow was
    # commented out when the FlagGems path was added, so these flags are not
    # reported anywhere yet; return them so callers can start using them.
    return correctness, performance

if __name__ == "__main__":
config = parse_args()
# with open("case_config.yaml", "r") as file:
# case_config = yaml.safe_load(file)
# adapt_torch(config.vendor)
# with open(os.path.join(config.vendor, config.chip, "case_config.yaml"),
# "r") as file:
# case_config_vendor = yaml.safe_load(file)
# case_config.update(case_config_vendor)
# case_config = Namespace(**case_config)

if config.oplib == "flaggems":
import flag_gems
flag_gems.enable()
print("Using flaggems")
else:
print("Using nativetorch")
main(config)
5 changes: 4 additions & 1 deletion operation/configs/host.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ CLEAR_CACHES: True
ACCE_VISIBLE_DEVICE_ENV_NAME: "CUDA_VISIBLE_DEVICES"
# "operation:dataFormat:chip": "docker_images"
# now only support flaggems and nativepytorch
MODE: "cpu"
WARMUP: 1000
CASES:
"mm:FP16:312:nativetorch:A100_40_SXM": "ngctorch2403"
# "mm:FP16:312:nativetorch:A100_40_SXM": "ngctorch2403"
"opv2:mm:FP16:312:flaggems:A100_40_SXM": "ngctorch2403"
# "mm:FP16:flaggems:A100_40_SXM": "ngctorch2403"
# "mm:FP16:nativetorch:A100_40_SXM": "ngctorch2403"
# 'exp:FP32:nativetorch:R300p" : "xpytorch029"
Expand Down
26 changes: 21 additions & 5 deletions operation/container_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ def parse_args():
required=True,
help="vendor name like nvidia")

parser.add_argument("--mode",
type=str,
required=True,
help="mode like cpu")

parser.add_argument("--warmup",
type=str,
required=True,
help="warmup")

parser.add_argument("--log_level",
type=str,
required=True,
Expand All @@ -63,6 +73,11 @@ def parse_args():
required=True,
help="abs path for FlagPerf/base")

parser.add_argument("--result_log_path",
type=str,
required=True,
help="result log path for FlagPerf/operation/result")

args, unknown_args = parser.parse_known_args()
args.unknown_args = unknown_args
return args
Expand Down Expand Up @@ -96,21 +111,22 @@ def write_pid_file(pid_file_path, pid_file):
logger.info("Success Writing PID file at " +
os.path.join(config.log_dir, "start_base_task.pid"))

op, dataformat, spectflops, oplib, chip = config.case_name.split(":")
test_file, op, dataformat, spectflops, oplib, chip = config.case_name.split(":")

case_dir = os.path.join(config.perf_path, "benchmarks", op)
case_dir = os.path.join(config.perf_path, "benchmarks", test_file)
start_cmd = "cd " + case_dir + ";python3 main.py "
start_cmd += " --vendor=" + config.vendor
start_cmd += " --case_name=" + op
start_cmd += " --spectflops=" + spectflops
start_cmd += " --dataformat=" + dataformat
start_cmd += " --oplib=" + oplib
start_cmd += " --chip=" + chip

start_cmd += " --mode=" + config.mode
start_cmd += " --warmup=" + config.warmup
start_cmd += " --log_dir=" + config.log_dir
start_cmd += " --result_log_path=" + config.result_log_path
script_log_file = os.path.join(os.path.dirname(logfile),
"operation.log.txt")

logger.info(start_cmd)
logger.info(script_log_file)

f = open(script_log_file, "w")
Expand Down
21 changes: 21 additions & 0 deletions operation/result_data_format/formatMDfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os


def render(extracted_values, readme_file_path, vendor, shm_size, chip):
    """Write README.md under readme_file_path from parsed benchmark results.

    Args:
        extracted_values: mapping "{op}_{dtype}_{shape}" -> per-case metrics
            dict (the keys only identify cases; the table needs the values).
        readme_file_path: destination directory for README.md.
        vendor, shm_size, chip: metadata shown in the README header.
    """
    rows = list(extracted_values.values())
    dest_file_path = os.path.join(readme_file_path, "README.md")
    markdown_table = creat_markdown_table(rows, vendor, shm_size, chip)
    with open(dest_file_path, 'w') as file:
        file.write(markdown_table)


def creat_markdown_table(data, vendor, shm_size, chip):
    """Build the result README markdown: a chip-info header plus one table
    row per benchmark case.

    Metrics a run did not produce (e.g. CUDA kernel numbers after a CPU-only
    pass) render as "None" instead of raising KeyError, since get_result_data
    only fills the keys for the mode that actually ran.
    """
    v_chip = f'{vendor}_{chip}'
    table = f"# 参评AI芯片信息\n\n * 厂商:{vendor}\n * 产品名称:{v_chip}\n * 产品型号:{chip}\n * SHM_SIZE:{shm_size}\n\n\n\n"
    table += "# 评测结果\n\n"
    table += "| op_name | dtype | shape_detail | 无预热时延(Latency-No warmup) | 预热时延(Latency-Warmup) | 原始吞吐(Raw-Throughput)| 核心吞吐(Core-Throughput) | 实际算力开销 | 实际算力利用率 | 实际算力开销(内核时间) | 实际算力利用率(内核时间) |\n| --- | ---| --- | ---| --- | ---| --- | ---| --- | ---| --- |\n"
    # Column order mirrors the header above.
    columns = ("op_name", "dtype", "shape_detail", "no_warmup_latency",
               "warmup_latency", "raw_throughput", "core_throughput",
               "ctflops", "cfu", "ktflops", "kfu")
    for row in data:
        table += "| " + " | ".join(str(row.get(col)) for col in columns) + " |\n"
    return table
Loading