diff --git a/examples/perf_benchmark/batch_benchmark.py b/examples/perf_benchmark/batch_benchmark.py index e489b7d0d..94ef01059 100644 --- a/examples/perf_benchmark/batch_benchmark.py +++ b/examples/perf_benchmark/batch_benchmark.py @@ -1,4 +1,5 @@ -from benchmark_plotter import plot_batch_benchmark +from benchmark_report_generator import generate_report +from benchmark_configs import BenchmarkConfigs import argparse import subprocess import os @@ -7,8 +8,13 @@ # Create a struct to store the arguments class BenchmarkArgs: - def __init__(self, renderer_name, rasterizer, n_envs, n_steps, resX, resY, camera_posX, camera_posY, camera_posZ, camera_lookatX, camera_lookatY, camera_lookatZ, camera_fov, mjcf, benchmark_result_file_path): - self.renderer_name = renderer_name + def __init__(self, + renderer, rasterizer, n_envs, n_steps, resX, resY, + camera_posX, camera_posY, camera_posZ, + camera_lookatX, camera_lookatY, camera_lookatZ, + camera_fov, mjcf, benchmark_result_file, benchmark_config_file, + max_bounce, spp, gui=False, benchmark_script=None, renderer_timeout=None): + self.renderer = renderer self.rasterizer = rasterizer self.n_envs = n_envs self.n_steps = n_steps @@ -22,46 +28,50 @@ def __init__(self, renderer_name, rasterizer, n_envs, n_steps, resX, resY, camer self.camera_lookatZ = camera_lookatZ self.camera_fov = camera_fov self.mjcf = mjcf - self.benchmark_result_file_path = benchmark_result_file_path + self.benchmark_result_file = benchmark_result_file + self.benchmark_config_file = benchmark_config_file + self.max_bounce = max_bounce + self.spp = spp + self.gui = gui + self.benchmark_script = benchmark_script + self.renderer_timeout = renderer_timeout @staticmethod - def parse_args(): + def parse_benchmark_args(): parser = argparse.ArgumentParser() - parser.add_argument("-d", "--renderer_name", type=str, default="batch_renderer") + parser.add_argument("-d", "--renderer", required=True, type=str) parser.add_argument("-r", "--rasterizer", action="store_true", 
default=False) - parser.add_argument("-n", "--n_envs", type=int, default=1024) - parser.add_argument("-s", "--n_steps", type=int, default=1) - parser.add_argument("-x", "--resX", type=int, default=1024) - parser.add_argument("-y", "--resY", type=int, default=1024) - parser.add_argument("-i", "--camera_posX", type=float, default=1.5) - parser.add_argument("-j", "--camera_posY", type=float, default=0.5) - parser.add_argument("-k", "--camera_posZ", type=float, default=1.5) - parser.add_argument("-l", "--camera_lookatX", type=float, default=0.0) - parser.add_argument("-m", "--camera_lookatY", type=float, default=0.0) - parser.add_argument("-o", "--camera_lookatZ", type=float, default=0.5) - parser.add_argument("-v", "--camera_fov", type=float, default=45) - parser.add_argument("-f", "--mjcf", type=str, default="xml/franka_emika_panda/panda.xml") - parser.add_argument("-g", "--benchmark_result_file_path", type=str, default="benchmark.csv") + parser.add_argument("-n", "--n_envs", required=True, type=int) + parser.add_argument("-x", "--resX", required=True, type=int) + parser.add_argument("-y", "--resY", required=True, type=int) + parser.add_argument("-f", "--mjcf", required=True, type=str) + parser.add_argument("-g", "--benchmark_result_file", required=True, type=str) + parser.add_argument("-c", "--benchmark_config_file", required=True, type=str) args = parser.parse_args() + benchmark_config = BenchmarkConfigs(args.benchmark_config_file) benchmark_args = BenchmarkArgs( - renderer_name=args.renderer_name, + renderer=args.renderer, rasterizer=args.rasterizer, n_envs=args.n_envs, - n_steps=args.n_steps, + n_steps=benchmark_config.n_steps, resX=args.resX, resY=args.resY, - camera_posX=args.camera_posX, - camera_posY=args.camera_posY, - camera_posZ=args.camera_posZ, - camera_lookatX=args.camera_lookatX, - camera_lookatY=args.camera_lookatY, - camera_lookatZ=args.camera_lookatZ, - camera_fov=args.camera_fov, + camera_posX=benchmark_config.camera_pos[0], + 
camera_posY=benchmark_config.camera_pos[1], + camera_posZ=benchmark_config.camera_pos[2], + camera_lookatX=benchmark_config.camera_lookat[0], + camera_lookatY=benchmark_config.camera_lookat[1], + camera_lookatZ=benchmark_config.camera_lookat[2], + camera_fov=benchmark_config.camera_fov, mjcf=args.mjcf, - benchmark_result_file_path=args.benchmark_result_file_path, + benchmark_result_file=args.benchmark_result_file, + benchmark_config_file=args.benchmark_config_file, + max_bounce=benchmark_config.max_bounce, + spp=benchmark_config.spp, + gui=benchmark_config.gui, ) print(f"Benchmark with args:") - print(f" renderer_name: {benchmark_args.renderer_name}") + print(f" renderer: {benchmark_args.renderer}") print(f" rasterizer: {benchmark_args.rasterizer}") print(f" n_envs: {benchmark_args.n_envs}") print(f" n_steps: {benchmark_args.n_steps}") @@ -70,89 +80,63 @@ def parse_args(): print(f" camera_lookat: ({benchmark_args.camera_lookatX}, {benchmark_args.camera_lookatY}, {benchmark_args.camera_lookatZ})") print(f" camera_fov: {benchmark_args.camera_fov}") print(f" mjcf: {benchmark_args.mjcf}") - print(f" benchmark_result_file_path: {benchmark_args.benchmark_result_file_path}") + print(f" benchmark_result_file: {benchmark_args.benchmark_result_file}") + print(f" benchmark_config_file: {benchmark_args.benchmark_config_file}") + print(f" max_bounce: {benchmark_args.max_bounce}") + print(f" spp: {benchmark_args.spp}") + print(f" gui: {benchmark_args.gui}") return benchmark_args class BatchBenchmarkArgs: - def __init__(self, use_full_list, continue_from): - self.use_full_list = use_full_list + def __init__(self, config_file, continue_from): + self.config_file = config_file self.continue_from = continue_from -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("-f", "--use_full_list", action="store_true", default=False) - parser.add_argument("-c", "--continue_from", type=str, default=None) - args = parser.parse_args() - return 
BatchBenchmarkArgs(use_full_list=args.use_full_list, continue_from=args.continue_from) + def parse_batch_benchmark_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--config_file", type=str, default="benchmark_config_smoke_test.yml") + parser.add_argument("-c", "--continue_from", type=str, default=None) + args = parser.parse_args() + return BatchBenchmarkArgs( + config_file=args.config_file, + continue_from=args.continue_from + ) -def create_batch_args(benchmark_result_file_path, use_full_list=False): + +def create_batch_args(benchmark_result_file, config_file): # Ensure the directory exists - os.makedirs(os.path.dirname(benchmark_result_file_path), exist_ok=True) + os.makedirs(os.path.dirname(benchmark_result_file), exist_ok=True) - # Create a list of all the possible combinations of arguments - # and return them as a list of BenchmarkArgs - full_mjcf_list = ["xml/franka_emika_panda/panda.xml", "xml/unitree_g1/g1.xml", "xml/unitree_go2/go2.xml"] - full_renderer_list = ["batch_renderer", "pyrender"] - full_rasterizer_list = [True, False] - full_batch_size_list = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384] - square_resolution_list = [ - (64, 64), (128, 128), (256, 256), (512, 512), (1024, 1024), (2048, 2048), (4096, 4096), (8192, 8192) - ] - four_three_resolution_list = [ - (320, 240), (640, 480), (800, 600), (1024, 768), (1280, 960), (1600, 1200), (1920, 1440), (2048, 1536), (2560, 1920), (3200, 2400), (4096, 3072), (8192, 6144), - ] - sixteen_nine_resolution_list = [ - (320, 180), (640, 360), (800, 450), (1024, 576), (1280, 720), (1600, 900), (1920, 1080), (2048, 1152), (2560, 1440), (3200, 1800), (4096, 2304), (8192, 4608), - ] - full_resolution_list = square_resolution_list + four_three_resolution_list + sixteen_nine_resolution_list - - # Minimal mjcf, resolution, and batch size - minimal_renderer_list = ["batch_renderer", "pyrender"] - minimal_rasterizer_list = [True] - 
minimal_mjcf_list = [ - "xml/franka_emika_panda/panda.xml" - ] - minimal_batch_size_list = [ - #2048, 3072, 4096, 6144, 8192, 12288, 16384 - 1024, 2048 - ] - #minimal_batch_size_list = full_batch_size_list - minimal_resolution_list = [ - (128, 128), - (256, 256), - ] - - if use_full_list: - renderer_list = full_renderer_list - rasterizer_list = full_rasterizer_list - mjcf_list = full_mjcf_list - resolution_list = full_resolution_list - batch_size_list = full_batch_size_list - else: - renderer_list = minimal_renderer_list - rasterizer_list = minimal_rasterizer_list - mjcf_list = minimal_mjcf_list - resolution_list = minimal_resolution_list - batch_size_list = minimal_batch_size_list + # Load configuration + config = BenchmarkConfigs(config_file) + mjcf_list = config.mjcf_list + renderer_list = config.renderer_list + rasterizer_list = config.rasterizer_list + batch_size_list = config.batch_size_list + resolution_list = config.resolution_list + n_steps = config.n_steps + camera_pos = config.camera_pos + camera_lookat = config.camera_lookat + camera_fov = config.camera_fov + max_bounce = config.max_bounce + spp = config.spp + gui = config.gui # Batch data for resolution and batch size needs to be sorted in ascending order of resX x resY # so that if one resolution fails, all the resolutions, which are larger, will be skipped. 
resolution_list.sort(key=lambda x: x[0] * x[1]) - # Hardcoded parameters - n_steps = 1 - camera_pos = (1.5, 0.5, 1.5) - camera_lookat = (0.0, 0.0, 0.5) - camera_fov = 45 - # Create a hierarchical dictionary to store all combinations batch_args_dict = {} # Build hierarchical structure - for renderer in renderer_list: + for renderer_info in renderer_list: + renderer = renderer_info['renderer'] + benchmark_script = renderer_info['benchmark_script'] + renderer_timeout = renderer_info['timeout'] batch_args_dict[renderer] = {} for rasterizer in rasterizer_list: - batch_args_dict[renderer][rasterizer] = {} + batch_args_dict[renderer][rasterizer] = {} for mjcf in mjcf_list: batch_args_dict[renderer][rasterizer][mjcf] = {} for batch_size in batch_size_list: @@ -161,7 +145,7 @@ def create_batch_args(benchmark_result_file_path, use_full_list=False): resX, resY = resolution # Create benchmark args for this combination args = BenchmarkArgs( - renderer_name=renderer, + renderer=renderer, rasterizer=rasterizer, n_envs=batch_size, n_steps=n_steps, @@ -175,36 +159,42 @@ def create_batch_args(benchmark_result_file_path, use_full_list=False): camera_lookatZ=camera_lookat[2], camera_fov=camera_fov, mjcf=mjcf, - benchmark_result_file_path=benchmark_result_file_path + benchmark_result_file=benchmark_result_file, + benchmark_config_file=config_file, + max_bounce=max_bounce, + spp=spp, + gui=gui, + benchmark_script=benchmark_script, + renderer_timeout=renderer_timeout, ) batch_args_dict[renderer][rasterizer][mjcf][batch_size][(resX,resY)] = args return batch_args_dict -def create_benchmark_result_file(continue_from_file_path): - if continue_from_file_path is not None: - if not os.path.exists(continue_from_file_path): - raise FileNotFoundError(f"Continue from file not found: {continue_from_file_path}") - print(f"Continuing from file: {continue_from_file_path}") - return continue_from_file_path +def create_benchmark_result_file(continue_from_file): + if continue_from_file is not None: + if 
not os.path.exists(continue_from_file): + raise FileNotFoundError(f"Continue from file not found: {continue_from_file}") + print(f"Continuing from file: {continue_from_file}") + return continue_from_file else: # Create benchmark result data file with header benchmark_data_directory = "logs/benchmark" if not os.path.exists(benchmark_data_directory): os.makedirs(benchmark_data_directory) benchmark_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - benchmark_result_file_path = f"{benchmark_data_directory}/batch_benchmark_{benchmark_timestamp}.csv" - with open(benchmark_result_file_path, "w") as f: - f.write("result,mjcf,renderer,rasterizer,n_envs,n_steps,resX,resY,camera_posX,camera_posY,camera_posZ,camera_lookatX,camera_lookatY,camera_lookatZ,camera_fov,time_taken,time_taken_per_env,fps,fps_per_env\n") - print(f"Created new benchmark result file: {benchmark_result_file_path}") - return benchmark_result_file_path + benchmark_result_file = f"{benchmark_data_directory}/batch_benchmark_{benchmark_timestamp}.csv" + with open(benchmark_result_file, "w") as f: + f.write("result,mjcf,renderer,rasterizer,n_envs,n_steps,resX,resY,camera_posX,camera_posY,camera_posZ,camera_lookatX,camera_lookatY,camera_lookatZ,camera_fov,time_taken_gpu,time_taken_per_env_gpu,time_taken_cpu,time_taken_per_env_cpu,fps,fps_per_env\n") + print(f"Created new benchmark result file: {benchmark_result_file}") + return benchmark_result_file -def get_previous_runs(continue_from_file_path): - if continue_from_file_path is None: +def get_previous_runs(continue_from_file): + if continue_from_file is None: return [] # Read the existing benchmark data file - df = pd.read_csv(continue_from_file_path) + df = pd.read_csv(continue_from_file) # Create a list of tuples containing run info and status previous_runs = [] @@ -213,6 +203,7 @@ def get_previous_runs(continue_from_file_path): run_info = ( row['mjcf'], row['renderer'], + row['rasterizer'], row['n_envs'], (row['resX'], row['resY']), row['result'] # 
'succeeded' or 'failed' @@ -221,25 +212,12 @@ def get_previous_runs(continue_from_file_path): return previous_runs -def get_benchmark_script_path(renderer_name): - current_dir = os.path.dirname(os.path.abspath(__file__)) - if renderer_name == "batch_renderer": - return f"{current_dir}/benchmark.py" - elif renderer_name == "pyrender": - return f"{current_dir}/benchmark_pyrender.py" - else: - raise ValueError(f"Invalid renderer name: {renderer_name}") - def run_batch_benchmark(batch_args_dict, previous_runs=None): if previous_runs is None: previous_runs = [] for renderer in batch_args_dict: - benchmark_script_path = get_benchmark_script_path(renderer) - if not os.path.exists(benchmark_script_path): - raise FileNotFoundError(f"Benchmark script not found: {benchmark_script_path}") - print(f"Running benchmark for {renderer}") - + print(f"Running benchmark for {renderer}") for rasterizer in batch_args_dict[renderer]: for mjcf in batch_args_dict[renderer][rasterizer]: for batch_size in batch_args_dict[renderer][rasterizer][mjcf]: @@ -249,11 +227,11 @@ def run_batch_benchmark(batch_args_dict, previous_runs=None): break # Check if this run was in a previous execution - run_info = (mjcf, rasterizer, batch_size, resolution) + run_info = (mjcf, renderer, rasterizer, batch_size, resolution) skip_this_run = False for prev_run in previous_runs: - if run_info == prev_run[:4]: # Compare only the run parameters, not the status + if run_info == prev_run[:5]: # Compare only the run parameters, not the status skip_this_run = True - if prev_run[4] == 'failed': + if prev_run[-1] == 'failed': # status is the last tuple field (index 4 is now the resolution) # Skip this and subsequent resolutions if it failed before @@ -267,51 +245,75 @@ def run_batch_benchmark(batch_args_dict, previous_runs=None): batch_args = batch_args_dict[renderer][rasterizer][mjcf][batch_size][resolution] # launch a process to run the benchmark + current_dir = os.path.dirname(os.path.abspath(__file__)) + benchmark_script_path = os.path.join(current_dir, batch_args.benchmark_script) + if
os.path.exists(benchmark_script_path): + raise FileNotFoundError(f"Benchmark script not found: {benchmark_script_path}") cmd = ["python3", benchmark_script_path] if batch_args.rasterizer: - cmd.append("-r") + cmd.append("--rasterizer") cmd.extend([ - "-d", batch_args.renderer_name, - "-n", str(batch_args.n_envs), - "-s", str(batch_args.n_steps), - "-x", str(batch_args.resX), - "-y", str(batch_args.resY), - "-i", str(batch_args.camera_posX), - "-j", str(batch_args.camera_posY), - "-k", str(batch_args.camera_posZ), - "-l", str(batch_args.camera_lookatX), - "-m", str(batch_args.camera_lookatY), - "-o", str(batch_args.camera_lookatZ), - "-v", str(batch_args.camera_fov), - "-f", batch_args.mjcf, - "-g", batch_args.benchmark_result_file_path + "--renderer", batch_args.renderer, + "--n_envs", str(batch_args.n_envs), + "--resX", str(batch_args.resX), + "--resY", str(batch_args.resY), + "--mjcf", batch_args.mjcf, + "--benchmark_result_file", batch_args.benchmark_result_file, + "--benchmark_config_file", batch_args.benchmark_config_file, ]) try: + # Read timeout from config process = subprocess.Popen(cmd) - return_code = process.wait() - if return_code != 0: - raise subprocess.CalledProcessError(return_code, cmd) + try: + # Hack to avoid omniverse runs to take forever. 
+ timeout = batch_args.renderer_timeout + return_code = process.wait(timeout=timeout) + if return_code != 0: + raise subprocess.CalledProcessError(return_code, cmd) + except subprocess.TimeoutExpired: + process.kill() + process.wait() # Wait for the process to be killed + raise TimeoutError(f"Process did not complete within {timeout} seconds") except Exception as e: print(f"Error running benchmark: {str(e)}") - last_resolution_failed = True + if isinstance(e, subprocess.CalledProcessError): + last_resolution_failed = True # Write failed result without timing data - with open(batch_args.benchmark_result_file_path, 'a') as f: - f.write(f'failed,{batch_args.mjcf},{batch_args.renderer_name},{batch_args.rasterizer},{batch_args.n_envs},{batch_args.n_steps},{batch_args.resX},{batch_args.resY},{batch_args.camera_posX},{batch_args.camera_posY},{batch_args.camera_posZ},{batch_args.camera_lookatX},{batch_args.camera_lookatY},{batch_args.camera_lookatZ},{batch_args.camera_fov},,,,\n') - break + with open(batch_args.benchmark_result_file, 'a') as f: + f.write(f'failed,{batch_args.mjcf},{batch_args.renderer},{batch_args.rasterizer},{batch_args.n_envs},{batch_args.n_steps},{batch_args.resX},{batch_args.resY},{batch_args.camera_posX},{batch_args.camera_posY},{batch_args.camera_posZ},{batch_args.camera_lookatX},{batch_args.camera_lookatY},{batch_args.camera_lookatZ},{batch_args.camera_fov},,,,,,\n') + +def sort_and_dedupe_benchmark_result_file(benchmark_result_file): + # Sort by mjcf asc, renderer asc, rasterizer desc, n_envs asc, resX asc, resY asc, n_envs asc + df = pd.read_csv(benchmark_result_file) + df = df.sort_values( + by=['mjcf', 'renderer', 'rasterizer', 'resX', 'resY', 'n_envs', 'result'], + ascending=[True, True, False, True, True, True, False] + ) + + # Deduplicate by keeping the first occurrence of each unique combination of mjcf, renderer, rasterizer, resX, resY, n_envs + # Keep succeeded runs if there are multiple runs for the same combination. 
+ df = df.drop_duplicates( + subset=['mjcf', 'renderer', 'rasterizer', 'resX', 'resY', 'n_envs'], + keep='first' + ) + df.to_csv(benchmark_result_file, index=False) def main(): - batch_benchmark_args = parse_args() - benchmark_result_file_path = create_benchmark_result_file(batch_benchmark_args.continue_from) + batch_benchmark_args = BatchBenchmarkArgs.parse_batch_benchmark_args() + benchmark_result_file = create_benchmark_result_file(batch_benchmark_args.continue_from) # Get list of previous runs if continuing from a previous run previous_runs = get_previous_runs(batch_benchmark_args.continue_from) # Run benchmark in batch - batch_args_dict = create_batch_args(benchmark_result_file_path, use_full_list=batch_benchmark_args.use_full_list) + batch_args_dict = create_batch_args(benchmark_result_file, config_file=batch_benchmark_args.config_file) run_batch_benchmark(batch_args_dict, previous_runs) + # Sort benchmark result file + sort_and_dedupe_benchmark_result_file(benchmark_result_file) + # Generate plots - plot_batch_benchmark(benchmark_result_file_path) + generate_report(benchmark_result_file, config_file=batch_benchmark_args.config_file) if __name__ == "__main__": main() diff --git a/examples/perf_benchmark/benchmark_configs.py b/examples/perf_benchmark/benchmark_configs.py new file mode 100644 index 000000000..290b487f3 --- /dev/null +++ b/examples/perf_benchmark/benchmark_configs.py @@ -0,0 +1,44 @@ +import os +import yaml + +class BenchmarkConfigs: + def __init__(self, config_file): + self.load_from_config_file(config_file) + + def load_from_config_file(self, config_file): + self.config_path = os.path.join(os.path.dirname(__file__), "configs", config_file) + if not os.path.exists(self.config_path): + raise FileNotFoundError(f"Config file not found: {self.config_path}") + with open(self.config_path, 'r') as f: + config = yaml.safe_load(f) + + self.mjcf_list = config['mjcf_list'] + self.rasterizer_list = config['rasterizer_list'] + self.batch_size_list = 
config['batch_size_list'] + self.resolution_list = config['resolution_list'] + self.gui = config.get('gui', False) + + # Get renderer list (required key, no default) + self.renderer_list = config['renderer_list'] + + # Get raytracer config with defaults + raytracer_config = config.get('raytracer', {}) + self.max_bounce = raytracer_config.get('max_bounce', 2) + self.spp = raytracer_config.get('spp', 1) + + # Get simulation config with defaults + simulation_config = config.get('simulation', {}) + self.n_steps = simulation_config.get('n_steps', 1) + + # Get camera config with defaults + camera_config = config.get('camera', {}) + self.camera_pos = camera_config.get('position', [1.5, 0.5, 1.5]) + self.camera_lookat = camera_config.get('lookat', [0.0, 0.0, 0.5]) + self.camera_fov = camera_config.get('fov', 45.0) + + # Get display config; display.gui overrides the top-level gui read above + display_config = config.get('display', {}) + self.gui = display_config.get('gui', self.gui) + + # Get comparison list with defaults + self.comparison_list = config.get('comparison_list', []) \ No newline at end of file diff --git a/examples/perf_benchmark/benchmark.py b/examples/perf_benchmark/benchmark_madrona.py similarity index 60% rename from examples/perf_benchmark/benchmark.py rename to examples/perf_benchmark/benchmark_madrona.py index 4508e8f85..90284ade9 100644 --- a/examples/perf_benchmark/benchmark.py +++ b/examples/perf_benchmark/benchmark_madrona.py @@ -5,6 +5,8 @@ import genesis as gs import torch from batch_benchmark import BenchmarkArgs +import benchmark_utils +from benchmark_profiler import BenchmarkProfiler def init_gs(benchmark_args): ########################## init ########################## @@ -33,9 +35,9 @@ def init_gs(benchmark_args): ) ########################## entities ########################## - #plane = scene.add_entity( - # gs.morphs.Plane(), - #) + plane = scene.add_entity( + gs.morphs.Plane(), + ) franka = scene.add_entity( gs.morphs.MJCF(file=benchmark_args.mjcf), visualize_contact=False, @@ -67,25 +69,6
@@ def init_gs(benchmark_args): scene.build(n_envs=benchmark_args.n_envs) return scene -def add_noise_to_all_cameras(scene): - for cam in scene.visualizer.cameras: - cam.set_pose( - pos=cam.pos_all_envs + torch.rand((cam.n_envs, 3), device=cam.pos_all_envs.device) * 0.002 - 0.001, - lookat=cam.lookat_all_envs + torch.rand((cam.n_envs, 3), device=cam.lookat_all_envs.device) * 0.002 - 0.001, - up=cam.up_all_envs + torch.rand((cam.n_envs, 3), device=cam.up_all_envs.device) * 0.002 - 0.001, - ) - -def fill_gpu_cache_with_random_data(): - # 100 MB of random data - dummy_data =torch.rand(100, 1024, 1024, device="cuda") - # Make some random data manipulation to the entire tensor - dummy_data = dummy_data + 1 - dummy_data = dummy_data * 2 - dummy_data = dummy_data - 1 - dummy_data = dummy_data / 2 - dummy_data = dummy_data.abs() - dummy_data = dummy_data.sqrt() - def run_benchmark(scene, benchmark_args): try: n_envs = benchmark_args.n_envs @@ -96,39 +79,38 @@ def run_benchmark(scene, benchmark_args): rgb, depth, _, _ = scene.render_all_cams() # fill gpu cache with random data - fill_gpu_cache_with_random_data() - - # timer - from time import time - start_time = time() + # benchmark_utils.fill_gpu_cache_with_random_data() + # Profiler + profiler = BenchmarkProfiler(n_steps, n_envs) for i in range(n_steps): - rgb, depth, _, _ = scene.render_all_cams(force_render=True) + profiler.on_simulation_start() + scene.step() + profiler.on_rendering_start() + rgb, depth, _, _ = scene.render_all_cams() + profiler.on_rendering_end() + + profiler.end() + profiler.print_summary() - end_time = time() - time_taken = end_time - start_time - time_taken_per_env = time_taken / n_envs - fps = n_envs * n_steps / time_taken - fps_per_env = n_steps / time_taken - - print(f'Time taken: {time_taken} seconds') - print(f'Time taken per env: {time_taken_per_env} seconds') - print(f'FPS: {fps}') - print(f'FPS per env: {fps_per_env}') - - # Ensure the directory exists - 
os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file_path), exist_ok=True) + time_taken_gpu = profiler.get_total_rendering_gpu_time() + time_taken_cpu = profiler.get_total_rendering_cpu_time() + time_taken_per_env_gpu = profiler.get_total_rendering_gpu_time_per_env() + time_taken_per_env_cpu = profiler.get_total_rendering_cpu_time_per_env() + fps = profiler.get_rendering_fps() + fps_per_env = profiler.get_rendering_fps_per_env() # Append a line with all args and results in csv format - with open(benchmark_args.benchmark_result_file_path, 'a') as f: - f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer_name},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken},{time_taken_per_env},{fps},{fps_per_env}\n') + os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file), exist_ok=True) + with open(benchmark_args.benchmark_result_file, 'a') as f: + f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken_gpu},{time_taken_per_env_gpu},{time_taken_cpu},{time_taken_per_env_cpu},{fps},{fps_per_env}\n') except Exception as e: print(f"Error during benchmark: {e}") raise def main(): ######################## Parse arguments ####################### - benchmark_args = BenchmarkArgs.parse_args() + benchmark_args = BenchmarkArgs.parse_benchmark_args() ######################## Initialize scene ####################### 
scene = init_gs(benchmark_args) diff --git a/examples/perf_benchmark/benchmark_omni.py b/examples/perf_benchmark/benchmark_omni.py new file mode 100644 index 000000000..2522fcbd8 --- /dev/null +++ b/examples/perf_benchmark/benchmark_omni.py @@ -0,0 +1,403 @@ +# Before running, convert the assets like: +# python examples/perf_benchmark/process_xml.py \ +# --file ./genesis/assets/xml/franka_emika_panda/panda.xml + +######################## Parse arguments ####################### +# Create a struct to store the arguments +import argparse +from batch_benchmark import BenchmarkArgs +benchmark_args = BenchmarkArgs.parse_benchmark_args() + +######################## Launch app ####################### +from isaaclab.app import AppLauncher +app = AppLauncher( + headless=not benchmark_args.gui, + enable_cameras=True, + device="cuda:0", + rendering_mode="performance", +).app + +import carb +import isaaclab.sim as sim_utils +import isaacsim.core.utils.prims as prim_utils +import isaacsim.core.utils.stage as stage_utils +from isaaclab.sensors.camera import TiledCamera, TiledCameraCfg +from isaaclab.sim.converters import ( + MjcfConverter, MjcfConverterCfg, + UrdfConverter, UrdfConverterCfg +) +from isaaclab.utils.math import ( + create_rotation_matrix_from_view, + quat_from_matrix, +) +import omni.replicator.core as rep +from pxr import UsdLux, PhysxSchema + +from isaacsim.core.utils.extensions import enable_extension +enable_extension("isaacsim.asset.importer.mjcf") + +import os +import math +import numpy as np +import torch +import psutil +import pynvml +from scipy.spatial.transform import Rotation as R +from genesis.utils.image_exporter import FrameImageExporter +import benchmark_utils +from benchmark_profiler import BenchmarkProfiler + + +def load_mjcf(mjcf_path): + return MjcfConverter( + MjcfConverterCfg( + asset_path=mjcf_path, + fix_base=True, + force_usd_conversion=True + ) + ).usd_path + +def load_urdf(urdf_path): + return UrdfConverter( + UrdfConverterCfg( + 
def _print_benchmark_carb_settings(settings, title):
    """Print ``title`` followed by one ``label value`` line per watched carb setting."""
    watched = (
        ("Render mode:", "/rtx/rendermode"),
        ("Sample per pixel:", "/rtx/pathtracing/spp"),
        ("Total spp:", "/rtx/pathtracing/totalSpp"),
        ("Clamp spp:", "/rtx/pathtracing/clampSpp"),
        ("Max bounce:", "/rtx/pathtracing/maxBounces"),
        ("Optix Denoiser", "/rtx/pathtracing/optixDenoiser/enabled"),
        ("Shadows", "/rtx/shadows/enabled"),
        ("dlss/enabled:", "/rtx/post/dlss/enabled"),
        ("dlss/auto:", "/rtx/post/dlss/auto"),
        ("upscaling/enabled:", "/rtx/post/upscaling/enabled"),
        ("aa/denoiser/enabled:", "/rtx/post/aa/denoiser/enabled"),
        ("aa/taa/enabled:", "/rtx/post/aa/taa/enabled"),
        ("motionBlur/enabled:", "/rtx/post/motionBlur/enabled"),
        ("dof/enabled:", "/rtx/post/dof/enabled"),
        ("bloom/enabled:", "/rtx/post/bloom/enabled"),
        ("tonemap/enabled:", "/rtx/post/tonemap/enabled"),
        ("exposure/enabled:", "/rtx/post/exposure/enabled"),
        ("vsync:", "/app/window/vsync"),
    )
    print(title)
    for label, path in watched:
        print(label, settings.get(path))


def apply_benchmark_carb_settings(print_changes=False, args=None):
    """Configure the renderer through carb settings for a benchmark run.

    Selects the render mode from the rasterizer flag, sets the path-tracing
    sample counts and bounce limit from the benchmark arguments, and switches
    off denoising, DLSS/upscaling, post-processing effects and VSync.

    Args:
        print_changes: When true, print the watched settings before and after
            they are modified.
        args: Object exposing ``rasterizer``, ``spp`` and ``max_bounce``.
            When omitted, the module-level ``benchmark_args`` is used, which
            is what the original implementation always read.
    """
    if args is None:
        args = benchmark_args

    settings = carb.settings.get_settings()
    if print_changes:
        _print_benchmark_carb_settings(settings, "Before settings:")

    # Options: https://docs.omniverse.nvidia.com/materials-and-rendering/latest/rtx-renderer_pt.html
    render_mode = "RayTracedLighting" if args.rasterizer else "PathTracing"
    settings.set("/rtx/rendermode", render_mode)
    # NOTE(review): indentation was lost in the reviewed source; shadows are
    # assumed to be disabled for both render modes - confirm.
    settings.set("/rtx/shadows/enabled", False)

    # Path tracing settings
    settings.set("/rtx/pathtracing/spp", args.spp)
    settings.set("/rtx/pathtracing/totalSpp", args.spp)
    settings.set("/rtx/pathtracing/clampSpp", args.spp)
    settings.set("/rtx/pathtracing/maxBounces", args.max_bounce)
    settings.set("/rtx/pathtracing/optixDenoiser/enabled", False)
    settings.set("/rtx/pathtracing/adaptiveSampling/enabled", False)

    # Disable DLSS & upscaling
    settings.set("/rtx-transient/dlssg/enabled", False)
    settings.set("/rtx/post/dlss/enabled", False)
    settings.set("/rtx/post/dlss/auto", False)
    settings.set("/rtx/post/upscaling/enabled", False)

    # Disable post-processing
    settings.set("/rtx/post/aa/denoiser/enabled", False)
    settings.set("/rtx/post/aa/taa/enabled", False)
    settings.set("/rtx/post/motionBlur/enabled", False)
    settings.set("/rtx/post/dof/enabled", False)
    settings.set("/rtx/post/bloom/enabled", False)
    settings.set("/rtx/post/tonemap/enabled", False)
    settings.set("/rtx/post/exposure/enabled", False)

    # Disable VSync
    settings.set("/app/window/vsync", False)

    if print_changes:
        _print_benchmark_carb_settings(settings, "After settings:")
"urdf/plane_usd/plane.usd")) + print(plane_path) + plane_cfg = sim_utils.UsdFileCfg(usd_path=plane_path) + plane_cfg.func("/World/Origin.*/plane", plane_cfg) + + robot_name = f"{os.path.splitext(benchmark_args.mjcf)[0]}_new.xml" + robot_path = load_mjcf(os.path.join("genesis/assets", robot_name)) + print("Robot asset:", robot_path) + robot_cfg = sim_utils.UsdFileCfg(usd_path=robot_path) + robot_cfg.func("/World/Origin.*/robot", robot_cfg) + + cam_fov = math.radians(benchmark_args.camera_fov) + cam_hapert = 20.955 + cam_fol = cam_hapert / (2 * math.tan(cam_fov / 2)) + cam_quat = quat_from_matrix( + create_rotation_matrix_from_view( + cam_target, cam_eye, stage_utils.get_stage_up_axis() + ) @ R.from_euler('z', 180, degrees=True).as_matrix() + ) + cam_eye = tuple(cam_eye.detach().cpu().squeeze().numpy()) + cam_quat = tuple(cam_quat.detach().cpu().squeeze().numpy()) + + print(cam_eye, cam_quat) + print(type(cam_eye), type(cam_quat)) + + cam_0 = TiledCamera( + TiledCameraCfg( + height=benchmark_args.resX, + width=benchmark_args.resY, + offset=TiledCameraCfg.OffsetCfg( + pos=cam_eye, + rot=cam_quat, + convention="ros" + ), + prim_path="/World/Origin.*/camera", + update_period=0, + data_types=["rgb", "depth"], + spawn=sim_utils.PinholeCameraCfg( + focal_length=cam_fol, + ), + ) + ) + + ########################## cameras ########################## + dir_light_pos = torch.Tensor([[0.0, 0.0, 1.5]]) + dir_light_quat = quat_from_matrix( + create_rotation_matrix_from_view( + dir_light_pos, + torch.Tensor([[1.0, 1.0, -2.0]]), + stage_utils.get_stage_up_axis())) + dir_light_pos = tuple(dir_light_pos.detach().cpu().squeeze().numpy()) + dir_light_quat = tuple(dir_light_quat.detach().cpu().squeeze().numpy()) + dir_light_cfg = sim_utils.DistantLightCfg(intensity=500.0, angle=45.0) + dir_light_prim = dir_light_cfg.func( + "/World/DirectionalLight", dir_light_cfg, + translation=dir_light_pos, + orientation=dir_light_quat) + + cone_light_pos = torch.Tensor([[4, -4, 4]]) + 
def get_utilization_percentages(reset: bool = False, max_values: list[float] = [0.0, 0.0, 0.0, 0.0]) -> list[float]:  # noqa: B006
    """Sample system load once and return the running maxima.

    The returned list is ``[cpu %, ram %, gpu compute %, gpu memory %]``.
    GPU entries are the maximum over all devices enumerated by
    ``torch.cuda.device_count()`` and stay untouched when CUDA is unavailable.

    The mutable default for ``max_values`` is intentional: the default list is
    created once, so calls that omit the argument share it and the maxima
    persist from call to call until ``reset`` is true.

    Args:
        reset: Zero the accumulator before taking this sample.
        max_values: Four-element accumulator updated in place.

    Returns:
        The same ``max_values`` list that was updated.
    """
    if reset:
        max_values[:] = [0, 0, 0, 0]  # Reset the max values

    # CPU utilization (blocks for the 0.1 s sampling interval)
    cpu_usage = psutil.cpu_percent(interval=0.1)
    max_values[0] = max(max_values[0], cpu_usage)

    # RAM utilization
    ram_usage = psutil.virtual_memory().percent
    max_values[1] = max(max_values[1], ram_usage)

    # GPU utilization using pynvml
    if torch.cuda.is_available():
        pynvml.nvmlInit()
        try:
            for i in range(torch.cuda.device_count()):
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)

                # GPU core utilization
                gpu_percent = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
                max_values[2] = max(max_values[2], gpu_percent)

                # GPU memory usage
                gpu_memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
                gpu_memory_percent = (gpu_memory.used / gpu_memory.total) * 100
                max_values[3] = max(max_values[3], gpu_memory_percent)
        finally:
            # Always release NVML, even if a device query raised.
            pynvml.nvmlShutdown()
    return max_values
class BenchmarkProfiler:
    """Per-step GPU and CPU timer for a simulate-then-render loop.

    For every step call ``on_simulation_start()``, ``on_rendering_start()``
    and ``on_rendering_end()`` in that order, then ``end()`` once before
    querying any result. GPU durations come from CUDA events; CPU durations
    come from ``time.perf_counter()``, so the values stored in ``cpu_times``
    are only meaningful as differences.

    All aggregate getters cover the steps actually recorded so far
    (``current_step``), and raise if ``end()`` has not been called since the
    last recorded event.
    """

    def __init__(self, n_steps, n_envs):
        self.reset(n_steps)
        self.n_envs = n_envs

    def reset(self, n_steps):
        """Allocate fresh timing slots for ``n_steps`` steps and rewind to step 0."""
        self.n_steps = n_steps
        # Each step has 3 CUDA events: simulation_start, render_start, render_end
        self.events = [
            {
                'simulation_start': torch.cuda.Event(enable_timing=True),
                'render_start': torch.cuda.Event(enable_timing=True),
                'render_end': torch.cuda.Event(enable_timing=True),
            }
            for _ in range(n_steps)
        ]
        # Matching CPU timestamps for each step
        self.cpu_times = [
            {'simulation_start': 0.0, 'render_start': 0.0, 'render_end': 0.0}
            for _ in range(n_steps)
        ]
        self.current_step = 0

        # Wait for previously queued GPU work before profiling starts
        torch.cuda.synchronize()
        self.is_synchronized = False

    ######################## Internal helpers #######################
    def _record(self, phase):
        """Record the CUDA event and CPU timestamp for ``phase`` of the current step."""
        if self.current_step >= self.n_steps:
            raise RuntimeError("All steps have been profiled")
        # New events are pending until the next end(); block stale queries.
        self.is_synchronized = False
        self.events[self.current_step][phase].record()
        self.cpu_times[self.current_step][phase] = time.perf_counter()

    def _require_synchronized(self):
        """Raise unless ``end()`` has synchronized the recorded events."""
        if not self.is_synchronized:
            raise RuntimeError("GPU profiler is not synchronized")

    def _gpu_seconds(self, start_phase, end_phase):
        """Sum the GPU time between two phases over all recorded steps, in seconds."""
        self._require_synchronized()
        total_ms = sum(
            (step[start_phase].elapsed_time(step[end_phase])
             for step in self.events[:self.current_step]),
            0.0,
        )
        return total_ms / 1000.0

    def _cpu_seconds(self, start_phase, end_phase):
        """Sum the CPU time between two phases over all recorded steps, in seconds."""
        self._require_synchronized()
        return sum(
            (step[end_phase] - step[start_phase]
             for step in self.cpu_times[:self.current_step]),
            0.0,
        )

    @staticmethod
    def _rate(count, seconds):
        """Return ``count / seconds``, or 0.0 when no time has elapsed."""
        return count / seconds if seconds > 0 else 0.0

    ######################## Profiling Events #######################
    def on_simulation_start(self):
        """Record the start of simulation for the current step."""
        self._record('simulation_start')

    def on_rendering_start(self):
        """Record the start of rendering for the current step."""
        self._record('render_start')

    def on_rendering_end(self):
        """Record the end of rendering and advance to the next step."""
        self._record('render_end')
        self.current_step += 1

    def end(self):
        """Finish profiling; must be called before querying results."""
        self._synchronize()

    def _synchronize(self):
        """Synchronize the GPU so every recorded event has completed."""
        torch.cuda.synchronize()
        self.is_synchronized = True

    ######################## Simulation Performance #######################
    def get_total_simulation_gpu_time(self):
        """Return the total simulation GPU time over recorded steps, in seconds."""
        return self._gpu_seconds('simulation_start', 'render_start')

    def get_total_simulation_cpu_time(self):
        """Return the total simulation CPU time over recorded steps, in seconds."""
        return self._cpu_seconds('simulation_start', 'render_start')

    def get_simulation_fps(self):
        """Return simulated env-steps per second of simulation GPU time."""
        return self._rate(self.n_envs * self.current_step, self.get_total_simulation_gpu_time())

    def get_simulation_fps_per_env(self):
        """Return simulated steps per second of simulation GPU time."""
        return self._rate(self.current_step, self.get_total_simulation_gpu_time())

    ######################## Rendering Performance #######################
    def get_total_rendering_gpu_time(self):
        """Return the total rendering GPU time over recorded steps, in seconds."""
        return self._gpu_seconds('render_start', 'render_end')

    def get_total_rendering_cpu_time(self):
        """Return the total rendering CPU time over recorded steps, in seconds."""
        return self._cpu_seconds('render_start', 'render_end')

    def get_rendering_fps(self):
        """Return rendered env-frames per second of rendering GPU time."""
        return self._rate(self.n_envs * self.current_step, self.get_total_rendering_gpu_time())

    def get_rendering_fps_per_env(self):
        """Return rendered steps per second of rendering GPU time."""
        return self._rate(self.current_step, self.get_total_rendering_gpu_time())

    def get_total_rendering_gpu_time_per_env(self):
        """Return the total rendering GPU time divided by ``n_envs``, in seconds."""
        return self.get_total_rendering_gpu_time() / self.n_envs

    def get_total_rendering_cpu_time_per_env(self):
        """Return the total rendering CPU time divided by ``n_envs``, in seconds."""
        return self.get_total_rendering_cpu_time() / self.n_envs

    ######################## Total Performance #######################
    def get_total_gpu_time(self):
        """Return the total (simulation + rendering) GPU time, in seconds."""
        return self._gpu_seconds('simulation_start', 'render_end')

    def get_total_cpu_time(self):
        """Return the total (simulation + rendering) CPU time, in seconds."""
        return self._cpu_seconds('simulation_start', 'render_end')

    def get_total_gpu_time_per_env(self):
        """Return the total GPU time divided by ``n_envs``, in seconds."""
        return self.get_total_gpu_time() / self.n_envs

    def get_total_cpu_time_per_env(self):
        """Return the total CPU time divided by ``n_envs``, in seconds."""
        return self.get_total_cpu_time() / self.n_envs

    def get_step_times(self, step_idx):
        """Return the timing breakdown of one recorded step, in milliseconds.

        The result maps ``'simulation'``, ``'rendering'`` and ``'total'`` to a
        dict with ``'gpu_ms'`` and ``'cpu_ms'``.

        Raises:
            IndexError: If ``step_idx`` is negative or not recorded yet.
        """
        self._require_synchronized()
        if step_idx < 0 or step_idx >= self.current_step:
            raise IndexError(f"Step {step_idx} has not been profiled yet")

        events = self.events[step_idx]
        cpu_times = self.cpu_times[step_idx]
        spans = {
            'simulation': ('simulation_start', 'render_start'),
            'rendering': ('render_start', 'render_end'),
            'total': ('simulation_start', 'render_end'),
        }
        return {
            name: {
                'gpu_ms': events[start].elapsed_time(events[stop]),
                'cpu_ms': (cpu_times[stop] - cpu_times[start]) * 1000,
            }
            for name, (start, stop) in spans.items()
        }

    ######################## Print Summary #######################
    def print_rendering_summary(self):
        """Print the rendering totals, per-env totals and FPS."""
        print(f"Total rendering GPU time: {self.get_total_rendering_gpu_time()} seconds")
        print(f"Total rendering CPU time: {self.get_total_rendering_cpu_time()} seconds")
        print(f"Total rendering GPU time per env: {self.get_total_rendering_gpu_time_per_env()} seconds")
        print(f"Total rendering CPU time per env: {self.get_total_rendering_cpu_time_per_env()} seconds")
        print(f"Rendering FPS: {self.get_rendering_fps()}")
        print(f"Rendering FPS per env: {self.get_rendering_fps_per_env()}")

    def print_simulation_summary(self):
        """Print the simulation totals and FPS."""
        print(f"Total simulation GPU time: {self.get_total_simulation_gpu_time()} seconds")
        print(f"Total simulation CPU time: {self.get_total_simulation_cpu_time()} seconds")
        print(f"Simulation FPS: {self.get_simulation_fps()}")
        print(f"Simulation FPS per env: {self.get_simulation_fps_per_env()}")

    def print_summary(self):
        """Print the rendering summary followed by the simulation summary."""
        self.print_rendering_summary()
        self.print_simulation_summary()
profiler.on_simulation_start() + scene.step() + profiler.on_rendering_start() + rgb, depth, _, _ = scene.visualizer.cameras[0].render(rgb=True, depth=True) + profiler.on_rendering_end() + + profiler.end() + profiler.print_summary() - end_time = time() - time_taken = end_time - start_time - time_taken_per_env = time_taken / n_envs - fps = n_envs * n_steps / time_taken - fps_per_env = n_steps / time_taken - - print(f'Time taken: {time_taken} seconds') - print(f'Time taken per env: {time_taken_per_env} seconds') - print(f'FPS: {fps}') - print(f'FPS per env: {fps_per_env}') - - # Ensure the directory exists - os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file_path), exist_ok=True) + time_taken_gpu = profiler.get_total_rendering_gpu_time() + time_taken_cpu = profiler.get_total_rendering_cpu_time() + time_taken_per_env_gpu = profiler.get_total_rendering_gpu_time_per_env() + time_taken_per_env_cpu = profiler.get_total_rendering_cpu_time_per_env() + fps = profiler.get_rendering_fps() + fps_per_env = profiler.get_rendering_fps_per_env() # Append a line with all args and results in csv format - with open(benchmark_args.benchmark_result_file_path, 'a') as f: - f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer_name},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken},{time_taken_per_env},{fps},{fps_per_env}\n') + os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file), exist_ok=True) + with open(benchmark_args.benchmark_result_file, 'a') as f: + 
f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken_gpu},{time_taken_per_env_gpu},{time_taken_cpu},{time_taken_per_env_cpu},{fps},{fps_per_env}\n') except Exception as e: print(f"Error during benchmark: {e}") raise def main(): ######################## Parse arguments ####################### - benchmark_args = BenchmarkArgs.parse_args() + benchmark_args = BenchmarkArgs.parse_benchmark_args() ######################## Initialize scene ####################### scene = init_gs(benchmark_args) diff --git a/examples/perf_benchmark/benchmark_plotter.py b/examples/perf_benchmark/benchmark_report_generator.py similarity index 58% rename from examples/perf_benchmark/benchmark_plotter.py rename to examples/perf_benchmark/benchmark_report_generator.py index 2aef8626a..95c9317c8 100644 --- a/examples/perf_benchmark/benchmark_plotter.py +++ b/examples/perf_benchmark/benchmark_report_generator.py @@ -5,10 +5,111 @@ import pandas as pd import matplotlib.pyplot as plt -from matplotlib.ticker import ScalarFormatter import numpy as np +from benchmark_configs import BenchmarkConfigs -def generatePlotHtml(plots_dir): +def generate_table_html(plot_table_data): + # Add CSS styling for the table + html_table = """ + +
| Renderer | " + for batch_size in sorted_batch_sizes: + html_table += f"{batch_size} | " + html_table += "|||
|---|---|---|---|---|
| {html.escape(renderer)} | " + row_data = [] + for batch_size in sorted_batch_sizes: + if renderer not in plot_table_data or batch_size not in plot_table_data[renderer]: + row_data.append(None) + html_table += "N/A | " + else: + fps = plot_table_data[renderer][batch_size] + row_data.append(fps) + html_table += f"{fps:.1f} | " + html_table += "||
| Speedup | " + last_renderer_data = [None, None] + for i in range(len(sorted_batch_sizes)): + if (renderer_data[-2][i] is not None and + renderer_data[-1][i] is not None): + ratio = renderer_data[-1][i] / renderer_data[-2][i] + last_renderer_data[-2] = renderer_data[-2][i] + last_renderer_data[-1] = renderer_data[-1][i] + html_table += f"{ratio:.1f}x | " + elif (renderer_data[-2][i] is not None and + renderer_data[-1][i] is None): + ratio = last_renderer_data[-1] / renderer_data[-2][i] + last_renderer_data[-2] = renderer_data[-2][i] + html_table += f"{ratio:.1f}x | " + elif (renderer_data[-2][i] is None and + renderer_data[-1][i] is not None): + ratio = renderer_data[-1][i] / last_renderer_data[-2] + last_renderer_data[-1] = renderer_data[-1][i] + html_table += f"{ratio:.1f}x | " + else: + html_table += "N/A | " + html_table += "