diff --git a/examples/perf_benchmark/batch_benchmark.py b/examples/perf_benchmark/batch_benchmark.py index e489b7d0d..94ef01059 100644 --- a/examples/perf_benchmark/batch_benchmark.py +++ b/examples/perf_benchmark/batch_benchmark.py @@ -1,4 +1,5 @@ -from benchmark_plotter import plot_batch_benchmark +from benchmark_report_generator import generate_report +from benchmark_configs import BenchmarkConfigs import argparse import subprocess import os @@ -7,8 +8,13 @@ # Create a struct to store the arguments class BenchmarkArgs: - def __init__(self, renderer_name, rasterizer, n_envs, n_steps, resX, resY, camera_posX, camera_posY, camera_posZ, camera_lookatX, camera_lookatY, camera_lookatZ, camera_fov, mjcf, benchmark_result_file_path): - self.renderer_name = renderer_name + def __init__(self, + renderer, rasterizer, n_envs, n_steps, resX, resY, + camera_posX, camera_posY, camera_posZ, + camera_lookatX, camera_lookatY, camera_lookatZ, + camera_fov, mjcf, benchmark_result_file, benchmark_config_file, + max_bounce, spp, gui=False, benchmark_script=None, renderer_timeout=None): + self.renderer = renderer self.rasterizer = rasterizer self.n_envs = n_envs self.n_steps = n_steps @@ -22,46 +28,50 @@ def __init__(self, renderer_name, rasterizer, n_envs, n_steps, resX, resY, camer self.camera_lookatZ = camera_lookatZ self.camera_fov = camera_fov self.mjcf = mjcf - self.benchmark_result_file_path = benchmark_result_file_path + self.benchmark_result_file = benchmark_result_file + self.benchmark_config_file = benchmark_config_file + self.max_bounce = max_bounce + self.spp = spp + self.gui = gui + self.benchmark_script = benchmark_script + self.renderer_timeout = renderer_timeout @staticmethod - def parse_args(): + def parse_benchmark_args(): parser = argparse.ArgumentParser() - parser.add_argument("-d", "--renderer_name", type=str, default="batch_renderer") + parser.add_argument("-d", "--renderer", required=True, type=str) parser.add_argument("-r", "--rasterizer", action="store_true", 
default=False) - parser.add_argument("-n", "--n_envs", type=int, default=1024) - parser.add_argument("-s", "--n_steps", type=int, default=1) - parser.add_argument("-x", "--resX", type=int, default=1024) - parser.add_argument("-y", "--resY", type=int, default=1024) - parser.add_argument("-i", "--camera_posX", type=float, default=1.5) - parser.add_argument("-j", "--camera_posY", type=float, default=0.5) - parser.add_argument("-k", "--camera_posZ", type=float, default=1.5) - parser.add_argument("-l", "--camera_lookatX", type=float, default=0.0) - parser.add_argument("-m", "--camera_lookatY", type=float, default=0.0) - parser.add_argument("-o", "--camera_lookatZ", type=float, default=0.5) - parser.add_argument("-v", "--camera_fov", type=float, default=45) - parser.add_argument("-f", "--mjcf", type=str, default="xml/franka_emika_panda/panda.xml") - parser.add_argument("-g", "--benchmark_result_file_path", type=str, default="benchmark.csv") + parser.add_argument("-n", "--n_envs", required=True, type=int) + parser.add_argument("-x", "--resX", required=True, type=int) + parser.add_argument("-y", "--resY", required=True, type=int) + parser.add_argument("-f", "--mjcf", required=True, type=str) + parser.add_argument("-g", "--benchmark_result_file", required=True, type=str) + parser.add_argument("-c", "--benchmark_config_file", required=True, type=str) args = parser.parse_args() + benchmark_config = BenchmarkConfigs(args.benchmark_config_file) benchmark_args = BenchmarkArgs( - renderer_name=args.renderer_name, + renderer=args.renderer, rasterizer=args.rasterizer, n_envs=args.n_envs, - n_steps=args.n_steps, + n_steps=benchmark_config.n_steps, resX=args.resX, resY=args.resY, - camera_posX=args.camera_posX, - camera_posY=args.camera_posY, - camera_posZ=args.camera_posZ, - camera_lookatX=args.camera_lookatX, - camera_lookatY=args.camera_lookatY, - camera_lookatZ=args.camera_lookatZ, - camera_fov=args.camera_fov, + camera_posX=benchmark_config.camera_pos[0], + 
camera_posY=benchmark_config.camera_pos[1], + camera_posZ=benchmark_config.camera_pos[2], + camera_lookatX=benchmark_config.camera_lookat[0], + camera_lookatY=benchmark_config.camera_lookat[1], + camera_lookatZ=benchmark_config.camera_lookat[2], + camera_fov=benchmark_config.camera_fov, mjcf=args.mjcf, - benchmark_result_file_path=args.benchmark_result_file_path, + benchmark_result_file=args.benchmark_result_file, + benchmark_config_file=args.benchmark_config_file, + max_bounce=benchmark_config.max_bounce, + spp=benchmark_config.spp, + gui=benchmark_config.gui, ) print(f"Benchmark with args:") - print(f" renderer_name: {benchmark_args.renderer_name}") + print(f" renderer: {benchmark_args.renderer}") print(f" rasterizer: {benchmark_args.rasterizer}") print(f" n_envs: {benchmark_args.n_envs}") print(f" n_steps: {benchmark_args.n_steps}") @@ -70,89 +80,63 @@ def parse_args(): print(f" camera_lookat: ({benchmark_args.camera_lookatX}, {benchmark_args.camera_lookatY}, {benchmark_args.camera_lookatZ})") print(f" camera_fov: {benchmark_args.camera_fov}") print(f" mjcf: {benchmark_args.mjcf}") - print(f" benchmark_result_file_path: {benchmark_args.benchmark_result_file_path}") + print(f" benchmark_result_file: {benchmark_args.benchmark_result_file}") + print(f" benchmark_config_file: {benchmark_args.benchmark_config_file}") + print(f" max_bounce: {benchmark_args.max_bounce}") + print(f" spp: {benchmark_args.spp}") + print(f" gui: {benchmark_args.gui}") return benchmark_args class BatchBenchmarkArgs: - def __init__(self, use_full_list, continue_from): - self.use_full_list = use_full_list + def __init__(self, config_file, continue_from): + self.config_file = config_file self.continue_from = continue_from -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("-f", "--use_full_list", action="store_true", default=False) - parser.add_argument("-c", "--continue_from", type=str, default=None) - args = parser.parse_args() - return 
BatchBenchmarkArgs(use_full_list=args.use_full_list, continue_from=args.continue_from) + def parse_batch_benchmark_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-f", "--config_file", type=str, default="benchmark_config_smoke_test.yml") + parser.add_argument("-c", "--continue_from", type=str, default=None) + args = parser.parse_args() + return BatchBenchmarkArgs( + config_file=args.config_file, + continue_from=args.continue_from + ) -def create_batch_args(benchmark_result_file_path, use_full_list=False): + +def create_batch_args(benchmark_result_file, config_file): # Ensure the directory exists - os.makedirs(os.path.dirname(benchmark_result_file_path), exist_ok=True) + os.makedirs(os.path.dirname(benchmark_result_file), exist_ok=True) - # Create a list of all the possible combinations of arguments - # and return them as a list of BenchmarkArgs - full_mjcf_list = ["xml/franka_emika_panda/panda.xml", "xml/unitree_g1/g1.xml", "xml/unitree_go2/go2.xml"] - full_renderer_list = ["batch_renderer", "pyrender"] - full_rasterizer_list = [True, False] - full_batch_size_list = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384] - square_resolution_list = [ - (64, 64), (128, 128), (256, 256), (512, 512), (1024, 1024), (2048, 2048), (4096, 4096), (8192, 8192) - ] - four_three_resolution_list = [ - (320, 240), (640, 480), (800, 600), (1024, 768), (1280, 960), (1600, 1200), (1920, 1440), (2048, 1536), (2560, 1920), (3200, 2400), (4096, 3072), (8192, 6144), - ] - sixteen_nine_resolution_list = [ - (320, 180), (640, 360), (800, 450), (1024, 576), (1280, 720), (1600, 900), (1920, 1080), (2048, 1152), (2560, 1440), (3200, 1800), (4096, 2304), (8192, 4608), - ] - full_resolution_list = square_resolution_list + four_three_resolution_list + sixteen_nine_resolution_list - - # Minimal mjcf, resolution, and batch size - minimal_renderer_list = ["batch_renderer", "pyrender"] - minimal_rasterizer_list = [True] - 
minimal_mjcf_list = [ - "xml/franka_emika_panda/panda.xml" - ] - minimal_batch_size_list = [ - #2048, 3072, 4096, 6144, 8192, 12288, 16384 - 1024, 2048 - ] - #minimal_batch_size_list = full_batch_size_list - minimal_resolution_list = [ - (128, 128), - (256, 256), - ] - - if use_full_list: - renderer_list = full_renderer_list - rasterizer_list = full_rasterizer_list - mjcf_list = full_mjcf_list - resolution_list = full_resolution_list - batch_size_list = full_batch_size_list - else: - renderer_list = minimal_renderer_list - rasterizer_list = minimal_rasterizer_list - mjcf_list = minimal_mjcf_list - resolution_list = minimal_resolution_list - batch_size_list = minimal_batch_size_list + # Load configuration + config = BenchmarkConfigs(config_file) + mjcf_list = config.mjcf_list + renderer_list = config.renderer_list + rasterizer_list = config.rasterizer_list + batch_size_list = config.batch_size_list + resolution_list = config.resolution_list + n_steps = config.n_steps + camera_pos = config.camera_pos + camera_lookat = config.camera_lookat + camera_fov = config.camera_fov + max_bounce = config.max_bounce + spp = config.spp + gui = config.gui # Batch data for resolution and batch size needs to be sorted in ascending order of resX x resY # so that if one resolution fails, all the resolutions, which are larger, will be skipped. 
resolution_list.sort(key=lambda x: x[0] * x[1]) - # Hardcoded parameters - n_steps = 1 - camera_pos = (1.5, 0.5, 1.5) - camera_lookat = (0.0, 0.0, 0.5) - camera_fov = 45 - # Create a hierarchical dictionary to store all combinations batch_args_dict = {} # Build hierarchical structure - for renderer in renderer_list: + for renderer_info in renderer_list: + renderer = renderer_info['renderer'] + benchmark_script = renderer_info['benchmark_script'] + renderer_timeout = renderer_info['timeout'] batch_args_dict[renderer] = {} for rasterizer in rasterizer_list: - batch_args_dict[renderer][rasterizer] = {} + batch_args_dict[renderer][rasterizer] = {} for mjcf in mjcf_list: batch_args_dict[renderer][rasterizer][mjcf] = {} for batch_size in batch_size_list: @@ -161,7 +145,7 @@ def create_batch_args(benchmark_result_file_path, use_full_list=False): resX, resY = resolution # Create benchmark args for this combination args = BenchmarkArgs( - renderer_name=renderer, + renderer=renderer, rasterizer=rasterizer, n_envs=batch_size, n_steps=n_steps, @@ -175,36 +159,42 @@ def create_batch_args(benchmark_result_file_path, use_full_list=False): camera_lookatZ=camera_lookat[2], camera_fov=camera_fov, mjcf=mjcf, - benchmark_result_file_path=benchmark_result_file_path + benchmark_result_file=benchmark_result_file, + benchmark_config_file=config_file, + max_bounce=max_bounce, + spp=spp, + gui=gui, + benchmark_script=benchmark_script, + renderer_timeout=renderer_timeout, ) batch_args_dict[renderer][rasterizer][mjcf][batch_size][(resX,resY)] = args return batch_args_dict -def create_benchmark_result_file(continue_from_file_path): - if continue_from_file_path is not None: - if not os.path.exists(continue_from_file_path): - raise FileNotFoundError(f"Continue from file not found: {continue_from_file_path}") - print(f"Continuing from file: {continue_from_file_path}") - return continue_from_file_path +def create_benchmark_result_file(continue_from_file): + if continue_from_file is not None: + if 
not os.path.exists(continue_from_file): + raise FileNotFoundError(f"Continue from file not found: {continue_from_file}") + print(f"Continuing from file: {continue_from_file}") + return continue_from_file else: # Create benchmark result data file with header benchmark_data_directory = "logs/benchmark" if not os.path.exists(benchmark_data_directory): os.makedirs(benchmark_data_directory) benchmark_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - benchmark_result_file_path = f"{benchmark_data_directory}/batch_benchmark_{benchmark_timestamp}.csv" - with open(benchmark_result_file_path, "w") as f: - f.write("result,mjcf,renderer,rasterizer,n_envs,n_steps,resX,resY,camera_posX,camera_posY,camera_posZ,camera_lookatX,camera_lookatY,camera_lookatZ,camera_fov,time_taken,time_taken_per_env,fps,fps_per_env\n") - print(f"Created new benchmark result file: {benchmark_result_file_path}") - return benchmark_result_file_path + benchmark_result_file = f"{benchmark_data_directory}/batch_benchmark_{benchmark_timestamp}.csv" + with open(benchmark_result_file, "w") as f: + f.write("result,mjcf,renderer,rasterizer,n_envs,n_steps,resX,resY,camera_posX,camera_posY,camera_posZ,camera_lookatX,camera_lookatY,camera_lookatZ,camera_fov,time_taken_gpu,time_taken_per_env_gpu,time_taken_cpu,time_taken_per_env_cpu,fps,fps_per_env\n") + print(f"Created new benchmark result file: {benchmark_result_file}") + return benchmark_result_file -def get_previous_runs(continue_from_file_path): - if continue_from_file_path is None: +def get_previous_runs(continue_from_file): + if continue_from_file is None: return [] # Read the existing benchmark data file - df = pd.read_csv(continue_from_file_path) + df = pd.read_csv(continue_from_file) # Create a list of tuples containing run info and status previous_runs = [] @@ -213,6 +203,7 @@ def get_previous_runs(continue_from_file_path): run_info = ( row['mjcf'], row['renderer'], + row['rasterizer'], row['n_envs'], (row['resX'], row['resY']), row['result'] # 
'succeeded' or 'failed' @@ -221,25 +212,12 @@ def get_previous_runs(continue_from_file_path): return previous_runs -def get_benchmark_script_path(renderer_name): - current_dir = os.path.dirname(os.path.abspath(__file__)) - if renderer_name == "batch_renderer": - return f"{current_dir}/benchmark.py" - elif renderer_name == "pyrender": - return f"{current_dir}/benchmark_pyrender.py" - else: - raise ValueError(f"Invalid renderer name: {renderer_name}") - def run_batch_benchmark(batch_args_dict, previous_runs=None): if previous_runs is None: previous_runs = [] for renderer in batch_args_dict: - benchmark_script_path = get_benchmark_script_path(renderer) - if not os.path.exists(benchmark_script_path): - raise FileNotFoundError(f"Benchmark script not found: {benchmark_script_path}") - print(f"Running benchmark for {renderer}") - + print(f"Running benchmark for {renderer}") for rasterizer in batch_args_dict[renderer]: for mjcf in batch_args_dict[renderer][rasterizer]: for batch_size in batch_args_dict[renderer][rasterizer][mjcf]: @@ -249,11 +227,11 @@ def run_batch_benchmark(batch_args_dict, previous_runs=None): break # Check if this run was in a previous execution - run_info = (mjcf, rasterizer, batch_size, resolution) + run_info = (mjcf, renderer, rasterizer, batch_size, resolution) skip_this_run = False for prev_run in previous_runs: - if run_info == prev_run[:4]: # Compare only the run parameters, not the status + if run_info == prev_run[:5]: # Compare only the run parameters, not the status skip_this_run = True if prev_run[4] == 'failed': # Skip this and subsequent resolutions if it failed before @@ -267,51 +245,75 @@ def run_batch_benchmark(batch_args_dict, previous_runs=None): batch_args = batch_args_dict[renderer][rasterizer][mjcf][batch_size][resolution] # launch a process to run the benchmark + current_dir = os.path.dirname(os.path.abspath(__file__)) + benchmark_script_path = os.path.join(current_dir, batch_args.benchmark_script) + if not 
os.path.exists(benchmark_script_path): + raise FileNotFoundError(f"Benchmark script not found: {benchmark_script_path}") cmd = ["python3", benchmark_script_path] if batch_args.rasterizer: - cmd.append("-r") + cmd.append("--rasterizer") cmd.extend([ - "-d", batch_args.renderer_name, - "-n", str(batch_args.n_envs), - "-s", str(batch_args.n_steps), - "-x", str(batch_args.resX), - "-y", str(batch_args.resY), - "-i", str(batch_args.camera_posX), - "-j", str(batch_args.camera_posY), - "-k", str(batch_args.camera_posZ), - "-l", str(batch_args.camera_lookatX), - "-m", str(batch_args.camera_lookatY), - "-o", str(batch_args.camera_lookatZ), - "-v", str(batch_args.camera_fov), - "-f", batch_args.mjcf, - "-g", batch_args.benchmark_result_file_path + "--renderer", batch_args.renderer, + "--n_envs", str(batch_args.n_envs), + "--resX", str(batch_args.resX), + "--resY", str(batch_args.resY), + "--mjcf", batch_args.mjcf, + "--benchmark_result_file", batch_args.benchmark_result_file, + "--benchmark_config_file", batch_args.benchmark_config_file, ]) try: + # Read timeout from config process = subprocess.Popen(cmd) - return_code = process.wait() - if return_code != 0: - raise subprocess.CalledProcessError(return_code, cmd) + try: + # Hack to avoid omniverse runs to take forever. 
+ timeout = batch_args.renderer_timeout + return_code = process.wait(timeout=timeout) + if return_code != 0: + raise subprocess.CalledProcessError(return_code, cmd) + except subprocess.TimeoutExpired: + process.kill() + process.wait() # Wait for the process to be killed + raise TimeoutError(f"Process did not complete within {timeout} seconds") except Exception as e: print(f"Error running benchmark: {str(e)}") - last_resolution_failed = True + if isinstance(e, subprocess.CalledProcessError): + last_resolution_failed = True # Write failed result without timing data - with open(batch_args.benchmark_result_file_path, 'a') as f: - f.write(f'failed,{batch_args.mjcf},{batch_args.renderer_name},{batch_args.rasterizer},{batch_args.n_envs},{batch_args.n_steps},{batch_args.resX},{batch_args.resY},{batch_args.camera_posX},{batch_args.camera_posY},{batch_args.camera_posZ},{batch_args.camera_lookatX},{batch_args.camera_lookatY},{batch_args.camera_lookatZ},{batch_args.camera_fov},,,,\n') - break + with open(batch_args.benchmark_result_file, 'a') as f: + f.write(f'failed,{batch_args.mjcf},{batch_args.renderer},{batch_args.rasterizer},{batch_args.n_envs},{batch_args.n_steps},{batch_args.resX},{batch_args.resY},{batch_args.camera_posX},{batch_args.camera_posY},{batch_args.camera_posZ},{batch_args.camera_lookatX},{batch_args.camera_lookatY},{batch_args.camera_lookatZ},{batch_args.camera_fov},,,,,,\n') + +def sort_and_dedupe_benchmark_result_file(benchmark_result_file): + # Sort by mjcf asc, renderer asc, rasterizer desc, n_envs asc, resX asc, resY asc, n_envs asc + df = pd.read_csv(benchmark_result_file) + df = df.sort_values( + by=['mjcf', 'renderer', 'rasterizer', 'resX', 'resY', 'n_envs', 'result'], + ascending=[True, True, False, True, True, True, False] + ) + + # Deduplicate by keeping the first occurrence of each unique combination of mjcf, renderer, rasterizer, resX, resY, n_envs + # Keep succeeded runs if there are multiple runs for the same combination. 
+ df = df.drop_duplicates( + subset=['mjcf', 'renderer', 'rasterizer', 'resX', 'resY', 'n_envs'], + keep='first' + ) + df.to_csv(benchmark_result_file, index=False) def main(): - batch_benchmark_args = parse_args() - benchmark_result_file_path = create_benchmark_result_file(batch_benchmark_args.continue_from) + batch_benchmark_args = BatchBenchmarkArgs.parse_batch_benchmark_args() + benchmark_result_file = create_benchmark_result_file(batch_benchmark_args.continue_from) # Get list of previous runs if continuing from a previous run previous_runs = get_previous_runs(batch_benchmark_args.continue_from) # Run benchmark in batch - batch_args_dict = create_batch_args(benchmark_result_file_path, use_full_list=batch_benchmark_args.use_full_list) + batch_args_dict = create_batch_args(benchmark_result_file, config_file=batch_benchmark_args.config_file) run_batch_benchmark(batch_args_dict, previous_runs) + # Sort benchmark result file + sort_and_dedupe_benchmark_result_file(benchmark_result_file) + # Generate plots - plot_batch_benchmark(benchmark_result_file_path) + generate_report(benchmark_result_file, config_file=batch_benchmark_args.config_file) if __name__ == "__main__": main() diff --git a/examples/perf_benchmark/benchmark_configs.py b/examples/perf_benchmark/benchmark_configs.py new file mode 100644 index 000000000..290b487f3 --- /dev/null +++ b/examples/perf_benchmark/benchmark_configs.py @@ -0,0 +1,44 @@ +import os +import yaml + +class BenchmarkConfigs: + def __init__(self, config_file): + self.load_from_config_file(config_file) + + def load_from_config_file(self, config_file): + self.config_path = os.path.join(os.path.dirname(__file__), "configs", config_file) + if not os.path.exists(self.config_path): + raise FileNotFoundError(f"Config file not found: {self.config_path}") + with open(self.config_path, 'r') as f: + config = yaml.safe_load(f) + + self.mjcf_list = config['mjcf_list'] + self.rasterizer_list = config['rasterizer_list'] + self.batch_size_list = 
config['batch_size_list'] + self.resolution_list = config['resolution_list'] + self.gui = config.get('gui', False) + + # Get renderer list with defaults + self.renderer_list = config['renderer_list'] + + # Get raytracer config with defaults + raytracer_config = config.get('raytracer', {}) + self.max_bounce = raytracer_config.get('max_bounce', 2) + self.spp = raytracer_config.get('spp', 1) + + # Get simulation config with defaults + simulation_config = config.get('simulation', {}) + self.n_steps = simulation_config.get('n_steps', 1) + + # Get camera config with defaults + camera_config = config.get('camera', {}) + self.camera_pos = camera_config.get('position', [1.5, 0.5, 1.5]) + self.camera_lookat = camera_config.get('lookat', [0.0, 0.0, 0.5]) + self.camera_fov = camera_config.get('fov', 45.0) + + # Get display config with defaults + display_config = config.get('display', {}) + self.gui = display_config.get('gui', False) + + # Get comparison list with defaults + self.comparison_list = config.get('comparison_list', []) \ No newline at end of file diff --git a/examples/perf_benchmark/benchmark.py b/examples/perf_benchmark/benchmark_madrona.py similarity index 60% rename from examples/perf_benchmark/benchmark.py rename to examples/perf_benchmark/benchmark_madrona.py index 4508e8f85..90284ade9 100644 --- a/examples/perf_benchmark/benchmark.py +++ b/examples/perf_benchmark/benchmark_madrona.py @@ -5,6 +5,8 @@ import genesis as gs import torch from batch_benchmark import BenchmarkArgs +import benchmark_utils +from benchmark_profiler import BenchmarkProfiler def init_gs(benchmark_args): ########################## init ########################## @@ -33,9 +35,9 @@ def init_gs(benchmark_args): ) ########################## entities ########################## - #plane = scene.add_entity( - # gs.morphs.Plane(), - #) + plane = scene.add_entity( + gs.morphs.Plane(), + ) franka = scene.add_entity( gs.morphs.MJCF(file=benchmark_args.mjcf), visualize_contact=False, @@ -67,25 +69,6 
@@ def init_gs(benchmark_args): scene.build(n_envs=benchmark_args.n_envs) return scene -def add_noise_to_all_cameras(scene): - for cam in scene.visualizer.cameras: - cam.set_pose( - pos=cam.pos_all_envs + torch.rand((cam.n_envs, 3), device=cam.pos_all_envs.device) * 0.002 - 0.001, - lookat=cam.lookat_all_envs + torch.rand((cam.n_envs, 3), device=cam.lookat_all_envs.device) * 0.002 - 0.001, - up=cam.up_all_envs + torch.rand((cam.n_envs, 3), device=cam.up_all_envs.device) * 0.002 - 0.001, - ) - -def fill_gpu_cache_with_random_data(): - # 100 MB of random data - dummy_data =torch.rand(100, 1024, 1024, device="cuda") - # Make some random data manipulation to the entire tensor - dummy_data = dummy_data + 1 - dummy_data = dummy_data * 2 - dummy_data = dummy_data - 1 - dummy_data = dummy_data / 2 - dummy_data = dummy_data.abs() - dummy_data = dummy_data.sqrt() - def run_benchmark(scene, benchmark_args): try: n_envs = benchmark_args.n_envs @@ -96,39 +79,38 @@ def run_benchmark(scene, benchmark_args): rgb, depth, _, _ = scene.render_all_cams() # fill gpu cache with random data - fill_gpu_cache_with_random_data() - - # timer - from time import time - start_time = time() + # benchmark_utils.fill_gpu_cache_with_random_data() + # Profiler + profiler = BenchmarkProfiler(n_steps, n_envs) for i in range(n_steps): - rgb, depth, _, _ = scene.render_all_cams(force_render=True) + profiler.on_simulation_start() + scene.step() + profiler.on_rendering_start() + rgb, depth, _, _ = scene.render_all_cams() + profiler.on_rendering_end() + + profiler.end() + profiler.print_summary() - end_time = time() - time_taken = end_time - start_time - time_taken_per_env = time_taken / n_envs - fps = n_envs * n_steps / time_taken - fps_per_env = n_steps / time_taken - - print(f'Time taken: {time_taken} seconds') - print(f'Time taken per env: {time_taken_per_env} seconds') - print(f'FPS: {fps}') - print(f'FPS per env: {fps_per_env}') - - # Ensure the directory exists - 
os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file_path), exist_ok=True) + time_taken_gpu = profiler.get_total_rendering_gpu_time() + time_taken_cpu = profiler.get_total_rendering_cpu_time() + time_taken_per_env_gpu = profiler.get_total_rendering_gpu_time_per_env() + time_taken_per_env_cpu = profiler.get_total_rendering_cpu_time_per_env() + fps = profiler.get_rendering_fps() + fps_per_env = profiler.get_rendering_fps_per_env() # Append a line with all args and results in csv format - with open(benchmark_args.benchmark_result_file_path, 'a') as f: - f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer_name},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken},{time_taken_per_env},{fps},{fps_per_env}\n') + os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file), exist_ok=True) + with open(benchmark_args.benchmark_result_file, 'a') as f: + f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken_gpu},{time_taken_per_env_gpu},{time_taken_cpu},{time_taken_per_env_cpu},{fps},{fps_per_env}\n') except Exception as e: print(f"Error during benchmark: {e}") raise def main(): ######################## Parse arguments ####################### - benchmark_args = BenchmarkArgs.parse_args() + benchmark_args = BenchmarkArgs.parse_benchmark_args() ######################## Initialize scene ####################### 
scene = init_gs(benchmark_args) diff --git a/examples/perf_benchmark/benchmark_omni.py b/examples/perf_benchmark/benchmark_omni.py new file mode 100644 index 000000000..2522fcbd8 --- /dev/null +++ b/examples/perf_benchmark/benchmark_omni.py @@ -0,0 +1,403 @@ +# Before running, convert the assets like: +# python examples/perf_benchmark/process_xml.py \ +# --file ./genesis/assets/xml/franka_emika_panda/panda.xml + +######################## Parse arguments ####################### +# Create a struct to store the arguments +import argparse +from batch_benchmark import BenchmarkArgs +benchmark_args = BenchmarkArgs.parse_benchmark_args() + +######################## Launch app ####################### +from isaaclab.app import AppLauncher +app = AppLauncher( + headless=not benchmark_args.gui, + enable_cameras=True, + device="cuda:0", + rendering_mode="performance", +).app + +import carb +import isaaclab.sim as sim_utils +import isaacsim.core.utils.prims as prim_utils +import isaacsim.core.utils.stage as stage_utils +from isaaclab.sensors.camera import TiledCamera, TiledCameraCfg +from isaaclab.sim.converters import ( + MjcfConverter, MjcfConverterCfg, + UrdfConverter, UrdfConverterCfg +) +from isaaclab.utils.math import ( + create_rotation_matrix_from_view, + quat_from_matrix, +) +import omni.replicator.core as rep +from pxr import UsdLux, PhysxSchema + +from isaacsim.core.utils.extensions import enable_extension +enable_extension("isaacsim.asset.importer.mjcf") + +import os +import math +import numpy as np +import torch +import psutil +import pynvml +from scipy.spatial.transform import Rotation as R +from genesis.utils.image_exporter import FrameImageExporter +import benchmark_utils +from benchmark_profiler import BenchmarkProfiler + + +def load_mjcf(mjcf_path): + return MjcfConverter( + MjcfConverterCfg( + asset_path=mjcf_path, + fix_base=True, + force_usd_conversion=True + ) + ).usd_path + +def load_urdf(urdf_path): + return UrdfConverter( + UrdfConverterCfg( + 
asset_path=urdf_path, + joint_drive=None, + fix_base=True, + force_usd_conversion=True + ) + ).usd_path + +def apply_benchmark_carb_settings(print_changes=False): + settings = carb.settings.get_settings() + # Print settings before applying the settings + if print_changes: + print("Before settings:") + print("Render mode:", settings.get("/rtx/rendermode")) + print("Sample per pixel:", settings.get("/rtx/pathtracing/spp")) + print("Total spp:", settings.get("/rtx/pathtracing/totalSpp")) + print("Clamp spp:", settings.get("/rtx/pathtracing/clampSpp")) + print("Max bounce:", settings.get("/rtx/pathtracing/maxBounces")) + print("Optix Denoiser", settings.get("/rtx/pathtracing/optixDenoiser/enabled")) + print("Shadows", settings.get("/rtx/shadows/enabled")) + print("dlss/enabled:", settings.get("/rtx/post/dlss/enabled")) + print("dlss/auto:", settings.get("/rtx/post/dlss/auto")) + print("upscaling/enabled:", settings.get("/rtx/post/upscaling/enabled")) + print("aa/denoiser/enabled:", settings.get("/rtx/post/aa/denoiser/enabled")) + print("aa/taa/enabled:", settings.get("/rtx/post/aa/taa/enabled")) + print("motionBlur/enabled:", settings.get("/rtx/post/motionBlur/enabled")) + print("dof/enabled:", settings.get("/rtx/post/dof/enabled")) + print("bloom/enabled:", settings.get("/rtx/post/bloom/enabled")) + print("tonemap/enabled:", settings.get("/rtx/post/tonemap/enabled")) + print("exposure/enabled:", settings.get("/rtx/post/exposure/enabled")) + print("vsync:", settings.get("/app/window/vsync")) + + # Options: https://docs.omniverse.nvidia.com/materials-and-rendering/latest/rtx-renderer_pt.html + if benchmark_args.rasterizer: + # carb_settings.set("/rtx/rendermode", "Hydra Storm") + settings.set("/rtx/rendermode", "RayTracedLighting") + else: + settings.set("/rtx/rendermode", "PathTracing") + settings.set("/rtx/shadows/enabled", False) + + # Path tracing settings + settings.set("/rtx/pathtracing/spp", benchmark_args.spp) + settings.set("/rtx/pathtracing/totalSpp", 
benchmark_args.spp) + settings.set("/rtx/pathtracing/clampSpp", benchmark_args.spp) + settings.set("/rtx/pathtracing/maxBounces", benchmark_args.max_bounce) + settings.set("/rtx/pathtracing/optixDenoiser/enabled", False) + settings.set("/rtx/pathtracing/adaptiveSampling/enabled", False) + + # Disable DLSS & upscaling + settings.set("/rtx-transient/dlssg/enabled", False) + settings.set("/rtx/post/dlss/enabled", False) + settings.set("/rtx/post/dlss/auto", False) + settings.set("/rtx/post/upscaling/enabled", False) + + # Disable post-processing + settings.set("/rtx/post/aa/denoiser/enabled", False) + settings.set("/rtx/post/aa/taa/enabled", False) + settings.set("/rtx/post/motionBlur/enabled", False) + settings.set("/rtx/post/dof/enabled", False) + settings.set("/rtx/post/bloom/enabled", False) + settings.set("/rtx/post/tonemap/enabled", False) + settings.set("/rtx/post/exposure/enabled", False) + + # Disable VSync + settings.set("/app/window/vsync", False) + + # Print settings after applying the settings + if print_changes: + print("After settings:") + print("Render mode:", settings.get("/rtx/rendermode")) + print("Sample per pixel:", settings.get("/rtx/pathtracing/spp")) + print("Total spp:", settings.get("/rtx/pathtracing/totalSpp")) + print("Clamp spp:", settings.get("/rtx/pathtracing/clampSpp")) + print("Max bounce:", settings.get("/rtx/pathtracing/maxBounces")) + print("Optix Denoiser", settings.get("/rtx/pathtracing/optixDenoiser/enabled")) + print("Shadows", settings.get("/rtx/shadows/enabled")) + print("dlss/enabled:", settings.get("/rtx/post/dlss/enabled")) + print("dlss/auto:", settings.get("/rtx/post/dlss/auto")) + print("upscaling/enabled:", settings.get("/rtx/post/upscaling/enabled")) + print("aa/denoiser/enabled:", settings.get("/rtx/post/aa/denoiser/enabled")) + print("aa/taa/enabled:", settings.get("/rtx/post/aa/taa/enabled")) + print("motionBlur/enabled:", settings.get("/rtx/post/motionBlur/enabled")) + print("dof/enabled:", 
settings.get("/rtx/post/dof/enabled")) + print("bloom/enabled:", settings.get("/rtx/post/bloom/enabled")) + print("tonemap/enabled:", settings.get("/rtx/post/tonemap/enabled")) + print("exposure/enabled:", settings.get("/rtx/post/exposure/enabled")) + print("vsync:", settings.get("/app/window/vsync")) + +def init_isaac(benchmark_args): + ########################## init ########################## + stage_utils.create_new_stage() + stage = stage_utils.get_current_stage() + scene = sim_utils.SimulationContext( + sim_utils.SimulationCfg(device="cuda:0", dt=0.01,) + ) + cam_eye = ( + benchmark_args.camera_posX, + benchmark_args.camera_posY, + benchmark_args.camera_posZ + ) + cam_target = ( + benchmark_args.camera_lookatX, + benchmark_args.camera_lookatY, + benchmark_args.camera_lookatZ + ) + scene.set_camera_view(eye=cam_eye, target=cam_target) + cam_eye = torch.Tensor(cam_eye).reshape(-1, 3) + cam_target = torch.Tensor(cam_target).reshape(-1, 3) + + physxSceneAPI = PhysxSchema.PhysxSceneAPI.Apply(stage.GetPrimAtPath("/physicsScene")) + physxSceneAPI.CreateGpuTempBufferCapacityAttr(16 * 1024 * 1024 * 2) + physxSceneAPI.CreateGpuHeapCapacityAttr(64 * 1024 * 1024 * 2) + physxSceneAPI.CreateGpuMaxRigidPatchCountAttr(8388608) + physxSceneAPI.CreateGpuMaxRigidContactCountAttr(16777216) + + rep.settings.set_render_rtx_realtime() + apply_benchmark_carb_settings() + + ########################## entities ########################## + spacing_row = np.array((2.0, -6.0)) + spacing_col = np.array((-6.0, -2.0)) + n_cols = int(math.sqrt(benchmark_args.n_envs)) + offsets = [] + for i in range(benchmark_args.n_envs): + col = i % n_cols + row = i // n_cols + offset_XY = (row * spacing_row + col * spacing_col) + offset = np.array([*offset_XY, 0.0]) + offsets.append(offset) + prim_utils.create_prim( + f"/World/Origin{i:05d}", "Xform", translation=offset + ) + offsets = np.array(offsets) + + # load objects + plane_path = os.path.abspath(os.path.join("genesis/assets", 
"urdf/plane_usd/plane.usd")) + print(plane_path) + plane_cfg = sim_utils.UsdFileCfg(usd_path=plane_path) + plane_cfg.func("/World/Origin.*/plane", plane_cfg) + + robot_name = f"{os.path.splitext(benchmark_args.mjcf)[0]}_new.xml" + robot_path = load_mjcf(os.path.join("genesis/assets", robot_name)) + print("Robot asset:", robot_path) + robot_cfg = sim_utils.UsdFileCfg(usd_path=robot_path) + robot_cfg.func("/World/Origin.*/robot", robot_cfg) + + cam_fov = math.radians(benchmark_args.camera_fov) + cam_hapert = 20.955 + cam_fol = cam_hapert / (2 * math.tan(cam_fov / 2)) + cam_quat = quat_from_matrix( + create_rotation_matrix_from_view( + cam_target, cam_eye, stage_utils.get_stage_up_axis() + ) @ R.from_euler('z', 180, degrees=True).as_matrix() + ) + cam_eye = tuple(cam_eye.detach().cpu().squeeze().numpy()) + cam_quat = tuple(cam_quat.detach().cpu().squeeze().numpy()) + + print(cam_eye, cam_quat) + print(type(cam_eye), type(cam_quat)) + + cam_0 = TiledCamera( + TiledCameraCfg( + height=benchmark_args.resX, + width=benchmark_args.resY, + offset=TiledCameraCfg.OffsetCfg( + pos=cam_eye, + rot=cam_quat, + convention="ros" + ), + prim_path="/World/Origin.*/camera", + update_period=0, + data_types=["rgb", "depth"], + spawn=sim_utils.PinholeCameraCfg( + focal_length=cam_fol, + ), + ) + ) + + ########################## cameras ########################## + dir_light_pos = torch.Tensor([[0.0, 0.0, 1.5]]) + dir_light_quat = quat_from_matrix( + create_rotation_matrix_from_view( + dir_light_pos, + torch.Tensor([[1.0, 1.0, -2.0]]), + stage_utils.get_stage_up_axis())) + dir_light_pos = tuple(dir_light_pos.detach().cpu().squeeze().numpy()) + dir_light_quat = tuple(dir_light_quat.detach().cpu().squeeze().numpy()) + dir_light_cfg = sim_utils.DistantLightCfg(intensity=500.0, angle=45.0) + dir_light_prim = dir_light_cfg.func( + "/World/DirectionalLight", dir_light_cfg, + translation=dir_light_pos, + orientation=dir_light_quat) + + cone_light_pos = torch.Tensor([[4, -4, 4]]) + 
cone_light_quat = quat_from_matrix( + create_rotation_matrix_from_view( + cone_light_pos, + torch.Tensor([[-1, 1, -1]]), + stage_utils.get_stage_up_axis())) + cone_light_cfg = sim_utils.SphereLightCfg(intensity=1000.0, radius=0.1) + cone_light_pos = tuple(cone_light_pos.detach().cpu().squeeze().numpy()) + cone_light_quat = tuple(cone_light_quat.detach().cpu().squeeze().numpy()) + cone_light_prim = cone_light_cfg.func( + "/World/ConeLight", cone_light_cfg, + translation=cone_light_pos, + orientation=cone_light_quat) + cone_light = UsdLux.LightAPI(cone_light_prim) + UsdLux.ShapingAPI.Apply(cone_light_prim) + cone_light_prim.SetTypeName("SphereLight") + + return scene, cam_0 + +def get_utilization_percentages(reset: bool = False, max_values: list[float] = [0.0, 0.0, 0.0, 0.0]) -> list[float]: + """Get the maximum CPU, RAM, GPU utilization (processing), and + GPU memory usage percentages since the last time reset was true.""" + if reset: + max_values[:] = [0, 0, 0, 0] # Reset the max values + + # CPU utilization + cpu_usage = psutil.cpu_percent(interval=0.1) + max_values[0] = max(max_values[0], cpu_usage) + + # RAM utilization + memory_info = psutil.virtual_memory() + ram_usage = memory_info.percent + max_values[1] = max(max_values[1], ram_usage) + + # GPU utilization using pynvml + if torch.cuda.is_available(): + pynvml.nvmlInit() # Initialize NVML + for i in range(torch.cuda.device_count()): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + + # GPU Utilization + gpu_utilization = pynvml.nvmlDeviceGetUtilizationRates(handle) + gpu_processing_utilization_percent = gpu_utilization.gpu # GPU core utilization + max_values[2] = max(max_values[2], gpu_processing_utilization_percent) + + # GPU Memory Usage + memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + gpu_memory_total = memory_info.total + gpu_memory_used = memory_info.used + gpu_memory_utilization_percent = (gpu_memory_used / gpu_memory_total) * 100 + max_values[3] = max(max_values[3], 
gpu_memory_utilization_percent) + + pynvml.nvmlShutdown() # Shutdown NVML after usage + else: + gpu_processing_utilization_percent = None + gpu_memory_utilization_percent = None + return max_values + +def fill_gpu_cache_with_random_data(): + # 100 MB of random data + dummy_data = torch.rand(100, 1024, 1024, device="cuda") + # Make some random data manipulation to the entire tensor + dummy_data = dummy_data.sqrt() + +def run_benchmark(scene, camera, benchmark_args): + try: + n_envs = benchmark_args.n_envs + n_steps = benchmark_args.n_steps + + # warmup + system_utilization_analytics = get_utilization_percentages() + print( + f"| CPU:{system_utilization_analytics[0]}% | " + f"RAM:{system_utilization_analytics[1]}% | " + f"GPU Compute:{system_utilization_analytics[2]}% | " + f"GPU Memory: {system_utilization_analytics[3]:.2f}% |" + ) + + scene.reset() + dt = scene.get_physics_dt() + for i in range(3): + scene.step() + camera.update(dt) + _ = camera.data + print("Env and steps:", n_envs, n_steps) + + if benchmark_args.gui: + while True: + scene.step() + + # fill gpu cache with random data + # benchmark_utils.fill_gpu_cache_with_random_data() + + # Create an image exporter + image_dir = os.path.splitext(benchmark_args.benchmark_result_file)[0] + exporter = FrameImageExporter(image_dir) + + # Profiler + profiler = BenchmarkProfiler(n_steps, n_envs) + for i in range(n_steps): + profiler.on_simulation_start() + scene.step(render=False) + profiler.on_rendering_start() + scene.render() + # camera.update(dt, force_recompute=True) + # rgb_tiles = camera.data.output.get("rgb") + # depth_tiles = camera.data.output.get("depth") + profiler.on_rendering_end() + # exporter.export_frame_single_cam(i, 0, rgb=rgb_tiles, depth=depth_tiles) + + profiler.end() + profiler.print_summary() + + time_taken_gpu = profiler.get_total_rendering_gpu_time() + time_taken_cpu = profiler.get_total_rendering_cpu_time() + time_taken_per_env_gpu = profiler.get_total_rendering_gpu_time_per_env() + 
time_taken_per_env_cpu = profiler.get_total_rendering_cpu_time_per_env() + fps = profiler.get_rendering_fps() + fps_per_env = profiler.get_rendering_fps_per_env() + + print( + f"| CPU:{system_utilization_analytics[0]}% | " + f"RAM:{system_utilization_analytics[1]}% | " + f"GPU Compute:{system_utilization_analytics[2]}% | " + f" GPU Memory: {system_utilization_analytics[3]:.2f}% |" + ) + + # Append a line with all args and results in csv format + os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file), exist_ok=True) + with open(benchmark_args.benchmark_result_file, 'a') as f: + f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken_gpu},{time_taken_per_env_gpu},{time_taken_cpu},{time_taken_per_env_cpu},{fps},{fps_per_env}\n') + + print("App closing..") + # app.close() + print("App closed!") + + except Exception as e: + print(f"Error during benchmark: {e}") + raise + +def main(): + ######################## Initialize scene ####################### + scene, camera = init_isaac(benchmark_args) + + ######################## Run benchmark ####################### + run_benchmark(scene, camera, benchmark_args) + +if __name__ == "__main__": + main() diff --git a/examples/perf_benchmark/benchmark_profiler.py b/examples/perf_benchmark/benchmark_profiler.py new file mode 100644 index 000000000..6a245cd82 --- /dev/null +++ b/examples/perf_benchmark/benchmark_profiler.py @@ -0,0 +1,223 @@ +import torch +import numpy as np +import time + +class BenchmarkProfiler: + def __init__(self, n_steps, n_envs): + self.reset(n_steps) + self.n_envs = n_envs + + def reset(self, n_steps): + self.n_steps = n_steps + # Create arrays of 
CUDA events for each step + # Each step has 3 events: simulation_start, render_start, render_end + self.events = [] + # CPU timing arrays + self.cpu_times = [] + for _ in range(n_steps): + step_events = { + 'simulation_start': torch.cuda.Event(enable_timing=True), + 'render_start': torch.cuda.Event(enable_timing=True), + 'render_end': torch.cuda.Event(enable_timing=True) + } + self.events.append(step_events) + # Initialize CPU timing structure for each step + self.cpu_times.append({ + 'simulation_start': 0.0, + 'render_start': 0.0, + 'render_end': 0.0 + }) + self.current_step = 0 + + # Synchronize all previous GPU events + torch.cuda.synchronize() + self.is_synchronized = False + ######################## Profiling Events ####################### + def on_simulation_start(self): + """Record the start of simulation for current step""" + if self.current_step >= self.n_steps: + raise Exception("All steps have been profiled") + self.events[self.current_step]['simulation_start'].record() + self.cpu_times[self.current_step]['simulation_start'] = time.time() + + def on_rendering_start(self): + """Record the start of rendering for current step""" + if self.current_step >= self.n_steps: + raise Exception("All steps have been profiled") + self.events[self.current_step]['render_start'].record() + self.cpu_times[self.current_step]['render_start'] = time.time() + + def on_rendering_end(self): + """Record the end of rendering for current step""" + if self.current_step >= self.n_steps: + raise Exception("All steps have been profiled") + self.events[self.current_step]['render_end'].record() + self.cpu_times[self.current_step]['render_end'] = time.time() + self.current_step += 1 + + def end(self): + """End the profiler""" + self._synchronize() + + def _synchronize(self): + """Synchronize GPU to ensure all events are recorded""" + torch.cuda.synchronize() + self.is_synchronized = True + + ######################## Simulation Performance ####################### + def 
get_total_simulation_gpu_time(self): + """Calculate total simulation GPU time across all steps in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + total_time = 0.0 + for step in range(self.current_step): + events = self.events[step] + total_time += events['simulation_start'].elapsed_time(events['render_start']) + return total_time / 1000.0 + + def get_total_simulation_cpu_time(self): + """Calculate total simulation CPU time across all steps in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + total_time = 0.0 + for step in range(self.current_step): + cpu_times = self.cpu_times[step] + total_time += (cpu_times['render_start'] - cpu_times['simulation_start']) * 1000 # Convert to ms + return total_time / 1000.0 + + def get_simulation_fps(self): + """Get the FPS for the current step""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.n_envs * self.n_steps / self.get_total_simulation_gpu_time() + + def get_simulation_fps_per_env(self): + """Get the FPS per env for the current step""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.n_steps / self.get_total_simulation_gpu_time() + + ######################## Rendering Performance ####################### + def get_total_rendering_gpu_time(self): + """Calculate total rendering GPU time across all steps in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + total_time = 0.0 + for step in range(self.current_step): + events = self.events[step] + total_time += events['render_start'].elapsed_time(events['render_end']) + return total_time / 1000.0 + + def get_total_rendering_cpu_time(self): + """Calculate total rendering CPU time across all steps in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + total_time = 0.0 + for step in 
range(self.current_step): + cpu_times = self.cpu_times[step] + total_time += (cpu_times['render_end'] - cpu_times['render_start']) * 1000 # Convert to ms + return total_time / 1000.0 + + def get_rendering_fps(self): + """Get the FPS for the current step""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.n_envs * self.n_steps / self.get_total_rendering_gpu_time() + + def get_rendering_fps_per_env(self): + """Get the FPS per env for the current step""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.n_steps / self.get_total_rendering_gpu_time() + + def get_total_rendering_gpu_time_per_env(self): + """Get the total rendering GPU time per env for the current step in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.get_total_rendering_gpu_time() / self.n_envs + + def get_total_rendering_cpu_time_per_env(self): + """Get the total rendering CPU time per env for the current step in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.get_total_rendering_cpu_time() / self.n_envs + + ######################## Total Performance ####################### + def get_total_gpu_time(self): + """Calculate total GPU time across all steps in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + total_time = 0.0 + for step in range(self.current_step): + events = self.events[step] + total_time += events['simulation_start'].elapsed_time(events['render_end']) + return total_time / 1000.0 + + def get_total_cpu_time(self): + """Calculate total CPU time across all steps in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + total_time = 0.0 + for step in range(self.current_step): + cpu_times = self.cpu_times[step] + total_time += (cpu_times['render_end'] - cpu_times['simulation_start']) * 
1000 # Convert to ms + return total_time / 1000.0 + + def get_total_gpu_time_per_env(self): + """Get the total GPU time per env for the current step in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.get_total_gpu_time() / self.n_envs + + def get_total_cpu_time_per_env(self): + """Get the total CPU time per env for the current step in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + return self.get_total_cpu_time() / self.n_envs + + def get_step_times(self, step_idx): + """Get detailed timing for a specific step in seconds""" + if not self.is_synchronized: + raise Exception("GPU profiler is not synchronized") + if step_idx >= self.current_step: + raise Exception(f"Step {step_idx} has not been profiled yet") + + events = self.events[step_idx] + cpu_times = self.cpu_times[step_idx] + + return { + 'simulation': { + 'gpu_ms': events['simulation_start'].elapsed_time(events['render_start']), + 'cpu_ms': (cpu_times['render_start'] - cpu_times['simulation_start']) * 1000 + }, + 'rendering': { + 'gpu_ms': events['render_start'].elapsed_time(events['render_end']), + 'cpu_ms': (cpu_times['render_end'] - cpu_times['render_start']) * 1000 + }, + 'total': { + 'gpu_ms': events['simulation_start'].elapsed_time(events['render_end']), + 'cpu_ms': (cpu_times['render_end'] - cpu_times['simulation_start']) * 1000 + } + } + + ######################## Print Summary ####################### + def print_rendering_summary(self): + """Print a summary of the profiler""" + print(f"Total rendering GPU time: {self.get_total_rendering_gpu_time()} seconds") + print(f"Total rendering CPU time: {self.get_total_rendering_cpu_time()} seconds") + print(f"Total rendering GPU time per env: {self.get_total_rendering_gpu_time_per_env()} seconds") + print(f"Total rendering CPU time per env: {self.get_total_rendering_cpu_time_per_env()} seconds") + print(f"Rendering FPS: {self.get_rendering_fps()}") 
+ print(f"Rendering FPS per env: {self.get_rendering_fps_per_env()}") + + def print_simulation_summary(self): + """Print a summary of the profiler""" + print(f"Total simulation GPU time: {self.get_total_simulation_gpu_time()} seconds") + print(f"Total simulation CPU time: {self.get_total_simulation_cpu_time()} seconds") + print(f"Simulation FPS: {self.get_simulation_fps()}") + print(f"Simulation FPS per env: {self.get_simulation_fps_per_env()}") + + def print_summary(self): + """Print a summary of the profiler""" + self.print_rendering_summary() + self.print_simulation_summary() diff --git a/examples/perf_benchmark/benchmark_pyrender.py b/examples/perf_benchmark/benchmark_pyrender.py index e0301c4be..63036916c 100644 --- a/examples/perf_benchmark/benchmark_pyrender.py +++ b/examples/perf_benchmark/benchmark_pyrender.py @@ -5,6 +5,8 @@ import genesis as gs import torch from batch_benchmark import BenchmarkArgs +import benchmark_utils +from benchmark_profiler import BenchmarkProfiler def init_gs(benchmark_args): ########################## init ########################## @@ -55,12 +57,6 @@ def init_gs(benchmark_args): scene.build() return scene -def fill_gpu_cache_with_random_data(): - # 100 MB of random data - dummy_data =torch.rand(100, 1024, 1024, device="cuda") - # Make some random data manipulation to the entire tensor - dummy_data = dummy_data.sqrt() - def run_benchmark(scene, benchmark_args): try: n_envs = benchmark_args.n_envs @@ -71,40 +67,38 @@ def run_benchmark(scene, benchmark_args): rgb, depth, _, _ = scene.visualizer.cameras[0].render(rgb=True, depth=True) # fill gpu cache with random data - fill_gpu_cache_with_random_data() - - # timer - from time import time - start_time = time() + # benchmark_utils.fill_gpu_cache_with_random_data() + # Profiler + profiler = BenchmarkProfiler(n_steps, n_envs) for i in range(n_steps): - for i_env in range(n_envs): - rgb, depth, _, _ = scene.visualizer.cameras[0].render(rgb=True, depth=True) + 
profiler.on_simulation_start() + scene.step() + profiler.on_rendering_start() + rgb, depth, _, _ = scene.visualizer.cameras[0].render(rgb=True, depth=True) + profiler.on_rendering_end() + + profiler.end() + profiler.print_summary() - end_time = time() - time_taken = end_time - start_time - time_taken_per_env = time_taken / n_envs - fps = n_envs * n_steps / time_taken - fps_per_env = n_steps / time_taken - - print(f'Time taken: {time_taken} seconds') - print(f'Time taken per env: {time_taken_per_env} seconds') - print(f'FPS: {fps}') - print(f'FPS per env: {fps_per_env}') - - # Ensure the directory exists - os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file_path), exist_ok=True) + time_taken_gpu = profiler.get_total_rendering_gpu_time() + time_taken_cpu = profiler.get_total_rendering_cpu_time() + time_taken_per_env_gpu = profiler.get_total_rendering_gpu_time_per_env() + time_taken_per_env_cpu = profiler.get_total_rendering_cpu_time_per_env() + fps = profiler.get_rendering_fps() + fps_per_env = profiler.get_rendering_fps_per_env() # Append a line with all args and results in csv format - with open(benchmark_args.benchmark_result_file_path, 'a') as f: - f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer_name},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken},{time_taken_per_env},{fps},{fps_per_env}\n') + os.makedirs(os.path.dirname(benchmark_args.benchmark_result_file), exist_ok=True) + with open(benchmark_args.benchmark_result_file, 'a') as f: + 
f.write(f'succeeded,{benchmark_args.mjcf},{benchmark_args.renderer},{benchmark_args.rasterizer},{benchmark_args.n_envs},{benchmark_args.n_steps},{benchmark_args.resX},{benchmark_args.resY},{benchmark_args.camera_posX},{benchmark_args.camera_posY},{benchmark_args.camera_posZ},{benchmark_args.camera_lookatX},{benchmark_args.camera_lookatY},{benchmark_args.camera_lookatZ},{benchmark_args.camera_fov},{time_taken_gpu},{time_taken_per_env_gpu},{time_taken_cpu},{time_taken_per_env_cpu},{fps},{fps_per_env}\n') except Exception as e: print(f"Error during benchmark: {e}") raise def main(): ######################## Parse arguments ####################### - benchmark_args = BenchmarkArgs.parse_args() + benchmark_args = BenchmarkArgs.parse_benchmark_args() ######################## Initialize scene ####################### scene = init_gs(benchmark_args) diff --git a/examples/perf_benchmark/benchmark_plotter.py b/examples/perf_benchmark/benchmark_report_generator.py similarity index 58% rename from examples/perf_benchmark/benchmark_plotter.py rename to examples/perf_benchmark/benchmark_report_generator.py index 2aef8626a..95c9317c8 100644 --- a/examples/perf_benchmark/benchmark_plotter.py +++ b/examples/perf_benchmark/benchmark_report_generator.py @@ -5,10 +5,111 @@ import pandas as pd import matplotlib.pyplot as plt -from matplotlib.ticker import ScalarFormatter import numpy as np +from benchmark_configs import BenchmarkConfigs -def generatePlotHtml(plots_dir): +def generate_table_html(plot_table_data): + # Add CSS styling for the table + html_table = """ + + \n""" + + # Get all batch sizes and renderers across all plots + all_batch_sizes = [] + all_renderers = [] + for renderer, renderer_data in plot_table_data.items(): + all_renderers.append(renderer) + for batch_size in renderer_data.keys(): + if batch_size not in all_batch_sizes: + all_batch_sizes.append(batch_size) + + sorted_batch_sizes = sorted(all_batch_sizes) + + # Header row with batch sizes + html_table += "" + for 
batch_size in sorted_batch_sizes: + html_table += f"" + html_table += "\n" + + # Data rows + renderer_data = [] + for renderer in all_renderers: + html_table += f"" + row_data = [] + for batch_size in sorted_batch_sizes: + if renderer not in plot_table_data or batch_size not in plot_table_data[renderer]: + row_data.append(None) + html_table += "" + else: + fps = plot_table_data[renderer][batch_size] + row_data.append(fps) + html_table += f"" + html_table += "\n" + renderer_data.append(row_data) + + # Add speedup row for every two renderers + if len(renderer_data) % 2 == 0: + html_table += f"" + last_renderer_data = [None, None] + for i in range(len(sorted_batch_sizes)): + if (renderer_data[-2][i] is not None and + renderer_data[-1][i] is not None): + ratio = renderer_data[-1][i] / renderer_data[-2][i] + last_renderer_data[-2] = renderer_data[-2][i] + last_renderer_data[-1] = renderer_data[-1][i] + html_table += f"" + elif (renderer_data[-2][i] is not None and + renderer_data[-1][i] is None): + ratio = last_renderer_data[-1] / renderer_data[-2][i] + last_renderer_data[-2] = renderer_data[-2][i] + html_table += f"" + elif (renderer_data[-2][i] is None and + renderer_data[-1][i] is not None): + ratio = renderer_data[-1][i] / last_renderer_data[-2] + last_renderer_data[-1] = renderer_data[-1][i] + html_table += f"" + else: + html_table += "" + html_table += "\n" + + html_table += "
Renderer{batch_size}
{html.escape(renderer)}N/A{fps:.1f}
Speedup{ratio:.1f}x{ratio:.1f}x{ratio:.1f}xN/A
" + return html_table + +def generatePlotHtml(plots_dir, all_plot_table_data): #Generate an html page to display all the plots # Get all plot files @@ -77,6 +178,7 @@ def get_resolution_dims(res): html_content += f"

Resolution: {resolution}

\n" html_content += "
\n" for plot in comparison_plot_files[resolution]: + html_content += generate_table_html(all_plot_table_data[plot]) html_content += f"{html.escape(os.path.basename(plot))}
\n" html_content += "
\n" html_content += "\n" @@ -101,12 +203,11 @@ def get_resolution_dims(res): with open(f"{plots_dir}/index.html", 'w') as f: f.write(html_content) -def get_comparison_data_set(): - return [ - (("pyrender", True), ("batch_renderer", True), ("batch_renderer", False)), - ] +def get_comparison_data_list(config_file): + config = BenchmarkConfigs(config_file) + return config.comparison_list -def plot_batch_benchmark(data_file_path, width=20, height=15): +def generate_report(data_file_path, config_file, width=20, height=15): # Load the log file as csv # For each mjcf, rasterizer (rasterizer or not(=raytracer)), generate a plot image and save it to a directory. # The plot image has batch size on the x-axis and fps on the y-axis. @@ -127,12 +228,14 @@ def plot_batch_benchmark(data_file_path, width=20, height=15): generate_individual_plots(df, plots_dir, width, height) # Generate difference plots for specific aspect ratios + all_plot_table_data = dict() for aspect_ratio in ["1:1", "4:3", "16:9"]: - for renderer_info_array in get_comparison_data_set(): - generate_comparison_plots(df, plots_dir, width, height, renderer_info_array, aspect_ratio=aspect_ratio) + for comparison_list in get_comparison_data_list(config_file): + plot_table_data = generate_comparison_plots(df, plots_dir, width, height, comparison_list, aspect_ratio) + all_plot_table_data.update(plot_table_data) # Generate an html page to display all the plots - generatePlotHtml(plots_dir) + generatePlotHtml(plots_dir, all_plot_table_data) def generate_individual_plots(df, plots_dir, width, height): # Get unique combinations of mjcf and rasterizer @@ -158,7 +261,7 @@ def generate_individual_plots(df, plots_dir, width, height): # Create bar chart x = np.arange(len(all_batch_sizes)) - bar_width = 0.8 / len(resolutions) # Adjust bar width based on number of resolutions + bar_width = 0.8 / len(resolutions) # Plot bars for each resolution for i, (resolution, res_data) in enumerate(resolutions): @@ -197,9 +300,9 @@ def 
generate_individual_plots(df, plots_dir, width, height): plt.savefig(plot_filename) plt.close() -def generate_comparison_plots(df, plots_dir, width, height, renderer_info_array, aspect_ratio=None): - renderer_name_array = [renderer_info[0] for renderer_info in renderer_info_array] - renderer_is_rasterizer_array = [renderer_info[1] for renderer_info in renderer_info_array] +def generate_comparison_plots(df, plots_dir, width, height, comparison_list, aspect_ratio=None): + renderer_array = [comparison_info['renderer'] for comparison_info in comparison_list] + renderer_is_rasterizer_array = [comparison_info['rasterizer'] for comparison_info in comparison_list] rasterizer_str_array = ['rasterizer' if renderer_is_rasterizer else 'raytracer' for renderer_is_rasterizer in renderer_is_rasterizer_array] # Filter by aspect ratio if specified @@ -212,6 +315,8 @@ def generate_comparison_plots(df, plots_dir, width, height, renderer_info_array, df = df[df['resX'] * 9 == df['resY'] * 16] else: raise ValueError(f"Unsupported aspect ratio: {aspect_ratio}") + + plot_table_data = dict() plt.clf() plt.cla() @@ -221,8 +326,13 @@ def generate_comparison_plots(df, plots_dir, width, height, renderer_info_array, mjcf_data = df[df['mjcf'] == mjcf] # Get resolutions available for both renderer_1 and renderer_2 - renderer_resolutions = [set(zip(mjcf_data[(mjcf_data['renderer'] == renderer_name) & (mjcf_data['rasterizer'] == renderer_is_rasterizer)]['resX'], - mjcf_data[(mjcf_data['renderer'] == renderer_name) & (mjcf_data['rasterizer'] == renderer_is_rasterizer)]['resY'])) for renderer_name, renderer_is_rasterizer in renderer_info_array] + for comparison in comparison_list: + renderer = comparison['renderer'] + renderer_is_rasterizer = comparison['rasterizer'] + renderer_resolutions = [set(zip(mjcf_data[(mjcf_data['renderer'] == renderer) & (mjcf_data['rasterizer'] == renderer_is_rasterizer)]['resX'], + mjcf_data[(mjcf_data['renderer'] == renderer) & (mjcf_data['rasterizer'] == 
renderer_is_rasterizer)]['resY']))] + print(f"renderer: {renderer}, renderer_is_rasterizer: {renderer_is_rasterizer}") + print(f"renderer_resolutions: {renderer_resolutions}") common_res = set.intersection(*renderer_resolutions) # continue if there is no data @@ -234,29 +344,21 @@ def generate_comparison_plots(df, plots_dir, width, height, renderer_info_array, for resX, resY in sorted(common_res, key=lambda x: x[0] * x[1]): plt.figure(figsize=(width, height)) renderer_data_array = [] - for renderer_name, renderer_is_rasterizer in renderer_info_array: - renderer_data = mjcf_data[(mjcf_data['renderer'] == renderer_name) & - (mjcf_data['rasterizer'] == renderer_is_rasterizer) & - (mjcf_data['resX'] == resX) & - (mjcf_data['resY'] == resY)] + for comparison in comparison_list: + renderer = comparison['renderer'] + renderer_is_rasterizer = comparison['rasterizer'] + renderer_data = mjcf_data[(mjcf_data['result'] == 'succeeded') & + (mjcf_data['renderer'] == renderer) & + (mjcf_data['rasterizer'] == renderer_is_rasterizer) & + (mjcf_data['resX'] == resX) & + (mjcf_data['resY'] == resY)] renderer_data_array.append(renderer_data) # Match batch sizes and calculate difference - common_batch = set.intersection(*[set(renderer_data['n_envs']) for renderer_data in renderer_data_array]) - batch_sizes = sorted(list(common_batch)) - - fps_array = [] - for renderer_data in renderer_data_array: - fps_array.append(renderer_data[renderer_data['n_envs'].isin(batch_sizes)]['fps'].values) + batch_sizes = set.union(*[set(renderer_data['n_envs']) for renderer_data in renderer_data_array]) + sorted_batch_sizes = sorted(list(batch_sizes)) # Create bar chart - x = np.arange(len(batch_sizes)) - bar_width = 0.8 / len(renderer_info_array) - - # Plot bars - bar_groups = [plt.bar(x + i * bar_width, fps, bar_width, label=f'{renderer_name} {rasterizer_str}') for i, (fps, renderer_name, rasterizer_str) in enumerate(zip(fps_array, renderer_name_array, rasterizer_str_array))] - - # Add value labels on 
top of bars def add_labels(bars): for bar in bars: bar_height = bar.get_height() @@ -265,33 +367,53 @@ def add_labels(bars): xytext=(0, 3), # 3 points vertical offset textcoords="offset points", ha='center', va='bottom', fontsize=8) - - for bar_group in bar_groups: - add_labels(bar_group) - + + # Plot bars + bar_width = 0.8 / len(comparison_list) + fps_array = [renderer_data[renderer_data['n_envs'].isin(sorted_batch_sizes)]['fps'].values for renderer_data in renderer_data_array] + for i, (fps, renderer, rasterizer_str) in enumerate(zip(fps_array, renderer_array, rasterizer_str_array)): + x = np.arange(len(fps)) + bars = plt.bar(x + i * bar_width, fps, bar_width, label=f'{renderer} {rasterizer_str}') + add_labels(bars) + # Customize plot - renderer_str_array = [f'{renderer_name} {rasterizer_str}' for renderer_name, rasterizer_str in zip(renderer_name_array, rasterizer_str_array)] + renderer_str_array = [f'{renderer} {rasterizer_str}' for renderer, rasterizer_str in zip(renderer_array, rasterizer_str_array)] renderer_str_array_str = ', '.join(renderer_str_array) plt.title(f'FPS Comparison: {renderer_str_array_str}\n{os.path.basename(mjcf)} - Resolution: {resX}x{resY}') plt.xlabel('Batch Size') plt.ylabel('FPS') - plt.xticks(x, batch_sizes) + plt.xticks(np.arange(len(sorted_batch_sizes)), sorted_batch_sizes) plt.legend() plt.grid(True, axis='y') # Save plot - plot_filename = f"{plots_dir}/{os.path.splitext(os.path.basename(mjcf))[0]}_{renderer_str_array_str}_{resX}x{resY}_comparison_plot.png" + renderer_str_array_str_for_filename = renderer_str_array_str.replace(",", "_") + plot_filename = f"{plots_dir}/{os.path.splitext(os.path.basename(mjcf))[0]}_{renderer_str_array_str_for_filename}_{resX}x{resY}_comparison_plot.png" plt.savefig(plot_filename, dpi=100) # Added dpi parameter for better quality plt.close() + # Create a table of the data in plot_table_data, the key is the plot_filename, the value is a nested dict + # The key of the outer dict is "{renderer} - 
{rasterizer_str}" + # The key of the inner dict is "batch_size" + # The value of the inner dict is the fps + plot_table_data[plot_filename] = { + f"{renderer} - {rasterizer_str}": { + batch_size: fps for batch_size, fps in zip(sorted_batch_sizes, fps_array[i]) + } for i, (renderer, rasterizer_str) in enumerate(zip(renderer_array, rasterizer_str_array)) + } + + return plot_table_data + def main(): import sys import os print("Script arguments:", sys.argv) # Debug print parser = argparse.ArgumentParser() - parser.add_argument("-d", "--data_file_path", type=str, default="logs/benchmark/batch_benchmark_20250610_160138_combined.csv", + parser.add_argument("-d", "--data_file_path", type=str, default="logs/benchmark/batch_benchmark_20250615_234412.csv", help="Path to the benchmark data CSV file") + parser.add_argument("-c", "--config_file", type=str, default="benchmark_config_smoke_test.yml", + help="Path to the benchmark config file") parser.add_argument("-w", "--width", type=int, default=20, help="Width of the plot in inches") parser.add_argument("-y", "--height", type=int, default=8, @@ -305,7 +427,7 @@ def main(): args = parser.parse_args() print("Parsed arguments:", args) # Debug print - plot_batch_benchmark(args.data_file_path, args.width, args.height) + generate_report(args.data_file_path, args.config_file, args.width, args.height) if __name__ == "__main__": main() diff --git a/examples/perf_benchmark/benchmark_utils.py b/examples/perf_benchmark/benchmark_utils.py new file mode 100644 index 000000000..eddde5844 --- /dev/null +++ b/examples/perf_benchmark/benchmark_utils.py @@ -0,0 +1,7 @@ +import torch + +def fill_gpu_cache_with_random_data(): + # 100 * 1024 * 1024 random values (about 400 MB at torch's default float32 dtype) + dummy_data = torch.rand(100, 1024, 1024, device="cuda") + # Apply an element-wise operation (sqrt) to the entire tensor + dummy_data = dummy_data.sqrt() \ No newline at end of file diff --git a/examples/perf_benchmark/configs/benchmark_config_basic.yml
b/examples/perf_benchmark/configs/benchmark_config_basic.yml new file mode 100644 index 000000000..05faba4b3 --- /dev/null +++ b/examples/perf_benchmark/configs/benchmark_config_basic.yml @@ -0,0 +1,77 @@ +mjcf_list: + - xml/franka_emika_panda/panda.xml + - xml/unitree_g1/g1.xml + - xml/unitree_go2/go2.xml + +renderer_list: + - renderer: madrona + benchmark_script: benchmark_madrona.py + timeout: 120 + - renderer: omniverse + benchmark_script: benchmark_omni.py + timeout: 60 + +rasterizer_list: + - true + - false + +batch_size_list: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 768 + - 1024 + - 1536 + - 2048 + - 3072 + - 4096 + - 6144 + - 8192 + - 12288 + - 16384 + +resolution_list: + #square: + - [128, 128] + - [256, 256] + + #four_three: + + #sixteen_nine: + +comparison_list: + - - renderer: omniverse + rasterizer: true + - renderer: madrona + rasterizer: true + - renderer: omniverse + rasterizer: false + - renderer: madrona + rasterizer: false + +# Configurations shared between batch_benchmark.py and benchmark_*.py +# Raytracer configuration +raytracer: + max_bounce: 2 + spp: 1 + +# Simulation configuration +simulation: + n_steps: 100 + +# Camera configuration +camera: + position: [1.5, 0.5, 1.5] # [x, y, z] + lookat: [0.0, 0.0, 0.5] # [x, y, z] + fov: 45.0 # degrees + +# Display configuration +display: + gui: false # Enable/disable GUI mode diff --git a/examples/perf_benchmark/configs/benchmark_config_full.yml b/examples/perf_benchmark/configs/benchmark_config_full.yml new file mode 100644 index 000000000..e8afa2952 --- /dev/null +++ b/examples/perf_benchmark/configs/benchmark_config_full.yml @@ -0,0 +1,100 @@ +mjcf_list: + - xml/franka_emika_panda/panda.xml + - xml/unitree_g1/g1.xml + - xml/unitree_go2/go2.xml + +renderer_list: + - renderer: madrona + benchmark_script: benchmark_madrona.py + timeout: 120 + +rasterizer_list: + - true + - false + +batch_size_list: + - 1 + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + -
768 + - 1024 + - 1536 + - 2048 + - 3072 + - 4096 + - 6144 + - 8192 + - 12288 + - 16384 + +resolution_list: + #square: + - [64, 64] + - [128, 128] + - [256, 256] + - [512, 512] + - [1024, 1024] + - [2048, 2048] + - [4096, 4096] + - [8192, 8192] + + #four_three: + - [320, 240] + - [640, 480] + - [800, 600] + - [1024, 768] + - [1280, 960] + - [1600, 1200] + - [1920, 1440] + - [2048, 1536] + - [2560, 1920] + - [3200, 2400] + - [4096, 3072] + - [8192, 6144] + + #sixteen_nine: + - [320, 180] + - [640, 360] + - [800, 450] + - [1024, 576] + - [1280, 720] + - [1600, 900] + - [1920, 1080] + - [2048, 1152] + - [2560, 1440] + - [3200, 1800] + - [4096, 2304] + - [8192, 4608] + +comparison_list: + - - renderer: madrona + rasterizer: true + - renderer: madrona + rasterizer: false + +# Configurations shared between batch_benchmark.py and benchmark_*.py +# Raytracer configuration +raytracer: + max_bounce: 2 + spp: 1 + +# Simulation configuration +simulation: + n_steps: 100 + +# Camera configuration +camera: + position: [1.5, 0.5, 1.5] # [x, y, z] + lookat: [0.0, 0.0, 0.5] # [x, y, z] + fov: 45.0 # degrees + +# Display configuration +display: + gui: false # Enable/disable GUI mode diff --git a/examples/perf_benchmark/configs/benchmark_config_smoke_test.yml b/examples/perf_benchmark/configs/benchmark_config_smoke_test.yml new file mode 100644 index 000000000..a6c29f231 --- /dev/null +++ b/examples/perf_benchmark/configs/benchmark_config_smoke_test.yml @@ -0,0 +1,56 @@ +mjcf_list: + - xml/franka_emika_panda/panda.xml + +renderer_list: + - renderer: madrona + benchmark_script: benchmark_madrona.py + timeout: 120 + - renderer: omniverse + benchmark_script: benchmark_omni.py + timeout: 60 + +rasterizer_list: + - true + - false + +batch_size_list: + - 256 + +resolution_list: + #square: + - [128, 128] + - [256, 256] + + #four_three: + + #sixteen_nine: + +comparison_list: + - - renderer: omniverse + rasterizer: true + - renderer: madrona + rasterizer: true + - renderer: omniverse +
rasterizer: false + - renderer: madrona + rasterizer: false + +# Configurations shared between batch_benchmark.py and benchmark_*.py +# Raytracer configuration +raytracer: + max_bounce: 2 + spp: 1 + +# Simulation configuration +simulation: + n_steps: 100 + +# Camera configuration +camera: + position: [1.5, 0.5, 1.5] # [x, y, z] + lookat: [0.0, 0.0, 0.5] # [x, y, z] + fov: 45.0 # degrees + +# Display configuration +display: + gui: false # Enable/disable GUI mode