From 46527139fdc071c913dc47e2fb650ecef2d7a2f0 Mon Sep 17 00:00:00 2001
From: Guo Yejun
Date: Fri, 23 Jun 2023 02:13:42 -0700
Subject: [PATCH] add support for MPMD (multiple program, multiple data) launch mode

the following command:
mpiexec -genv WORLD_SIZE 2 ... \
    -np 1 -host localhost ... -env RANK 0 final_cmd : \
    -np 1 -host localhost ... -env RANK 1 final_cmd

is converted into:
mpitx -genv WORLD_SIZE 2 ... -- \
    -np 1 -host localhost ... -env RANK 0 final_cmd : \
    -np 1 -host localhost ... -env RANK 1 final_cmd

and finally calls:
mpiexec -genv WORLD_SIZE 2 ... \
    -np 1 -host localhost ... -env RANK 0 mpitx "mpitx_child" final_cmd : \
    -np 1 -host localhost ... -env RANK 1 mpitx "mpitx_child" final_cmd
---
 mpitx/mpitx.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/mpitx/mpitx.py b/mpitx/mpitx.py
index 93c90dd..621a86a 100755
--- a/mpitx/mpitx.py
+++ b/mpitx/mpitx.py
@@ -366,8 +366,55 @@ def notify_parent(port, token):
     def launch_mpiexec(port, token):
+        """Launch mpiexec with every user command wrapped in the mpitx child.
+
+        port and token are passed to each child through its serialized
+        arguments, together with the host list built below.  In MPMD mode
+        every ':'-separated program keeps its own mpiexec options and gets
+        its own wrapped command.  The Popen handle is kept in mpiexec_process.
+        """
         nonlocal mpiexec_process
         hosts = ["127.0.0.1"] + get_ip_addresses()
-        args = dict(commands=commands, hosts=hosts, port=port, token=token)
-        mpiexec_process = subprocess.Popen([mpiexec_cmd] + options + [this_cmd, child_subcmd, serialize(args)],
+
+        def wrap_with_child(final_cmd):
+            # Replace a user command by: this_cmd child_subcmd <serialized args>.
+            args = dict(commands=final_cmd, hosts=hosts, port=port, token=token)
+            return [this_cmd, child_subcmd, serialize(args)]
+
+        if ':' in commands:
+            # MPMD (multiple program, multiple data) launch mode looks like:
+            # mpiexec -genv WORLD_SIZE 2
+            #     -np 1 -host localhost -env RANK 0 final_cmd :  # 1st program
+            #     -np 1 -host localhost -env RANK 1 final_cmd    # 2nd program
+            # Split on the ':' tokens of the argument list itself: joining the
+            # list into a string and re-splitting it would break any argument
+            # that contains whitespace or a colon.
+            programs = [[]]
+            for arg in commands:
+                if arg == ':':
+                    programs.append([])
+                else:
+                    programs[-1].append(arg)
+            new_commands = []
+            for one_program in programs:
+                # Step over the leading mpiexec options to locate final_cmd; the
+                # bound check covers a program that consists of options only.
+                final_cmd_index = 0
+                while final_cmd_index < len(one_program):
+                    arg = one_program[final_cmd_index]
+                    if not arg.startswith('-'):
+                        break
+                    if arg == '-env':
+                        final_cmd_index += 3  # -env NAME VALUE
+                    elif arg in ('-envall', '-envnone'):
+                        final_cmd_index += 1  # flags that take no value
+                    else:
+                        final_cmd_index += 2  # -option VALUE
+                if new_commands:
+                    new_commands.append(':')
+                new_commands += one_program[:final_cmd_index]
+                new_commands += wrap_with_child(one_program[final_cmd_index:])
+        else:
+            new_commands = wrap_with_child(commands)
+        mpiexec_process = subprocess.Popen([mpiexec_cmd] + options + new_commands,
                                            start_new_session=True)
 
     child_conns = establish_connection_on_parent(launch_mpiexec)