@@ -57,13 +57,21 @@ def get_exp_handles(expname: str, ignore_finished=True, ignore_exp_not_exists=Tr
5757 is called, but finish before nemo-run submits a new job (which might take minutes)
5858 """
5959
60- def _get_handles (exp ):
60+ def _get_handles (exp : run . Experiment ):
6161 handles = []
62- for job in exp .jobs :
62+ status_dict = exp .status (return_dict = True )
63+ assert status_dict , f"No status found for experiment { exp ._id } "
64+ for _ , status_info in status_dict .items ():
6365 if not ignore_finished or (
64- job .status (exp ._runner ) in [AppState .RUNNING , AppState .PENDING , AppState .SUBMITTED , AppState .UNKNOWN ]
66+ status_info ['status' ]
67+ in [
68+ AppState .RUNNING ,
69+ AppState .PENDING ,
70+ AppState .SUBMITTED ,
71+ AppState .UNKNOWN ,
72+ ]
6573 ):
66- handles .append (job . handle )
74+ handles .append (status_info [ ' handle' ] )
6775 continue
6876 return handles
6977
@@ -611,7 +619,12 @@ def get_exp(expname, cluster_config, _reuse_exp=None):
611619 # nemo-run redefines the handlers, so removing ours to avoid duplicate logs
612620 remove_handlers ()
613621 if cluster_config ['executor' ] == 'slurm' :
614- return run .Experiment (expname )
622+ return run .Experiment (
623+ expname ,
624+ skip_status_at_exit = True ,
625+ serialize_metadata_for_scripts = False ,
626+ threadpool_workers = cluster_config .get ('num_workers' , 4 ),
627+ )
615628 # hiding all nemo-run logs otherwise as they are not useful locally
616629 if cluster_config ['executor' ] == 'local' :
617630 return run .Experiment (expname , clean_mode = True )
0 commit comments