Commit 49b3efd

add scripts, update utils
1 parent d13504c commit 49b3efd

15 files changed: +251, -144 lines

README.md (+1, -1)

@@ -35,7 +35,7 @@ Explorer is a PyTorch reinforcement learning framework for **exploring** new ide
 | | ├── DDQN
 | | ├── NoisyNetDQN
 | | ├── BootstrappedDQN
-| | └── MeDQN_Uniform, MeDQN_Real
+| | └── MeDQN: MeDQN(U), MeDQN(R)
 | ├── Maxmin DQN ── Ensemble DQN
 | └── Averaged DQN
 └── REINFORCE

analysis.py (+5, -3)

@@ -10,7 +10,7 @@ def get_process_result_dict(result, config_idx, mode='Train'):
     'Env': result['Env'][0],
     'Agent': result['Agent'][0],
     'Config Index': config_idx,
-    'Return (mean)': result['Return'][-100:].mean() if mode=='Train' else result['Return'][-5:].mean()
+    'Return (mean)': result['Return'][-100:].mean(skipna=False) if mode=='Train' else result['Return'][-5:].mean(skipna=False)
   }
   return result_dict
 
@@ -19,7 +19,7 @@ def get_csv_result_dict(result, config_idx, mode='Train'):
     'Env': result['Env'][0],
     'Agent': result['Agent'][0],
     'Config Index': config_idx,
-    'Return (mean)': result['Return (mean)'].mean(),
+    'Return (mean)': result['Return (mean)'].mean(skipna=False),
     'Return (se)': result['Return (mean)'].sem(ddof=0)
   }
   return result_dict
@@ -29,6 +29,8 @@ def get_csv_result_dict(result, config_idx, mode='Train'):
     'merged': True,
     'x_label': 'Step',
     'y_label': 'Average Return',
+    # 'rolling_score_window': 20,
+    'rolling_score_window': -1,
     'hue_label': 'Agent',
     'show': False,
     'imgType': 'png',
@@ -39,7 +41,7 @@ def get_csv_result_dict(result, config_idx, mode='Train'):
     'ylim': {'min': None, 'max': None},
     'EMA': True,
     'loc': 'lower right',
-    'sweep_keys': [],
+    'sweep_keys': ['optimizer/actor_kwargs/lr', 'optimizer/critic_kwargs/lr', 'optimizer/reward_kwargs/lr'],
     'sort_by': ['Return (mean)', 'Return (se)'],
     'ascending': [False, True],
     'runs': 1
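
A note on the skipna=False change above: pandas' Series.mean skips NaN values by default, so a run with missing returns could silently look complete in the summary CSV. With skipna=False the mean itself becomes NaN and the gap stays visible. A minimal sketch with made-up data standing in for result['Return']:

```python
import pandas as pd

returns = pd.Series([10.0, 12.0, None, 15.0])  # one missing return

print(returns.mean())              # 12.33... -- the NaN is silently skipped
print(returns.mean(skipna=False))  # nan      -- the incomplete run is flagged
```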

clean.sh (new file, +4)

@@ -0,0 +1,4 @@
+# Clean: remove all job indexes, and output
+rm -f job_idx_*
+rm -rf ./output/*
+rm -rf slurm-*.out

copyfile.sh (deleted, -8)

find_config.py (+7, -6)

@@ -2,8 +2,8 @@
 from utils.sweeper import Sweeper
 
 
-def find_one_run():
-  agent_config = 'mc_medqn.json'
+def find_cfg_idx():
+  agent_config = 'MERL_mc_medqn.json'
   config_file = os.path.join('./configs/', agent_config)
   sweeper = Sweeper(config_file)
   for i in range(1, 1+sweeper.config_dicts['num_combinations']):
@@ -13,7 +13,7 @@ def find_one_run():
   print()
 
 
-def find_many_runs():
+def get_cfg_idx_for_runs():
   l = [23,146,150,147,255,207,133,130,114,55,235,210,138,82,140,209,228,69,71,353,317]
   l.sort()
   print('len(l)=', len(l))
@@ -22,9 +22,10 @@ def find_many_runs():
     for x in l:
       ll.append(x+360*r)
   print('len(ll)=', len(ll))
-  print(*ll)
+  for x in ll:
+    print(x, end=',')
 
 
 if __name__ == "__main__":
-  find_one_run()
-  # find_many_runs()
+  find_cfg_idx()
+  # get_cfg_idx_for_runs()
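
For context on get_cfg_idx_for_runs: the sweep appears to lay out repeated runs in contiguous blocks of config indexes, so configuration x in run r gets index x + N*r, with N = 360 combinations per run in this file. A small illustrative sketch of that arithmetic (the values are made up; N comes from the sweep config):

```python
base_indexes = [23, 55, 69]  # config indexes selected from the first run (illustrative)
num_combinations = 360       # combinations per run, matching the x + 360*r offset above
num_runs = 10

all_indexes = [x + num_combinations * r
               for r in range(num_runs)
               for x in base_indexes]

# Comma-separated, matching the script's print(x, end=',') output
print(','.join(map(str, all_indexes)))
```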

git_commit_id.sh (new file, +1)

@@ -0,0 +1 @@
+git rev-parse --short HEAD

move_log.sh (new file, +19)

@@ -0,0 +1,19 @@
+# Get git commit id
+git_id=$(git rev-parse --short HEAD)
+printf "$git_id\n"
+
+# Create old_log directory
+mkdir old_logs
+dest_dir=./old_logs/logs-$git_id/
+mkdir $dest_dir
+
+# Compress log files.
+cd logs
+files=$(ls)
+for filename in $files
+do
+  printf "zip and move $filename to old_logs...\n"
+  zip -rq $filename.zip ./$filename
+  mv -f $filename.zip ../$dest_dir
+done
+cd ..
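
For anyone who prefers doing this archiving step from Python, a rough standard-library equivalent of move_log.sh (the logs/ and old_logs/ layout is taken from the script above; this is a sketch, not part of the commit):

```python
import shutil
import subprocess
from pathlib import Path

# Short commit id, as in move_log.sh
git_id = subprocess.run(['git', 'rev-parse', '--short', 'HEAD'],
                        capture_output=True, text=True, check=True).stdout.strip()

dest_dir = Path('old_logs') / f'logs-{git_id}'
dest_dir.mkdir(parents=True, exist_ok=True)

# Zip each experiment directory under logs/ and place the archive in old_logs/logs-<id>/
for log_dir in Path('logs').iterdir():
    if log_dir.is_dir():
        print(f'zip and move {log_dir.name} to {dest_dir} ...')
        shutil.make_archive(str(dest_dir / log_dir.name), 'zip',
                            root_dir='logs', base_dir=log_dir.name)
```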

requirements.txt (-2)

@@ -1,5 +1,3 @@
-gym==0.23.1
-gym_games==1.0.4
 matplotlib==3.5.2
 numpy==1.22.0
 opencv_python==4.5.5.64

run.py (+43, -61)

@@ -1,5 +1,7 @@
 import os
 import sys
+import argparse
+from math import ceil
 from utils.submitter import Submitter
 
 
@@ -9,83 +11,63 @@ def make_dir(dir):
 
 
 def main(argv):
+  # python run_narval.py --job_type S
+  # python run_narval.py --job_type M
+  parser = argparse.ArgumentParser(description="Submit jobs")
+  parser.add_argument('--job_type', type=str, default='S', help='Run single (S) or multiple (M) jobs in one experiment: S, M')
+  args = parser.parse_args()
+
   sbatch_cfg = {
     # Account name
     # 'account': 'def-ashique',
     'account': 'rrg-ashique',
     # Job name
-    # 'job-name': 'minatar_dqn',
-    'job-name': 'minatar_dqn_sm',
-    # 'job-name': 'minatar_medqn_real',
-    # 'job-name': 'minatar_medqn_uniform',
+    'job-name': 'MERL_mc_dqn',
     # Job time
-    'time': '0-05:00:00',
-    # GPU/CPU type
-    'cpus-per-task': 1,
-    # Memory
-    # 'mem-per-cpu': '2500M',
-    'mem-per-cpu': '1500M',
-    # Email address
+    'time': '0-01:00:00',
+    # Email notification
     'mail-user': '[email protected]'
   }
-
-  # sbatch configs backup for different games
-  # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'catcher', '0-10:00:00', '2000M'
-  # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'copter', '0-05:00:00', '2000M'
-  # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'lunar', '0-07:00:00', '2000M'
-  # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'minatar', '0-05:00:00', '2500M'
-
-
-  l_dqn = [11,15,19,7,13,17,9,12,16,6,10,2,18]
-  l_dqn.sort()
-  ll_dqn = []
-  for r in range(1,10):
-    for x in l_dqn:
-      ll_dqn.append(x+20*r)
-
-  l_dqn_sm = [19,11,15,13,17,9,18,14,2,20,12]
-  l_dqn_sm.sort()
-  ll_dqn_sm = []
-  for r in range(1,10):
-    for x in l_dqn_sm:
-      ll_dqn_sm.append(x+20*r)
-
-  l_uniform = [827,267,927,155,583,691,751,351,147,747,587,277,269,273,497,357,669,433,501,509,821,205,517,577,254,270,826,746,490,510,730,430,830,734,732,736,652,888,656,892,512,496,572,592,508,352]
-  l_uniform.sort()
-  ll_uniform = []
-  for r in range(1,10):
-    for x in l_uniform:
-      ll_uniform.append(x+960*r)
-
-  l_real = [195,643,27,179,423,115,403,187,191,267,419,43,351,199,31,203,357,121,41,201,125,285,129,133,213,749,429,433,517,493,501,505,489,57,432,888,884,564,648,664,644,188,416,652,276,352,340,108,256,426,106,402,566,110,510,406,410,254,414,206,574]
-  l_real.sort()
-  ll_real = []
-  for r in range(1,10):
-    for x in l_real:
-      ll_real.append(x+960*r)
-
   general_cfg = {
     # User name
     'user': 'qlan3',
-    # Sbatch script path
-    'script-path': './sbatch.sh',
     # Check time interval in minutes
     'check-time-interval': 5,
-    # Clusters info: {name: capacity}
-    'clusters': {'Narval': 1000},
+    # Clusters info: name & capacity
+    'cluster_name': 'Narval',
+    'cluster_capacity': 996,
     # Job indexes list
-    # 'job-list': list(range(1, 20+1))
-    # 'job-list': list(range(1, 960+1))
-    # 'job-list': ll_uniform
-    # 'job-list': ll_real
-    # 'job-list': ll_dqn
-    'job-list': ll_dqn_sm
-    # 'job-list': []
+    'job-list': list(range(1, 10+1))
   }
-
   make_dir(f"output/{sbatch_cfg['job-name']}")
-  submitter = Submitter(general_cfg, sbatch_cfg)
-  submitter.submit()
+
+  if args.job_type == 'M':
+    # Max number of parallel jobs in one experiment
+    max_parallel_jobs = 4
+    mem_per_job = 16 # in GB
+    cpu_per_job = 2 # Increase cpus_per_job to 5/10 can further increase training speed.
+    mem_per_cpu = int(ceil(mem_per_job/cpu_per_job))
+    # Write to procfile for Parallel
+    with open('procfile', 'w') as f:
+      f.write(str(max_parallel_jobs))
+    sbatch_cfg['gres'] = 'gpu:1' # GPU type
+    sbatch_cfg['cpus-per-task'] = cpu_per_job*max_parallel_jobs
+    sbatch_cfg['mem-per-cpu'] = f'{mem_per_cpu}G' # Memory
+    # Sbatch script path
+    general_cfg['script-path'] = './sbatch_m.sh'
+    # Max number of jobs for Parallel
+    general_cfg['max_parallel_jobs'] = max_parallel_jobs
+    submitter = Submitter(general_cfg, sbatch_cfg)
+    submitter.multiple_submit()
+  elif args.job_type == 'S':
+    mem_per_cpu = 1500
+    sbatch_cfg['cpus-per-task'] = 1
+    sbatch_cfg['mem-per-cpu'] = f'{mem_per_cpu}M' # Memory
+    # Sbatch script path
+    general_cfg['script-path'] = './sbatch_s.sh'
+    submitter = Submitter(general_cfg, sbatch_cfg)
+    submitter.single_submit()
+
 
 if __name__=='__main__':
   main(sys.argv)
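
A usage note on the reworked run.py: --job_type S submits one config index per SLURM array task through sbatch_s.sh, while --job_type M packs several config indexes into a single allocation and runs them concurrently with GNU Parallel through sbatch_m.sh. The resource request for the M branch follows directly from the hard-coded values above; a short worked check:

```python
from math import ceil

# Values hard-coded in the M branch of run.py
max_parallel_jobs = 4   # concurrent training runs inside one allocation
mem_per_job = 16        # GB needed by one training run
cpu_per_job = 2         # CPUs given to one training run

cpus_per_task = cpu_per_job * max_parallel_jobs      # 8 CPUs requested
mem_per_cpu = int(ceil(mem_per_job / cpu_per_job))   # 8 -> requested as '8G' per CPU
total_mem = cpus_per_task * mem_per_cpu              # 64 GB for the whole allocation

print(cpus_per_task, f'{mem_per_cpu}G per CPU', f'{total_mem}G total')
```

So the allocation's total memory scales as max_parallel_jobs * mem_per_job, which matches four 16 GB runs sharing one allocation.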

sbatch_m.sh (new file, +39)

@@ -0,0 +1,39 @@
+#!/bin/bash
+# Ask SLURM to send the USR1 signal 300 seconds before end of the time limit
+#SBATCH --signal=B:USR1@300
+#SBATCH --output=output/%x/%a.txt
+#SBATCH --mail-type=ALL
+#SBATCH --exclude=nc20552,nc11001,nc11002,nc11103,nc11126,nc10303,nc20305,nc10249,nc20325,nc11124,nc20529,nc20526,nc20342,nc20354,nc30616,nc30305,nc20133,nc10220
+
+# ---------------------------------------------------------------------
+echo "Current working directory: `pwd`"
+echo "Starting run at: `date`"
+# ---------------------------------------------------------------------
+echo "Job Array ID / Job ID: $SLURM_ARRAY_JOB_ID / $SLURM_JOB_ID"
+echo "This is job $SLURM_ARRAY_TASK_ID out of $SLURM_ARRAY_TASK_COUNT jobs"
+echo "SLURM_TMPDIR: $SLURM_TMPDIR"
+echo "SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST"
+# ---------------------------------------------------------------------
+cleanup()
+{
+  echo "Copy log files from temporary directory"
+  sour=$SLURM_TMPDIR/$SLURM_JOB_NAME/.
+  dest=./logs/$SLURM_JOB_NAME/
+  echo "Source directory: $sour"
+  echo "Destination directory: $dest"
+  cp -rf $sour $dest
+}
+# Call `cleanup` once we receive USR1 or EXIT signal
+trap 'cleanup' USR1 EXIT
+# ---------------------------------------------------------------------
+# export OMP_NUM_THREADS=1
+module load gcc/9.3.0 arrow/2.0.0 python/3.8 scipy-stack
+source ~/envs/tianshou/bin/activate
+
+parallel --ungroup --jobs procfile python main.py --config_file ./configs/${SLURM_JOB_NAME}.json --config_idx {1} --slurm_dir $SLURM_TMPDIR :::: job_idx_${SLURM_JOB_NAME}_${SLURM_ARRAY_TASK_ID}.txt
+# parallel --eta --ungroup --jobs procfile python main.py --config_file ./configs/${SLURM_JOB_NAME}.json --config_idx {1} --slurm_dir $SLURM_TMPDIR :::: job_idx_${SLURM_JOB_NAME}_${SLURM_ARRAY_TASK_ID}.txt
+# parallel --ungroup --jobs procfile python main.py --config_file ./configs/${SLURM_JOB_NAME}.json --config_idx {1} :::: job_idx_${SLURM_JOB_NAME}_${SLURM_ARRAY_TASK_ID}.txt
+
+# ---------------------------------------------------------------------
+echo "Job finished with exit code $? at: `date`"
+# ---------------------------------------------------------------------
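
The parallel command above reads config indexes from job_idx_${SLURM_JOB_NAME}_${SLURM_ARRAY_TASK_ID}.txt via ::::, one index per line, and caps concurrency with --jobs procfile (the file run.py writes). The job-index file itself is presumably produced by utils.submitter.Submitter; a hypothetical sketch of what writing it could look like, with assumed names and values:

```python
# Hypothetical sketch: write the per-array-task index file that GNU Parallel
# consumes via `:::: job_idx_<job-name>_<array-task-id>.txt` (one index per line).
# The real file is created by utils.submitter.Submitter; everything here is assumed.
job_name = 'MERL_mc_dqn'
array_task_id = 1
config_indexes = [1, 2, 3, 4]  # config indexes to run inside this one allocation

with open(f'job_idx_{job_name}_{array_task_id}.txt', 'w') as f:
    for idx in config_indexes:
        f.write(f'{idx}\n')
```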

sbatch.sh renamed to sbatch_s.sh (+8, -6)

@@ -3,8 +3,8 @@
 #SBATCH --signal=B:USR1@300
 #SBATCH --output=output/%x/%a.txt
 #SBATCH --mail-type=ALL
-#SBATCH --mail-type=TIME_LIMIT
-#SBATCH --exclude=nc20552,nc11103,nc11126,nc10303,nc20305,nc10249,nc20325,nc11124,nc20529,nc20526,nc20342,nc20354,nc30616,nc30305,nc20133,nc10220
+#SBATCH --exclude=nc20552,nc11001,nc11002,nc11004,nc11003,nc11010,nc11011,nc11022,nc11025,nc11103,nc11126,nc10303,nc20305,nc10249,nc20325,nc11124,nc20529,nc20526,nc20342,nc20354,nc30616,nc30305,nc20133,nc10220
+
 # ---------------------------------------------------------------------
 echo "Current working directory: `pwd`"
 echo "Starting run at: `date`"
@@ -17,7 +17,7 @@ echo "SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST"
 cleanup()
 {
   echo "Copy log files from temporary directory"
-  sour=$SLURM_TMPDIR/$SLURM_JOB_NAME/$SLURM_ARRAY_TASK_ID/
+  sour=$SLURM_TMPDIR/$SLURM_JOB_NAME/.
   dest=./logs/$SLURM_JOB_NAME/
   echo "Source directory: $sour"
   echo "Destination directory: $dest"
@@ -26,11 +26,13 @@ cleanup()
 # Call `cleanup` once we receive USR1 or EXIT signal
 trap 'cleanup' USR1 EXIT
 # ---------------------------------------------------------------------
-export OMP_NUM_THREADS=1
-module load gcc/9.3.0 arrow/2.0.0 python/3.7 scipy-stack
-source ~/envs/gym/bin/activate
+# export OMP_NUM_THREADS=1
+module load gcc/9.3.0 arrow/2.0.0 python/3.8 scipy-stack
+source ~/envs/tianshou/bin/activate
+
 python main.py --config_file ./configs/${SLURM_JOB_NAME}.json --config_idx $SLURM_ARRAY_TASK_ID --slurm_dir $SLURM_TMPDIR
 # python main.py --config_file ./configs/${SLURM_JOB_NAME}.json --config_idx $SLURM_ARRAY_TASK_ID
+
 # ---------------------------------------------------------------------
 echo "Job finished with exit code $? at: `date`"
 # ---------------------------------------------------------------------

utils/logger.py (+5, -11)

@@ -5,6 +5,7 @@
 
 class Logger(object):
   def __init__(self, logs_dir, file_name='log.txt', filemode='w'):
+    self.logs_dir = logs_dir
     logging.basicConfig(
       format='%(asctime)s - %(levelname)s: %(message)s',
       filename=f'{logs_dir}{file_name}',
@@ -18,18 +19,11 @@ def __init__(self, logs_dir, file_name='log.txt', filemode='w'):
     self.warning = logger.warning
     self.error = logger.error
     self.critical = logger.critical
-
-    self.logs_dir = logs_dir
+    # Set default writer
     self.writer = None
 
   def init_writer(self):
     self.writer = SummaryWriter(self.logs_dir)
-
-  def add_scalar(self, tag, scalar_value, global_step=None):
-    self.writer.add_scalar(tag, scalar_value, global_step)
-
-  def add_scalars(self, main_tag, tag_scalar_dict, global_step=None):
-    self.writer.add_scalars(main_tag, tag_scalar_dict, global_step)
-
-  def add_histogram(self, tag, values, global_step=None):
-    self.writer.add_histogram(tag, values, global_step)
+    self.add_scalar = self.writer.add_scalar # Input: tag, scalar_value, global_step
+    self.add_scalars = self.writer.add_scalars # Input: main_tag, tag_scalar_dict, global_step
+    self.add_histogram = self.writer.add_histogram # Input: tag, values, global_step
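
A minimal usage sketch of the updated Logger (the directory name is hypothetical; logs_dir should end with a slash, as the f'{logs_dir}{file_name}' concatenation implies). Note that add_scalar, add_scalars, and add_histogram only exist after init_writer() has been called, since they are now bound directly to the SummaryWriter:

```python
from utils.logger import Logger

logger = Logger('./logs/MERL_mc_dqn/1/')  # hypothetical log directory
logger.warning('plain text logging works before the writer exists')

logger.init_writer()  # creates the SummaryWriter and binds add_scalar/add_scalars/add_histogram
logger.add_scalar('Train/Return', 123.4, global_step=1000)
logger.writer.flush()
```

Binding the writer's methods removes three thin wrapper functions, at the cost that logger.add_scalar is simply not defined until init_writer() runs.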
