diff --git a/configs/config_256.yaml b/configs/config_256.yaml
deleted file mode 100644
index 04ec015..0000000
--- a/configs/config_256.yaml
+++ /dev/null
@@ -1,112 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_s_w: 1                  # weight of style reconstruction loss
-recon_c_w: 2                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 6
-    dfeat_lambda: 1
-    classif_frequency: 15
-    ##################
-    sem_seg_lambda: 0
-
-classifier_frequency: 100000  # Frequency to which we update the features classifier
-semantic_w: 3                 # weight of semantic conservation loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: -1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: 
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# model options
-gen_state: 1                  # Default 0, 1 means using one common style encoder, 2 one autoencoder only
-guided: 1                     # Default 0 random style picked (multi modal), 1 means guided training
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 64                     # number of filters in the bottommost layer
-  mlp_dim: 256                # number of filters in MLP
-  style_dim: 16               # length of style code
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 64                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: lsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 5               # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 8                              # number of data loading threads
-new_size: 256                               # first resize the shortest image side to this size
-crop_image_height: 256                      # random crop image of this height
-crop_image_width: 256                       # random crop image of this width
-
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/data/munit_dataset/trainA.txt
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/data/munit_dataset/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/data/munit_dataset/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/data/munit_dataset/testB.txt
-data_list_train_a_seg: /network/tmp1/ccai/data/munit_dataset/trainA_seg.txt
-data_list_train_b_seg: /network/tmp1/ccai/data/munit_dataset/trainB_seg.txt
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/data/munit_dataset/trainB.txt
diff --git a/configs/config_HD.yaml b/configs/config_HD.yaml
deleted file mode 100644
index d298e0e..0000000
--- a/configs/config_HD.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 10000000     # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000     # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_s_w: 1                  # weight of style reconstruction loss
-recon_c_w: 2                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-semantic_w: 3                 # weight of semantic conservation loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: -1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-
-#classifier ckpt path: 
-class_ckpt_path: ./
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# model options
-gen_state: 1                  # Default 0, 1 means using one common style encoder, 2 one autoencoder only
-guided: 1                     # Default 0 random style picked (multi modal), 1 means guided training
-
-# FID 
-batch_size_fid: 0            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-gen:
-  dim: 64                     # number of filters in the bottommost layer
-  mlp_dim: 256                # number of filters in MLP
-  style_dim: 16               # length of style code
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 64                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: lsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 5               # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 8                              # number of data loading threads
-new_size: 600                               # first resize the shortest image side to this size
-crop_image_height: 512                      # random crop image of this height
-crop_image_width: 512                       # random crop image of this width
-
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/data/munit_dataset/trainA.txt
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/data/munit_dataset/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/data/munit_dataset/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/data/munit_dataset/testB.txt
-data_list_train_a_seg: /network/tmp1/ccai/data/munit_dataset/trainA_seg.txt
-data_list_train_b_seg: /network/tmp1/ccai/data/munit_dataset/trainB_seg.txt
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/trainA_synth.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/trainB_synth.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/trainB_seg_synth.txt
diff --git a/configs/mask_conditioning.yaml b/configs/mask_conditioning.yaml
deleted file mode 100644
index 5d243cb..0000000
--- a/configs/mask_conditioning.yaml
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 2                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 6
-    dfeat_lambda: 1
-    classif_frequency: 15      
-    ##################
-    sem_seg_lambda: 0
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 4                 # weight of semantic conservation loss
-context_w: 4                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: -1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 64                     # number of filters in the bottommost layer
-  mlp_dim: 256                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 64                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: nsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 2              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 256                               # first resize the shortest image side to this size
-crop_image_height: 256                      # random crop image of this height
-crop_image_width: 256                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/configs/mask_conditioning_HD.yaml b/configs/mask_conditioning_HD.yaml
deleted file mode 100644
index c38f16e..0000000
--- a/configs/mask_conditioning_HD.yaml
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 0                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 0
-    dfeat_lambda: 0
-    classif_frequency: 0      
-    ##################
-    sem_seg_lambda: 0
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 0                 # weight of semantic conservation loss
-context_w: 10                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: -1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 32                     # number of filters in the bottommost layer
-  mlp_dim: 128                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 32                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: nsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 1              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 400                               # first resize the shortest image side to this size
-crop_image_height: 400                      # random crop image of this height
-crop_image_width: 400                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/configs/mask_conditioning_HD_lsgan.yaml b/configs/mask_conditioning_HD_lsgan.yaml
deleted file mode 100644
index 6f11cd5..0000000
--- a/configs/mask_conditioning_HD_lsgan.yaml
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 0                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 0
-    dfeat_lambda: 0
-    classif_frequency: 0      
-    ##################
-    sem_seg_lambda: 0
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 0                 # weight of semantic conservation loss
-context_w: 10                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: -1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 32                     # number of filters in the bottommost layer
-  mlp_dim: 128                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 32                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: lsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 1              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 400                               # first resize the shortest image side to this size
-crop_image_height: 400                      # random crop image of this height
-crop_image_width: 400                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/configs/mask_conditioning_HD_sim.yaml b/configs/mask_conditioning_HD_sim.yaml
deleted file mode 100644
index f962db3..0000000
--- a/configs/mask_conditioning_HD_sim.yaml
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 10         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 0                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 6
-    dfeat_lambda: 1
-    classif_frequency: 15      
-    ##################
-    sem_seg_lambda: 0 
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 0                 # weight of semantic conservation loss
-context_w: 10                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: 1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 32                     # number of filters in the bottommost layer
-  mlp_dim: 256                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 32                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: nsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 1              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 400                               # first resize the shortest image side to this size
-crop_image_height: 400                      # random crop image of this height
-crop_image_width: 400                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/configs/mask_conditioning_HD_sim_lsgan.yaml b/configs/mask_conditioning_HD_sim_lsgan.yaml
deleted file mode 100644
index 20cca01..0000000
--- a/configs/mask_conditioning_HD_sim_lsgan.yaml
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 0                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 6
-    dfeat_lambda: 1
-    classif_frequency: 15      
-    ##################
-    sem_seg_lambda: 0 
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 0                 # weight of semantic conservation loss
-context_w: 12                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: 1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 32                     # number of filters in the bottommost layer
-  mlp_dim: 128                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 32                     # number of filters in the bottommost layer
-  norm: none                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: lsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-
-ratio_disc_gen: 1              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 400                               # first resize the shortest image side to this size
-crop_image_height: 400                      # random crop image of this height
-crop_image_width: 400                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/configs/notebook.sh b/configs/notebook.sh
deleted file mode 100644
index bb33c81..0000000
--- a/configs/notebook.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-#SBATCH --qos=high
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:P6000
-#SBATCH --mem=24GB
-#SBATCH --gres=gpu:titanx:1
-#SBATCH --time=48:00:00
-#SBATCH -o /network/juravera/slurmlogs/slurm-%j.out
-#SBATCH --job-name jupyter-notebook
-#SBATCH --output ./slurnlogs/jupyter-notebook-%J.log
-#SBATCH -x kepler5 
-# get tunneling info
-XDG_RUNTIME_DIR=""
-port=$(shuf -i8000-9999 -n1)
-node=$(hostname -s)
-user=$(whoami)
-cluster=$(hostname -f | awk -F"." '{print $2}')
-# print tunneling instructions jupyter-log
-echo -e "
-MacOS or linux terminal command to create your ssh tunnel:
-ssh -N -L ${port}:${node}:${port} ${user}@login-1.server.mila.quebec -p 8001    
-Forwarded port:same as remote port
-Remote server: ${node}
-Remote port: ${port}
-SSH cluster: ${cluster}.login-1.server.mila.quebec
-SSH login: $user
-SSH port: $port
-Use a Browser on your local machine to go to:
-localhost:${port}  (prefix w/ https:// if using password)
-"
-# load modules or conda environments here
-module load miniconda/3
-module load cuda-10.0/cudnn/7.5
-source $CONDA_ACTIVATE
-conda activate simtoreal
-# DON'T USE ADDRESS BELOW.
-# DO USE TOKEN BELOW
-python -m notebook --no-browser --port=${port} --ip=${node}
-python -m notebook list
diff --git a/configs/patchgan.yaml b/configs/patchgan.yaml
deleted file mode 100644
index f1de99a..0000000
--- a/configs/patchgan.yaml
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 0                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 0                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 0
-    dfeat_lambda: 0
-    classif_frequency: 0      
-    ##################
-    sem_seg_lambda: 0
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 0                 # weight of semantic conservation loss
-context_w: 12                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: 1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 32                     # number of filters in the bottommost layer
-  mlp_dim: 128                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 32                     # number of filters in the bottommost layer
-  norm: batch                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: lsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-  getIntermFeat: true
-  num_D: 3
-  lambda_feat: 10
-  use_sigmoid: False
-
-
-ratio_disc_gen: 1              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 256                               # first resize the shortest image side to this size
-crop_image_height: 256                      # random crop image of this height
-crop_image_width: 256                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/configs/patchgan_perceptual.yaml b/configs/patchgan_perceptual.yaml
deleted file mode 100644
index 1fc47f8..0000000
--- a/configs/patchgan_perceptual.yaml
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (C) 2018 NVIDIA Corporation.  All rights reserved.
-# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
-
-# logger options
-image_save_iter: 1000         # How often do you want to save output images during training
-image_display_iter: 500       # How often do you want to display output images during training
-display_size: 8               # How many images do you want to display each time
-snapshot_save_iter: 5000      # How often do you want to save trained models
-log_iter: 1                   # How often do you want to log the training stats
-
-# optimization options
-max_iter: 1000000             # maximum number of training iterations
-batch_size: 1                 # batch size
-weight_decay: 0.0001          # weight decay
-beta1: 0.5                    # Adam parameter
-beta2: 0.999                  # Adam parameter
-init: kaiming                 # initialization [gaussian/kaiming/xavier/orthogonal]
-lr: 0.0001                    # initial learning rate
-lr_policy: step               # learning rate scheduler
-step_size: 100000             # how often to decay learning rate
-gamma: 0.5                    # how much to decay learning rate
-gan_w: 3                      # weight of adversarial loss
-recon_x_w: 12                 # weight of image reconstruction loss
-recon_c_w: 0                  # weight of content reconstruction loss
-recon_x_cyc_w: 12             # weight of explicit style augmented cycle consistency loss
-vgg_w: 3                      # weight of domain-invariant perceptual loss
-
-adaptation:
-    full_adaptation: 0
-    ##################
-    output_classifier_lambda: 0
-    output_adv_lambda: 0
-    output_classif_freq: 1
-    ##################
-    adv_lambda: 0
-    dfeat_lambda: 0
-    classif_frequency: 0      
-    ##################
-    sem_seg_lambda: 0
-    
-synthetic_seg_gt: 1           #Use synthetic ground truth as target for synthetic images  
-semantic_w: 0                 # weight of semantic conservation loss
-context_w: 12                  #Weight of context preserving (L1) loss
-recon_mask: 1                 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region
-domain_adv_w: 0 
-
-synthetic_frequency: 1       # frequency to which we show synthetic examples -1 if we don't want to
-recon_synth_w: 0             # weight of image reconstruction loss on the pair
-#classifier ckpt path: 
-class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth
-
-# Semantic segmentation ckpt path:
-semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth
-
-# FID 
-batch_size_fid: 1            # batch_size to infer the model when computing fid
-eval_fid: 0                   # Default 0, 1 means we track FID during training 
-
-# Path to the inception moment computed on the real dataset of 900 flooded images
-inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz
-      
-
-gen:
-  dim: 32                     # number of filters in the bottommost layer
-  mlp_dim: 128                # number of filters in MLP
-  activ: relu                 # activation function [relu/lrelu/prelu/selu/tanh]
-  n_downsample: 2             # number of downsampling layers in content encoder
-  n_res: 4                    # number of residual blocks in content encoder/decoder
-  pad_type: reflect           # padding type [zero/reflect]
-dis:
-  dim: 32                     # number of filters in the bottommost layer
-  norm: batch                  # normalization layer [none/bn/in/ln]
-  activ: lrelu                # activation function [relu/lrelu/prelu/selu/tanh]
-  n_layer: 4                  # number of layers in D
-  gan_type: lsgan             # GAN loss [lsgan/nsgan]
-  num_scales: 3               # number of scales
-  pad_type: reflect           # padding type [zero/reflect]
-  getIntermFeat: true
-  num_D: 3
-  lambda_feat: 10
-  use_sigmoid: False
-
-
-ratio_disc_gen: 1              # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator
-
-# data options
-input_dim_a: 3                              # number of image channels [1/3]
-input_dim_b: 3                              # number of image channels [1/3]
-num_workers: 4                              # number of data loading threads
-new_size: 256                               # first resize the shortest image side to this size
-crop_image_height: 256                      # random crop image of this height
-crop_image_width: 256                       # random crop image of this width
-
-#do we even need this?
-data_folder_train_a: ./
-data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt
-#and this?
-data_folder_test_a: ./
-data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt
-data_folder_train_b: ./
-data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-data_folder_test_b: ./
-data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt
-
-data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt
-data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt
-
-data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt
-data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt
-
-data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt
-data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt
-data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask
-
-# list image for FID monitoring
-data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt
-data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt
-
-seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt
-seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt
-
diff --git a/scripts/data.py b/scripts/data.py
index e50ac90..955f865 100644
--- a/scripts/data.py
+++ b/scripts/data.py
@@ -4,6 +4,7 @@
 """
 import torch.utils.data as data
 import os.path
+from pathlib import Path
 
 
 def default_loader(path):
@@ -92,22 +93,13 @@ def __len__(self):
 import os
 import os.path
 
-IMG_EXTENSIONS = [
-    ".jpg",
-    ".JPG",
-    ".jpeg",
-    ".JPEG",
-    ".png",
-    ".PNG",
-    ".ppm",
-    ".PPM",
-    ".bmp",
-    ".BMP",
-]
+IMG_EXTENSIONS = {
+    ".jpg", ".JPG", ".jpeg", ".JPEG", ".png", ".PNG", ".ppm", ".PPM", ".bmp", ".BMP"
+}
 
 
 def is_image_file(filename):
-    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
+    return Path(filename).suffix in IMG_EXTENSIONS
 
 
 def make_dataset(dir):
diff --git a/scripts/networks.py b/scripts/networks.py
index c4d6991..1c26873 100644
--- a/scripts/networks.py
+++ b/scripts/networks.py
@@ -1085,6 +1085,9 @@ def __init__(
             spade_kernel_size,
         )
 
+        self.nospade_1 = ResBlock(self.z_nc)
+
+        """
         self.up_spades = nn.Sequential(
             *[
                 SPADEResnetBlock(
@@ -1098,9 +1101,11 @@ def __init__(
                 for i in range(spade_n_up - 2)
             ]
         )
+        """
+        self.up_spades = nn.Sequential(*[ResBlock(self.z_nc) for i in range(spade_n_up - 2)])
 
-        self.final_nc = self.z_nc // 2 ** (spade_n_up - 2)
-
+        # self.final_nc = self.z_nc // 2 ** (spade_n_up - 2)
+        self.final_nc = self.z_nc
         self.conv_img = nn.Conv2d(self.final_nc, 3, 3, padding=1)
 
         self.upsample = nn.Upsample(scale_factor=2)
@@ -1122,11 +1127,18 @@ def forward(self, z, cond):
         y = self.upsample(y)
         y = self.G_middle_0(y, cond)
         y = self.upsample(y)
-        y = self.G_middle_1(y, cond)
+        # y = self.G_middle_1(y, cond)
+        y = self.nospade_1(y)
 
+        """
         for i, up in enumerate(self.up_spades):
             y = self.upsample(y)
             y = up(y, cond)
+        """
+
+        for i, up in enumerate(self.up_spades):
+            y = self.upsample(y)
+            y = up(y)
 
         y = self.conv_img(F.leaky_relu(y, 2e-1))
         y = torch.tanh(y)
@@ -1201,4 +1213,3 @@ def decode(self, content, mask, encoder_name):
             print("wrong value for encoder_name, must be 0 or 1")
             return None
         return images
-
diff --git a/scripts/test.py b/scripts/test.py
index a38afac..432ad99 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -3,40 +3,51 @@
 Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
 """
 from __future__ import print_function
-from utils import get_config, pytorch03_to_pytorch04, sorted_nicely
+from utils import get_config, sorted_nicely
 from trainer import MUNIT_Trainer
 import argparse
 from torch.autograd import Variable
 import torchvision.utils as vutils
-import sys
 import torch
 import os
 from torchvision import transforms
 from PIL import Image
 import tqdm as tq
-import glob
+import numpy as np
+from pathlib import Path
+from data import is_image_file
+from datetime import datetime
 
 # Parse arguments
 parser = argparse.ArgumentParser()
 parser.add_argument("--config", type=str, help="network configuration file")
 parser.add_argument("--input", type=str, help="directory of input images")
-parser.add_argument("--mask_dir", type=str, help="directory of masks corresponding to input images")
+parser.add_argument(
+    "--mask_dir", type=str, help="directory of masks corresponding to input images"
+)
 parser.add_argument("--output_folder", type=str, help="output image directory")
 parser.add_argument("--checkpoint", type=str, help="checkpoint of generator")
 
 parser.add_argument("--seed", type=int, default=10, help="random seed")
 
 parser.add_argument(
-    "--synchronized", action="store_true", help="whether use synchronized style code or not",
+    "--synchronized",
+    action="store_true",
+    help="whether use synchronized style code or not",
 )
 parser.add_argument(
-    "--save_input", action="store_true", help="whether use synchronized style code or not",
+    "--save_input",
+    action="store_true",
+    help="whether to save the input image or not",
 )
 parser.add_argument(
-    "--output_path", type=str, default=".", help="path for logs, checkpoints, and VGG model weight",
+    "--output_path",
+    type=str,
+    default=".",
+    help="path for logs, checkpoints, and VGG model weight",
 )
 parser.add_argument(
-    "--save_mask", action="store_true", help="whether to save mask or not",
+    "--save_mask", action="store_true", help="whether to save mask or not"
 )
 opts = parser.parse_args()
 
@@ -58,12 +69,10 @@
 
 trainer = MUNIT_Trainer(config)
 
-# Load the model (here we currently only load the latest model architecture: one single style)
-try:
-    state_dict = torch.load(opts.checkpoint)
-    trainer.gen.load_state_dict(state_dict["2"])
-except:
-    sys.exit("Cannot load the checkpoints")
+# Load the model
+# (here we currently only load the latest model architecture: one single style)
+state_dict = torch.load(opts.checkpoint)
+trainer.gen.load_state_dict(state_dict["2"])
 
 # Send the trainer to cuda
 trainer.cuda()
@@ -73,23 +82,31 @@
 new_size = config["new_size"]
 
 # Define the list of non-flooded images
-list_non_flooded = glob.glob(opts.input + "*")
+list_non_flooded = [
+    str(im) for im in Path(opts.input).resolve().glob("*") if is_image_file(im)
+]
 
 list_non_flooded = sorted_nicely(list_non_flooded)
 # Define list of masks:
 
-list_masks = glob.glob(opts.mask_dir + "*")
+list_masks = [
+    str(im) for im in Path(opts.mask_dir).resolve().glob("*") if is_image_file(im)
+]
 
 list_masks = sorted_nicely(list_masks)
 
-if len(list_non_flooded) != len(list_masks):
-    sys.exit("Image list and mask list differ in length")
+assert len(list_non_flooded) == len(
+    list_masks
+), "Image list and mask list differ in length"
 
 
 # Assert there are some elements inside
-if len(list_non_flooded) == 0:
-    sys.exit("Image list is empty. Please ensure opts.input ends with a /")
+assert list_non_flooded, "Image list is empty"
+
+output_folder = Path(opts.output_folder).resolve()
+output_folder.mkdir(parents=True, exist_ok=True)
 
+run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 
 # Inference
 with torch.no_grad():
@@ -102,11 +119,14 @@
         ]
     )
 
-    mask_transform = transforms.Compose([transforms.Resize((new_size, new_size)), transforms.ToTensor(),])
-
+    mask_transform = transforms.Compose(
+        [transforms.Resize((new_size, new_size)), transforms.ToTensor()]
+    )
 
     for j in tq.tqdm(range(len(list_non_flooded))):
 
+        file_id = f"{run_id}-{j}"
+
         # Define image path
         path_xa = list_non_flooded[j]
 
@@ -122,20 +142,23 @@
         mask = mask[0].unsqueeze(0).unsqueeze(0)
 
         # Load and transform the non_flooded image
-        x_a = Variable(transform(Image.open(path_xa).convert("RGB")).unsqueeze(0).cuda())
+        x_a = Variable(
+            transform(Image.open(path_xa).convert("RGB")).unsqueeze(0).cuda()
+        )
         if opts.save_input:
             inputs = (x_a + 1) / 2.0
-            path = os.path.join(opts.output_folder, "{:03d}input.jpg".format(j))
-            vutils.save_image(inputs.data, path, padding=0, normalize=True)
+            path = output_folder / "{}-input.jpg".format(file_id)
+            vutils.save_image(inputs.data, str(path), padding=0, normalize=True)
 
         if opts.save_mask:
-            path = os.path.join(opts.output_folder, "{:03d}mask.jpg".format(j))
-            #overlay mask onto image
+            path = output_folder / "{}-mask.jpg".format(file_id)
+            # overlay mask onto image
             save_m_a = x_a - (x_a * mask.repeat(1, 3, 1, 1)) + mask.repeat(1, 3, 1, 1)
-            vutils.save_image(save_m_a, path, padding=0, normalize=True)          
+            vutils.save_image(save_m_a, str(path), padding=0, normalize=True)
 
         # Extract content and style
-        c_a = trainer.gen.encode(x_a, 1)
+        x_a_augment = torch.cat([x_a, mask], dim=1)
+        c_a = trainer.gen.encode(x_a_augment, 1)
 
         # Perform cross domain translation
         x_ab = trainer.gen.decode(c_a, mask, 2)
@@ -144,7 +167,7 @@
         outputs = (x_ab + 1) / 2.0
 
         # Define output path
-        path = os.path.join(opts.output_folder, "{:03d}output.jpg".format(j))
+        path = output_folder / "{}-output.jpg".format(file_id)
 
         # Save image
-        vutils.save_image(outputs.data, path, padding=0, normalize=True)
+        vutils.save_image(outputs.data, str(path), padding=0, normalize=True)
diff --git a/scripts/train.py b/scripts/train.py
index d8fdc75..bbe91be 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -40,7 +40,7 @@
     "--config", type=str, default="configs/config256.yaml", help="Path to the config file.",
 )
 parser.add_argument(
-    "--output_path", type=str, default=".", help="outputs path"
+    "--output_path", type=str, default="/network/tmp1/ccai/checkpoints/sun", help="outputs path"
 )
 parser.add_argument("--resume", action="store_true")
 parser.add_argument("--trainer", type=str, default="MUNIT", help="MUNIT|UNIT")
@@ -397,4 +397,3 @@
                 iterations += 1
                 if iterations >= max_iter:
                     sys.exit("Finish training")
-
diff --git a/scripts/trainer.py b/scripts/trainer.py
index 21f2bbd..b598090 100644
--- a/scripts/trainer.py
+++ b/scripts/trainer.py
@@ -25,6 +25,7 @@
 import torch.nn as nn
 import os
 from PIL import Image
+from torchvision.utils import save_image
 
 
 class MUNIT_Trainer(nn.Module):
@@ -60,7 +61,7 @@ def __init__(self, hyperparameters):
         else:
             self.use_output_classifier_sr = False
 
-        self.gen = SpadeGen(hyperparameters["input_dim_a"], hyperparameters["gen"])
+        self.gen = SpadeGen(hyperparameters["input_dim_a"] + 1, hyperparameters["gen"])
 
         # Note: the "+1" is for the masks
         if hyperparameters["dis"]["type"] == "patchgan":
@@ -138,13 +139,11 @@ def __init__(self, hyperparameters):
             self.criterionVGG = VGGLoss()
 
         # Load semantic segmentation model if needed
-        if "semantic_w" in hyperparameters.keys() and hyperparameters["semantic_w"] > 0:
-            self.segmentation_model = load_segmentation_model(
-                hyperparameters["semantic_ckpt_path"], 19
-            )
-            self.segmentation_model.eval()
-            for param in self.segmentation_model.parameters():
-                param.requires_grad = False
+        # if "semantic_w" in hyperparameters.keys() and hyperparameters["semantic_w"] > 0:
+        self.segmentation_model = load_segmentation_model(hyperparameters["semantic_ckpt_path"], 19)
+        self.segmentation_model.eval()
+        for param in self.segmentation_model.parameters():
+            param.requires_grad = False
 
         # Load domain classifier if needed
         if "domain_adv_w" in hyperparameters.keys() and hyperparameters["domain_adv_w"] > 0:
@@ -268,14 +267,20 @@ def forward(self, x_a, x_b, m_a, m_b):
             torch.Tensor, torch.Tensor -- Translated version of x_a in domain B, Translated version of x_b in domain A
         """
         self.eval()
+        m_a_seg = self.merge_seg_with_mask(x_a, m_a)
+        m_b_seg = self.merge_seg_with_mask(x_b, m_b)
+
+        avg_mask_a = self.average_mask(x_a, m_a)
+        avg_mask_b = self.average_mask(x_b, m_b)
+
         x_a_augment = torch.cat([x_a, m_a], dim=1)
         x_b_augment = torch.cat([x_b, m_b], dim=1)
 
-        c_a = self.gen.encode(x_a, 1)
-        c_b = self.gen.encode(x_b, 2)
+        c_a = self.gen.encode(x_a_augment, 1)
+        c_b = self.gen.encode(x_b_augment, 2)
 
-        x_ba = self.gen.decode(c_b, 1)
-        x_ab = self.gen.decode(c_a, 2)
+        x_ba = self.gen.decode(c_b, m_b_seg, 1)
+        x_ab = self.gen.decode(c_a, m_a_seg, 2)
 
         self.train()
         return x_ab, x_ba
@@ -311,37 +316,51 @@ def gen_update(
         """
         self.gen_opt.zero_grad()
 
-        # encode
+        mask_a_seg = self.merge_seg_with_mask(x_a, mask_a)
+        mask_b_seg = self.merge_seg_with_mask(x_b, mask_b)
+
+        avg_mask_a = self.average_mask(x_a, mask_a)
+        avg_mask_b = self.average_mask(x_b, mask_b)
+
         x_a_augment = torch.cat([x_a, mask_a], dim=1)
         x_b_augment = torch.cat([x_b, mask_b], dim=1)
 
-        c_a = self.gen.encode(x_a, 1)
-        c_b = self.gen.encode(x_b, 2)
+        # encode
+        c_a = self.gen.encode(x_a_augment, 1)
+        c_b = self.gen.encode(x_b_augment, 2)
 
         # decode (within domain)
-        x_a_recon = self.gen.decode(c_a, mask_a, 1)
-        x_b_recon = self.gen.decode(c_b, mask_b, 2)
+        x_a_recon = self.gen.decode(c_a, mask_a_seg, 1)
+        x_b_recon = self.gen.decode(c_b, mask_b_seg, 2)
 
-        x_ba = self.gen.decode(c_b, mask_b, 1)
-        x_ab = self.gen.decode(c_a, mask_a, 2)
+        x_ba = self.gen.decode(c_b, mask_b_seg, 1)
+        x_ab = self.gen.decode(c_a, mask_a_seg, 2)
 
         x_ba_augment = torch.cat([x_ba, mask_b], dim=1)
         x_ab_augment = torch.cat([x_ab, mask_a], dim=1)
+
         # encode again
-        c_b_recon = self.gen.encode(x_ba, 1)
-        c_a_recon = self.gen.encode(x_ab, 2)
+        c_b_recon = self.gen.encode(x_ba_augment, 1)
+        c_a_recon = self.gen.encode(x_ab_augment, 2)
 
         # decode again (if needed)
         x_aba = (
-            self.gen.decode(c_a_recon, mask_a, 1) if hyperparameters["recon_x_cyc_w"] > 0 else None
+            self.gen.decode(c_a_recon, mask_a_seg, 1)
+            if hyperparameters["recon_x_cyc_w"] > 0
+            else None
         )
         x_bab = (
-            self.gen.decode(c_b_recon, mask_b, 2) if hyperparameters["recon_x_cyc_w"] > 0 else None
+            self.gen.decode(c_b_recon, mask_b_seg, 2)
+            if hyperparameters["recon_x_cyc_w"] > 0
+            else None
         )
 
         # reconstruction loss
-        self.loss_gen_recon_x_a = self.recon_criterion(x_a_recon, x_a)
-        self.loss_gen_recon_x_b = self.recon_criterion(x_b_recon, x_b)
+        # self.loss_gen_recon_x_a = self.recon_criterion(x_a_recon, x_a * (1.0 - mask_a) + avg_mask_a)
+        # self.loss_gen_recon_x_b = self.recon_criterion(x_b_recon, x_b * (1.0 - mask_b) + avg_mask_b)
+
+        self.loss_gen_recon_x_a = self.recon_criterion_mask(x_a_recon, x_a, mask_a)
+        self.loss_gen_recon_x_b = self.recon_criterion_mask(x_b_recon, x_b, mask_b)
 
         # Contex preserving loss
         self.context_loss = self.recon_criterion_mask(
@@ -407,17 +426,6 @@ def gen_update(
         self.loss_gen_vgg_b = (
             self.compute_vgg_loss(x_ab, x_a, mask_a) if hyperparameters["vgg_w"] > 0 else 0
         )
-        """        self.loss_destruct_vgg_a = (
-                    torch.exp(-self.compute_vgg_loss(x_ba, x_b, 1.0 - mask_b) * 0.01)
-                    if hyperparameters["vgg_w"] > 0
-                    else 0
-                )
-                self.loss_destruct_vgg_b = (
-                    torch.exp(-self.compute_vgg_loss(x_ab, x_a, 1.0 - mask_a) * 0.01)
-                    if hyperparameters["vgg_w"] > 0
-                    else 0
-                )
-        """
 
         # semantic-segmentation loss
         self.loss_sem_seg = (
@@ -495,8 +503,7 @@ def gen_update(
             if hyperparameters["vgg_w"] > 0:
                 comet_exp.log_metric("loss_gen_vgg_a", self.loss_gen_vgg_a.cpu().detach())
                 comet_exp.log_metric("loss_gen_vgg_b", self.loss_gen_vgg_b.cpu().detach())
-                # comet_exp.log_metric("loss_destruct_vgg_a", self.loss_destruct_vgg_a.cpu().detach())
-                # comet_exp.log_metric("loss_destruct_vgg_b", self.loss_destruct_vgg_b.cpu().detach())
+
             if hyperparameters["semantic_w"] > 0:
                 comet_exp.log_metric("loss_sem_seg", self.loss_sem_seg.cpu().detach())
             if hyperparameters["context_w"] > 0:
@@ -670,6 +677,57 @@ def compute_semantic_seg_loss(self, img1, img2, mask=None, ground_truth=None):
             loss = nn.CrossEntropyLoss()(output, target)
         return loss
 
+    def merge_seg_with_mask(self, img, mask):
+        """
+        Build a conditioning map from predicted segmentation labels and the mask
+        Arguments:
+            img {torch.Tensor} -- Image batch; rescaled here with (img + 1) / 2,
+                so presumably normalized to [-1, 1] (confirm with the data loader)
+            mask {torch.Tensor} -- Mask; where it is 1 the label is replaced by
+                the extra value n_channels + 1 (assumed binary -- confirm)
+        Returns:
+            torch.Tensor -- Float label map divided by n_channels + 1, 1 on the mask
+        """
+
+        # denorm
+        img_denorm = (img + 1) / 2.0
+
+        # norm for semantic seg network
+        input_transformed = seg_batch_transform(img_denorm)
+
+        # Run the segmentation network on the image; the class (channel) axis is
+        # dim 1: its size is read below and argmax is taken over it.
+        # Predicted classes are indices 0 .. n_channels - 1, so n_channels + 1 is
+        # a value no predicted class can take. max_value is also the divisor that
+        # rescales the final map (pixels where mask == 1 end up at exactly 1).
+        output = self.segmentation_model(input_transformed)
+        max_value = output.size()[1]
+        max_value = mask_value = max_value + 1  # make masked value the largest class
+        output = output.argmax(1).unsqueeze(1)
+
+        # Zero out labels inside the mask and write mask_value there instead:
+        output = output * (1 - mask) + (mask * mask_value)
+        output_mask = output.to(torch.float) / max_value
+
+        return output_mask
+
+    def average_mask(self, img, mask):
+        """
+        Sample standard-normal noise restricted to the masked region
+        (despite the name, no average of img is computed: img is unused; an
+        earlier per-channel mean of img * mask over dims (2, 3) was disabled)
+        Arguments:
+            img {torch.Tensor} -- Unused, kept for interface compatibility
+            mask {torch.Tensor} -- Mask multiplied with the noise; sets shape/device
+        Returns:
+            torch.Tensor -- N(0, 1) noise of mask's shape, multiplied by mask
+        """
+
+        # Sample on mask's device rather than a hard-coded "cuda" (CPU / any GPU)
+        masked_avg = torch.normal(mean=0, std=1, size=mask.size(), device=mask.device)
+        masked_avg = masked_avg * mask
+        return masked_avg
+
     def sample(self, x_a, x_b, m_a, m_b):
         """ 
         Infer the model on a batch of image
@@ -683,7 +741,13 @@ def sample(self, x_a, x_b, m_a, m_b):
             Or if self.semantic_w is true: x_a, autoencode(x_a), Semantic segmentation x_a, 
             x_ab_1,semantic segmentation x_ab_1, x_ab_2
         """
+
         self.eval()
+        m_a_seg = self.merge_seg_with_mask(x_a, m_a)
+        m_b_seg = self.merge_seg_with_mask(x_b, m_b)
+
+        avg_mask_a = self.average_mask(x_a, m_a)
+        avg_mask_b = self.average_mask(x_b, m_b)
 
         x_a_recon, x_b_recon, x_ba1, x_ba2, x_ab1, x_ab2 = [], [], [], [], [], []
 
@@ -691,16 +755,16 @@ def sample(self, x_a, x_b, m_a, m_b):
         x_b_augment = torch.cat([x_b, m_b], dim=1)
 
         for i in range(x_a.size(0)):
-            c_a = self.gen.encode(x_a[i].unsqueeze(0), 1)
-            c_b = self.gen.encode(x_b[i].unsqueeze(0), 2)
+            c_a = self.gen.encode(x_a_augment[i].unsqueeze(0), 1)
+            c_b = self.gen.encode(x_b_augment[i].unsqueeze(0), 2)
 
-            x_a_recon.append(self.gen.decode(c_a, m_a[i].unsqueeze(0), 1))
-            x_b_recon.append(self.gen.decode(c_b, m_b[i].unsqueeze(0), 2))
+            x_a_recon.append(self.gen.decode(c_a, m_a_seg[i].unsqueeze(0), 1))
+            x_b_recon.append(self.gen.decode(c_b, m_b_seg[i].unsqueeze(0), 2))
 
-            x_ba1.append(self.gen.decode(c_b, m_b[i].unsqueeze(0), 1))  # s_a1[i].unsqueeze(0)))
-            x_ba2.append(self.gen.decode(c_b, m_b[i].unsqueeze(0), 1))  # s_a2[i].unsqueeze(0)))
-            x_ab1.append(self.gen.decode(c_a, m_a[i].unsqueeze(0), 2))  # s_b1[i].unsqueeze(0)))
-            x_ab2.append(self.gen.decode(c_a, m_a[i].unsqueeze(0), 2))  # s_b2[i].unsqueeze(0)))
+            x_ba1.append(self.gen.decode(c_b, m_b_seg[i].unsqueeze(0), 1))  # s_a1[i].unsqueeze(0)))
+            x_ba2.append(self.gen.decode(c_b, m_b_seg[i].unsqueeze(0), 1))  # s_a2[i].unsqueeze(0)))
+            x_ab1.append(self.gen.decode(c_a, m_a_seg[i].unsqueeze(0), 2))  # s_b1[i].unsqueeze(0)))
+            x_ab2.append(self.gen.decode(c_a, m_a_seg[i].unsqueeze(0), 2))  # s_b2[i].unsqueeze(0)))
 
         x_a_recon, x_b_recon = torch.cat(x_a_recon), torch.cat(x_b_recon)
         x_ba1, x_ba2 = torch.cat(x_ba1), torch.cat(x_ba2)
@@ -905,16 +969,25 @@ def dis_update(self, x_a, x_b, m_a, m_b, hyperparameters, comet_exp=None):
         Keyword Arguments:
             comet_exp {cometExperience} -- CometML object use to log all the loss and images (default: {None})        
         """
+
         self.dis_opt.zero_grad()
+
+        m_a_seg = self.merge_seg_with_mask(x_a, m_a)
+        m_b_seg = self.merge_seg_with_mask(x_b, m_b)
+
         x_a_augment = torch.cat([x_a, m_a], dim=1)
         x_b_augment = torch.cat([x_b, m_b], dim=1)
+        # NOTE: average_mask returns masked random noise, not an average of the image
+
+        avg_mask_a = self.average_mask(x_a, m_a)
+        avg_mask_b = self.average_mask(x_b, m_b)
 
         # encode
-        c_a = self.gen.encode(x_a, 1)
-        c_b = self.gen.encode(x_b, 2)
+        c_a = self.gen.encode(x_a_augment, 1)
+        c_b = self.gen.encode(x_b_augment, 2)
         # decode (cross domain)
-        x_ba = self.gen.decode(c_b, m_b, 1)
-        x_ab = self.gen.decode(c_a, m_a, 2)
+        x_ba = self.gen.decode(c_b, m_b_seg, 1)
+        x_ab = self.gen.decode(c_a, m_a_seg, 2)
 
         x_ba_augment = torch.cat([x_ba, m_b], dim=1)
         x_ab_augment = torch.cat([x_ab, m_a], dim=1)
@@ -1076,6 +1149,8 @@ def resume(self, checkpoint_dir, hyperparameters):
         state_dict = torch.load(last_model_name)
         self.dis_a.load_state_dict(state_dict["a"])
         self.dis_b.load_state_dict(state_dict["b"])
+        self.dis_a_masked.load_state_dict(state_dict["a_masked"])
+        self.dis_b_masked.load_state_dict(state_dict["b_masked"])
         # Load optimizers
         state_dict = torch.load(os.path.join(checkpoint_dir, "optimizer.pt"))
         self.dis_opt.load_state_dict(state_dict["dis"])
@@ -1107,7 +1182,15 @@ def save(self, snapshot_dir, iterations):
         opt_name = os.path.join(snapshot_dir, "optimizer.pt")
 
         torch.save({"2": self.gen.state_dict()}, gen_name)
-        torch.save({"a": self.dis_a.state_dict(), "b": self.dis_b.state_dict()}, dis_name)
+        torch.save(
+            {
+                "a": self.dis_a.state_dict(),
+                "b": self.dis_b.state_dict(),
+                "a_masked": self.dis_a_masked.state_dict(),
+                "b_masked": self.dis_b_masked.state_dict(),
+            },
+            dis_name,
+        )
         if self.domain_classif_ab:
             torch.save({"d": self.domain_classifier.state_dict()}, domain_classifier_name)
             torch.save(
@@ -1122,4 +1205,3 @@ def save(self, snapshot_dir, iterations):
             torch.save(
                 {"gen": self.gen_opt.state_dict(), "dis": self.dis_opt.state_dict()}, opt_name,
             )
-
diff --git a/scripts/utils.py b/scripts/utils.py
index 3eb7c00..25cf170 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -225,7 +225,9 @@ def get_data_loader_list(
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
     ]
     transform_list = (
-        [transforms.RandomCrop((height, width))] + transform_list if crop else transform_list
+        [transforms.RandomCrop((height, width))] + transform_list
+        if crop
+        else transform_list
     )
     transform_list = (
         [transforms.Resize((new_size, new_size))] + transform_list
@@ -233,7 +235,9 @@ def get_data_loader_list(
         else transform_list
     )
     transform_list = (
-        [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list
+        [transforms.RandomHorizontalFlip()] + transform_list
+        if train
+        else transform_list
     )
     transform = transforms.Compose(transform_list)
     dataset = ImageFilelist(root, file_list, transform=transform)
@@ -314,7 +318,9 @@ def transform(self, image, mask):
         image = resize(image)
         to_tensor = transforms.ToTensor()
         # Random crop
-        i, j, h, w = transforms.RandomCrop.get_params(image, output_size=(self.height, self.width))
+        i, j, h, w = transforms.RandomCrop.get_params(
+            image, output_size=(self.height, self.width)
+        )
         image = F.crop(image, i, j, h, w)
 
         if type(mask) is not torch.Tensor:
@@ -436,7 +442,9 @@ def __len__(self):
         return len(self.image_paths)
 
 
-def get_fid_data_loader(file_list_a, file_list_b, batch_size, train, new_size=256, num_workers=4):
+def get_fid_data_loader(
+    file_list_a, file_list_b, batch_size, train, new_size=256, num_workers=4
+):
     """
     Masks and images lists-based data loader with transformations
     (horizontal flip, resizing, random crop, normalization are handled)
@@ -518,7 +526,7 @@ def transform(self, image_a, image_b, mask, semantic_a, semantic_b):
         # print('dim image after resize',image.size)
 
         # Resize mask
-        #mask = mask.resize((image_b.width, image_b.height), Image.NEAREST)
+        # mask = mask.resize((image_b.width, image_b.height), Image.NEAREST)
         mask = resize(mask)
         semantic_a = semantic_a.resize((image_b.width, image_b.height), Image.NEAREST)
         semantic_b = semantic_b.resize((image_b.width, image_b.height), Image.NEAREST)
@@ -627,7 +635,14 @@ def get_synthetic_data_loader(
         loader -- data loader with transformed dataset
     """
     dataset = MyDatasetSynthetic(
-        file_list_a, file_list_b, mask_list, sem_list_a, sem_list_b, new_size, height, width,
+        file_list_a,
+        file_list_b,
+        mask_list,
+        sem_list_a,
+        sem_list_b,
+        new_size,
+        height,
+        width,
     )
     loader = DataLoader(
         dataset=dataset,
@@ -682,7 +697,14 @@ def get_data_loader_mask_and_im(
 
 
 def get_data_loader_folder(
-    input_folder, batch_size, train, new_size=None, height=256, width=256, num_workers=4, crop=True,
+    input_folder,
+    batch_size,
+    train,
+    new_size=None,
+    height=256,
+    width=256,
+    num_workers=4,
+    crop=True,
 ):
     """
     Folder-based data loader with transformations
@@ -711,7 +733,9 @@ def get_data_loader_folder(
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
     ]
     transform_list = (
-        [transforms.RandomCrop((height, width))] + transform_list if crop else transform_list
+        [transforms.RandomCrop((height, width))] + transform_list
+        if crop
+        else transform_list
     )
     transform_list = (
         [transforms.Resize((new_size, new_size))] + transform_list
@@ -719,7 +743,9 @@ def get_data_loader_folder(
         else transform_list
     )
     transform_list = (
-        [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list
+        [transforms.RandomHorizontalFlip()] + transform_list
+        if train
+        else transform_list
     )
     transform = transforms.Compose(transform_list)
     dataset = ImageFolder(input_folder, transform=transform)
@@ -763,14 +789,18 @@ def __write_images(image_outputs, display_image_num, file_name):
     image_outputs = [
         images.expand(-1, 3, -1, -1) for images in image_outputs
     ]  # expand gray-scale images to 3 channels
-    image_tensor = torch.cat([images[:display_image_num] for images in image_outputs], 0)
+    image_tensor = torch.cat(
+        [images[:display_image_num] for images in image_outputs], 0
+    )
     image_grid = vutils.make_grid(
         image_tensor.data, nrow=display_image_num, padding=0, normalize=True
     )
     vutils.save_image(image_grid, file_name, nrow=1)
 
 
-def write_2images(image_outputs, display_image_num, image_directory, postfix, comet_exp=None):
+def write_2images(
+    image_outputs, display_image_num, image_directory, postfix, comet_exp=None
+):
     """Write images from both worlds a and b of the cycle  A-B-A as jpg
     Arguments:
         image_outputs {Tensor list} -- list of images, the first half being outputs in B,
@@ -859,7 +889,9 @@ def get_slerp_interp(nb_latents, nb_interp, z_dim):
         low = np.random.randn(z_dim)
         high = np.random.randn(z_dim)  # low + np.random.randn(512) * 0.7
         interp_vals = np.linspace(0, 1, num=nb_interp)
-        latent_interp = np.array([slerp(v, low, high) for v in interp_vals], dtype=np.float32)
+        latent_interp = np.array(
+            [slerp(v, low, high) for v in interp_vals], dtype=np.float32
+        )
         latent_interps = np.vstack((latent_interps, latent_interp))
 
     return latent_interps[:, :, np.newaxis, np.newaxis]
@@ -919,7 +951,10 @@ def __init__(self, num_classes=1000):
         # Load the pretrained weights, remove avg pool
         # layer and get the output stride of 8
         resnet34_8s = resnet34(
-            fully_conv=True, pretrained=True, output_stride=8, remove_avg_pool_layer=True,
+            fully_conv=True,
+            pretrained=True,
+            output_stride=8,
+            remove_avg_pool_layer=True,
         )
 
         # Randomly initialize the 1x1 Conv scoring layer
@@ -1070,7 +1105,9 @@ def get_scheduler(optimizer, hyperparameters, iterations=-1):
 def weights_init(init_type="gaussian"):
     def init_fun(m):
         classname = m.__class__.__name__
-        if (classname.find("Conv") == 0 or classname.find("Linear") == 0) and hasattr(m, "weight"):
+        if (classname.find("Conv") == 0 or classname.find("Linear") == 0) and hasattr(
+            m, "weight"
+        ):
             # print m.__class__.__name__
             if init_type == "gaussian":
                 init.normal_(m.weight.data, 0.0, 0.02)
@@ -1279,7 +1316,9 @@ def __init__(
         self.stride = stride
         self.downsample = downsample
         if stride != 1 or inplanes != planes:
-            self.downsample = nn.Sequential(conv1x1(inplanes, planes, stride), norm_layer(planes))
+            self.downsample = nn.Sequential(
+                conv1x1(inplanes, planes, stride), norm_layer(planes)
+            )
 
     def forward(self, x):
         identity = x
@@ -1408,12 +1447,11 @@ def p(d, prefix="", vals=[]):
     return dict(values_list)
 
 
-def sorted_nicely( l ): 
-    """ Sort the given iterable in the way that humans expect.""" 
-    convert = lambda text: int(text) if text.isdigit() else text 
-    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
-    return sorted(l, key = alphanum_key)
-
+def sorted_nicely(l):
+    """Sort strings in natural order: digit runs compare as ints ("a2" < "a10")."""
+    convert = lambda text: int(text) if text.isdigit() else text
+    alphanum_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)]
+    return sorted(l, key=alphanum_key)