diff --git a/configs/config_256.yaml b/configs/config_256.yaml deleted file mode 100644 index 04ec015..0000000 --- a/configs/config_256.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_s_w: 1 # weight of style reconstruction loss -recon_c_w: 2 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 6 - dfeat_lambda: 1 - classif_frequency: 15 - ################## - sem_seg_lambda: 0 - -classifier_frequency: 100000 # Frequency to which we update the features classifier -semantic_w: 3 # weight of semantic conservation loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: -1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# model options -gen_state: 1 # Default 0, 1 means using one common style encoder, 2 one autoencoder only -guided: 1 # Default 0 random style picked (multi modal), 1 means guided training - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 64 # number of filters in the bottommost layer - mlp_dim: 256 # number of filters in MLP - style_dim: 16 # length of style code - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 64 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: lsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 5 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 8 # number of data loading threads -new_size: 256 # first resize the shortest image side to this size -crop_image_height: 256 # random crop image of this height -crop_image_width: 256 # random crop image of this width - -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/data/munit_dataset/trainA.txt -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/data/munit_dataset/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/data/munit_dataset/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/data/munit_dataset/testB.txt -data_list_train_a_seg: /network/tmp1/ccai/data/munit_dataset/trainA_seg.txt -data_list_train_b_seg: /network/tmp1/ccai/data/munit_dataset/trainB_seg.txt -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/data/munit_dataset/trainB.txt diff --git a/configs/config_HD.yaml b/configs/config_HD.yaml deleted file mode 100644 index d298e0e..0000000 --- a/configs/config_HD.yaml +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 10000000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_s_w: 1 # weight of style reconstruction loss -recon_c_w: 2 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -semantic_w: 3 # weight of semantic conservation loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: -1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair - -#classifier ckpt path: -class_ckpt_path: ./ - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# model options -gen_state: 1 # Default 0, 1 means using one common style encoder, 2 one autoencoder only -guided: 1 # Default 0 random style picked (multi modal), 1 means guided training - -# FID -batch_size_fid: 0 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -gen: - dim: 64 # number of filters in the bottommost layer - mlp_dim: 256 # number of filters in MLP - style_dim: 16 # length of style code - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 64 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: lsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 5 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 8 # number of data loading threads -new_size: 600 # first resize the shortest image side to this size -crop_image_height: 512 # random crop image of this height -crop_image_width: 512 # random crop image of this width - -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/data/munit_dataset/trainA.txt -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/data/munit_dataset/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/data/munit_dataset/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/data/munit_dataset/testB.txt -data_list_train_a_seg: /network/tmp1/ccai/data/munit_dataset/trainA_seg.txt -data_list_train_b_seg: /network/tmp1/ccai/data/munit_dataset/trainB_seg.txt -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/trainA_synth.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/trainB_synth.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/trainB_seg_synth.txt diff --git a/configs/mask_conditioning.yaml b/configs/mask_conditioning.yaml deleted file mode 100644 index 5d243cb..0000000 --- a/configs/mask_conditioning.yaml +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 2 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 6 - dfeat_lambda: 1 - classif_frequency: 15 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 4 # weight of semantic conservation loss -context_w: 4 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: -1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 64 # number of filters in the bottommost layer - mlp_dim: 256 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 64 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: nsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 2 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 256 # first resize the shortest image side to this size -crop_image_height: 256 # random crop image of this height -crop_image_width: 256 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/configs/mask_conditioning_HD.yaml b/configs/mask_conditioning_HD.yaml deleted file mode 100644 index c38f16e..0000000 --- a/configs/mask_conditioning_HD.yaml +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 0 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 0 - dfeat_lambda: 0 - classif_frequency: 0 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 0 # weight of semantic conservation loss -context_w: 10 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: -1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 32 # number of filters in the bottommost layer - mlp_dim: 128 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 32 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: nsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 1 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 400 # first resize the shortest image side to this size -crop_image_height: 400 # random crop image of this height -crop_image_width: 400 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/configs/mask_conditioning_HD_lsgan.yaml b/configs/mask_conditioning_HD_lsgan.yaml deleted file mode 100644 index 6f11cd5..0000000 --- a/configs/mask_conditioning_HD_lsgan.yaml +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 0 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 0 - dfeat_lambda: 0 - classif_frequency: 0 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 0 # weight of semantic conservation loss -context_w: 10 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: -1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 32 # number of filters in the bottommost layer - mlp_dim: 128 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 32 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: lsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 1 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 400 # first resize the shortest image side to this size -crop_image_height: 400 # random crop image of this height -crop_image_width: 400 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/configs/mask_conditioning_HD_sim.yaml b/configs/mask_conditioning_HD_sim.yaml deleted file mode 100644 index f962db3..0000000 --- a/configs/mask_conditioning_HD_sim.yaml +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 10 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 0 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 6 - dfeat_lambda: 1 - classif_frequency: 15 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 0 # weight of semantic conservation loss -context_w: 10 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: 1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 32 # number of filters in the bottommost layer - mlp_dim: 256 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 32 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: nsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 1 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 400 # first resize the shortest image side to this size -crop_image_height: 400 # random crop image of this height -crop_image_width: 400 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/configs/mask_conditioning_HD_sim_lsgan.yaml b/configs/mask_conditioning_HD_sim_lsgan.yaml deleted file mode 100644 index 20cca01..0000000 --- a/configs/mask_conditioning_HD_sim_lsgan.yaml +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 0 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 6 - dfeat_lambda: 1 - classif_frequency: 15 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 0 # weight of semantic conservation loss -context_w: 12 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: 1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 32 # number of filters in the bottommost layer - mlp_dim: 128 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 32 # number of filters in the bottommost layer - norm: none # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: lsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - -ratio_disc_gen: 1 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 400 # first resize the shortest image side to this size -crop_image_height: 400 # random crop image of this height -crop_image_width: 400 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/configs/notebook.sh b/configs/notebook.sh deleted file mode 100644 index bb33c81..0000000 --- a/configs/notebook.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -#SBATCH --qos=high -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:P6000 -#SBATCH --mem=24GB -#SBATCH --gres=gpu:titanx:1 -#SBATCH --time=48:00:00 -#SBATCH -o /network/juravera/slurmlogs/slurm-%j.out -#SBATCH --job-name jupyter-notebook -#SBATCH --output ./slurnlogs/jupyter-notebook-%J.log -#SBATCH -x kepler5 -# get tunneling info -XDG_RUNTIME_DIR="" -port=$(shuf -i8000-9999 -n1) -node=$(hostname -s) -user=$(whoami) -cluster=$(hostname -f | awk -F"." '{print $2}') -# print tunneling instructions jupyter-log -echo -e " -MacOS or linux terminal command to create your ssh tunnel: -ssh -N -L ${port}:${node}:${port} ${user}@login-1.server.mila.quebec -p 8001 -Forwarded port:same as remote port -Remote server: ${node} -Remote port: ${port} -SSH cluster: ${cluster}.login-1.server.mila.quebec -SSH login: $user -SSH port: $port -Use a Browser on your local machine to go to: -localhost:${port} (prefix w/ https:// if using password) -" -# load modules or conda environments here -module load miniconda/3 -module load cuda-10.0/cudnn/7.5 -source $CONDA_ACTIVATE -conda activate simtoreal -# DON'T USE ADDRESS BELOW. -# DO USE TOKEN BELOW -python -m notebook --no-browser --port=${port} --ip=${node} -python -m notebook list diff --git a/configs/patchgan.yaml b/configs/patchgan.yaml deleted file mode 100644 index f1de99a..0000000 --- a/configs/patchgan.yaml +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 0 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 0 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 0 - dfeat_lambda: 0 - classif_frequency: 0 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 0 # weight of semantic conservation loss -context_w: 12 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: 1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 32 # number of filters in the bottommost layer - mlp_dim: 128 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 32 # number of filters in the bottommost layer - norm: batch # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: lsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - getIntermFeat: true - num_D: 3 - lambda_feat: 10 - use_sigmoid: False - - -ratio_disc_gen: 1 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 256 # first resize the shortest image side to this size -crop_image_height: 256 # random crop image of this height -crop_image_width: 256 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/configs/patchgan_perceptual.yaml b/configs/patchgan_perceptual.yaml deleted file mode 100644 index 1fc47f8..0000000 --- a/configs/patchgan_perceptual.yaml +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (C) 2018 NVIDIA Corporation. All rights reserved. -# Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). - -# logger options -image_save_iter: 1000 # How often do you want to save output images during training -image_display_iter: 500 # How often do you want to display output images during training -display_size: 8 # How many images do you want to display each time -snapshot_save_iter: 5000 # How often do you want to save trained models -log_iter: 1 # How often do you want to log the training stats - -# optimization options -max_iter: 1000000 # maximum number of training iterations -batch_size: 1 # batch size -weight_decay: 0.0001 # weight decay -beta1: 0.5 # Adam parameter -beta2: 0.999 # Adam parameter -init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] -lr: 0.0001 # initial learning rate -lr_policy: step # learning rate scheduler -step_size: 100000 # how often to decay learning rate -gamma: 0.5 # how much to decay learning rate -gan_w: 3 # weight of adversarial loss -recon_x_w: 12 # weight of image reconstruction loss -recon_c_w: 0 # weight of content reconstruction loss -recon_x_cyc_w: 12 # weight of explicit style augmented cycle consistency loss -vgg_w: 3 # weight of domain-invariant perceptual loss - -adaptation: - full_adaptation: 0 - ################## - output_classifier_lambda: 0 - output_adv_lambda: 0 - output_classif_freq: 1 - ################## - adv_lambda: 0 - dfeat_lambda: 0 - classif_frequency: 0 - ################## - sem_seg_lambda: 0 - -synthetic_seg_gt: 1 #Use synthetic ground truth as target for synthetic images -semantic_w: 0 # weight of semantic conservation loss -context_w: 12 #Weight of context preserving (L1) loss -recon_mask: 1 # default 0 do not touch recon loss, 1 do not compute cycle consistency loss on masked region -domain_adv_w: 0 - -synthetic_frequency: 1 # frequency to which we show synthetic examples -1 if we don't want to -recon_synth_w: 0 # weight of image reconstruction loss on the pair -#classifier ckpt path: -class_ckpt_path: /network/home/cosnegau/ckpt_small/resnet-18-epoch24.pth - -# Semantic segmentation ckpt path: -semantic_ckpt_path: /network/tmp1/ccai/checkpoints/roadSegmentation/resnet_34_8s_cityscapes_best.pth - -# FID -batch_size_fid: 1 # batch_size to infer the model when computing fid -eval_fid: 0 # Default 0, 1 means we track FID during training - -# Path to the inception moment computed on the real dataset of 900 flooded images -inception_moment_path: /network/tmp1/ccai/data/munit_dataset/inception_moments.npz - - -gen: - dim: 32 # number of filters in the bottommost layer - mlp_dim: 128 # number of filters in MLP - activ: relu # activation function [relu/lrelu/prelu/selu/tanh] - n_downsample: 2 # number of downsampling layers in content encoder - n_res: 4 # number of residual blocks in content encoder/decoder - pad_type: reflect # padding type [zero/reflect] -dis: - dim: 32 # number of filters in the bottommost layer - norm: batch # normalization layer [none/bn/in/ln] - activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] - n_layer: 4 # number of layers in D - gan_type: lsgan # GAN loss [lsgan/nsgan] - num_scales: 3 # number of scales - pad_type: reflect # padding type [zero/reflect] - getIntermFeat: true - num_D: 3 - lambda_feat: 10 - use_sigmoid: False - - -ratio_disc_gen: 1 # ratio training discriminator vs generator 5 means 5 update of the discriminator for one of the generator - -# data options -input_dim_a: 3 # number of image channels [1/3] -input_dim_b: 3 # number of image channels [1/3] -num_workers: 4 # number of data loading threads -new_size: 256 # first resize the shortest image side to this size -crop_image_height: 256 # random crop image of this height -crop_image_width: 256 # random crop image of this width - -#do we even need this? -data_folder_train_a: ./ -data_list_train_a: /network/tmp1/ccai/MUNITfilelists/trainA.txt -#and this? -data_folder_test_a: ./ -data_list_test_a: /network/tmp1/ccai/MUNITfilelists/testA.txt -data_folder_train_b: ./ -data_list_train_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt -data_folder_test_b: ./ -data_list_test_b: /network/tmp1/ccai/MUNITfilelists/testB.txt - -data_list_train_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainA.txt -data_list_train_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_trainB.txt - -data_list_test_a_seg: /network/tmp1/ccai/MUNITfilelists/seg_testA.txt -data_list_test_b_seg: /network/tmp1/ccai/MUNITfilelists/seg_testB.txt - -data_list_train_a_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/normal.txt -data_list_train_b_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/flood.txt -data_list_train_b_seg_synth: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/mask.txt #binary mask - -# list image for FID monitoring -data_list_fid_a: /network/tmp1/ccai/data/munit_dataset/trainA_fid.txt -data_list_fid_b: /network/tmp1/ccai/MUNITfilelists/trainB.txt - -seg_list_a: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg.txt -seg_list_b: /network/tmp1/ccai/data/munit_dataset/simdata/Unity1000R/txt_files/seg_flood.txt - diff --git a/scripts/data.py b/scripts/data.py index e50ac90..955f865 100644 --- a/scripts/data.py +++ b/scripts/data.py @@ -4,6 +4,7 @@ """ import torch.utils.data as data import os.path +from pathlib import Path def default_loader(path): @@ -92,22 +93,13 @@ def __len__(self): import os import os.path -IMG_EXTENSIONS = [ - ".jpg", - ".JPG", - ".jpeg", - ".JPEG", - ".png", - ".PNG", - ".ppm", - ".PPM", - ".bmp", - ".BMP", -] +IMG_EXTENSIONS = set( + [".jpg", ".JPG", ".jpeg", ".JPEG", ".png", ".PNG", ".ppm", ".PPM", ".bmp", ".BMP"] +) def is_image_file(filename): - return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) + return Path(filename).suffix in IMG_EXTENSIONS def make_dataset(dir): diff --git a/scripts/networks.py b/scripts/networks.py index c4d6991..1c26873 100644 --- a/scripts/networks.py +++ b/scripts/networks.py @@ -1085,6 +1085,9 @@ def __init__( spade_kernel_size, ) + self.nospade_1 = ResBlock(self.z_nc) + + """ self.up_spades = nn.Sequential( *[ SPADEResnetBlock( @@ -1098,9 +1101,11 @@ def __init__( for i in range(spade_n_up - 2) ] ) + """ + self.up_spades = nn.Sequential(*[ResBlock(self.z_nc) for i in range(spade_n_up - 2)]) - self.final_nc = self.z_nc // 2 ** (spade_n_up - 2) - + # self.final_nc = self.z_nc // 2 ** (spade_n_up - 2) + self.final_nc = self.z_nc self.conv_img = nn.Conv2d(self.final_nc, 3, 3, padding=1) self.upsample = nn.Upsample(scale_factor=2) @@ -1122,11 +1127,18 @@ def forward(self, z, cond): y = self.upsample(y) y = self.G_middle_0(y, cond) y = self.upsample(y) - y = self.G_middle_1(y, cond) + # y = self.G_middle_1(y, cond) + y = self.nospade_1(y) + """ for i, up in enumerate(self.up_spades): y = self.upsample(y) y = up(y, cond) + """ + + for i, up in enumerate(self.up_spades): + y = self.upsample(y) + y = up(y) y = self.conv_img(F.leaky_relu(y, 2e-1)) y = torch.tanh(y) @@ -1201,4 +1213,3 @@ def decode(self, content, mask, encoder_name): print("wrong value for encoder_name, must be 0 or 1") return None return images - diff --git a/scripts/test.py b/scripts/test.py index a38afac..432ad99 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -3,40 +3,51 @@ Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). """ from __future__ import print_function -from utils import get_config, pytorch03_to_pytorch04, sorted_nicely +from utils import get_config, sorted_nicely from trainer import MUNIT_Trainer import argparse from torch.autograd import Variable import torchvision.utils as vutils -import sys import torch import os from torchvision import transforms from PIL import Image import tqdm as tq -import glob +import numpy as np +from pathlib import Path +from data import is_image_file +from datetime import datetime # Parse arguments parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, help="network configuration file") parser.add_argument("--input", type=str, help="directory of input images") -parser.add_argument("--mask_dir", type=str, help="directory of masks corresponding to input images") +parser.add_argument( + "--mask_dir", type=str, help="directory of masks corresponding to input images" +) parser.add_argument("--output_folder", type=str, help="output image directory") parser.add_argument("--checkpoint", type=str, help="checkpoint of generator") parser.add_argument("--seed", type=int, default=10, help="random seed") parser.add_argument( - "--synchronized", action="store_true", help="whether use synchronized style code or not", + "--synchronized", + action="store_true", + help="whether use synchronized style code or not", ) parser.add_argument( - "--save_input", action="store_true", help="whether use synchronized style code or not", + "--save_input", + action="store_true", + help="whether use synchronized style code or not", ) parser.add_argument( - "--output_path", type=str, default=".", help="path for logs, checkpoints, and VGG model weight", + "--output_path", + type=str, + default=".", + help="path for logs, checkpoints, and VGG model weight", ) parser.add_argument( - "--save_mask", action="store_true", help="whether to save mask or not", + "--save_mask", action="store_true", help="whether to save mask or not" ) opts = parser.parse_args() @@ -58,12 +69,10 @@ trainer = MUNIT_Trainer(config) -# Load the model (here we currently only load the latest model architecture: one single style) -try: - state_dict = torch.load(opts.checkpoint) - trainer.gen.load_state_dict(state_dict["2"]) -except: - sys.exit("Cannot load the checkpoints") +# Load the model +# (here we currently only load the latest model architecture: one single style) +state_dict = torch.load(opts.checkpoint) +trainer.gen.load_state_dict(state_dict["2"]) # Send the trainer to cuda trainer.cuda() @@ -73,23 +82,31 @@ new_size = config["new_size"] # Define the list of non-flooded images -list_non_flooded = glob.glob(opts.input + "*") +list_non_flooded = [ + str(im) for im in Path(opts.input).resolve().glob("*") if is_image_file(im) +] list_non_flooded = sorted_nicely(list_non_flooded) # Define list of masks: -list_masks = glob.glob(opts.mask_dir + "*") +list_masks = [ + str(im) for im in Path(opts.mask_dir).resolve().glob("*") if is_image_file(im) +] list_masks = sorted_nicely(list_masks) -if len(list_non_flooded) != len(list_masks): - sys.exit("Image list and mask list differ in length") +assert len(list_non_flooded) == len( + list_masks +), "Image list and mask list differ in length" # Assert there are some elements inside -if len(list_non_flooded) == 0: - sys.exit("Image list is empty. Please ensure opts.input ends with a /") +assert list_non_flooded, "Image list is empty" + +output_folder = Path(opts.output_folder).resolve() +output_folder.mkdir(parents=True, exist_ok=True) +run_id = str(datetime.now())[:19].replace(" ", "_") # Inference with torch.no_grad(): @@ -102,11 +119,14 @@ ] ) - mask_transform = transforms.Compose([transforms.Resize((new_size, new_size)), transforms.ToTensor(),]) - + mask_transform = transforms.Compose( + [transforms.Resize((new_size, new_size)), transforms.ToTensor()] + ) for j in tq.tqdm(range(len(list_non_flooded))): + file_id = f"{run_id}-{j}" + # Define image path path_xa = list_non_flooded[j] @@ -122,20 +142,23 @@ mask = mask[0].unsqueeze(0).unsqueeze(0) # Load and transform the non_flooded image - x_a = Variable(transform(Image.open(path_xa).convert("RGB")).unsqueeze(0).cuda()) + x_a = Variable( + transform(Image.open(path_xa).convert("RGB")).unsqueeze(0).cuda() + ) if opts.save_input: inputs = (x_a + 1) / 2.0 - path = os.path.join(opts.output_folder, "{:03d}input.jpg".format(j)) - vutils.save_image(inputs.data, path, padding=0, normalize=True) + path = output_folder / "{}-input.jpg".format(file_id) + vutils.save_image(inputs.data, str(path), padding=0, normalize=True) if opts.save_mask: - path = os.path.join(opts.output_folder, "{:03d}mask.jpg".format(j)) - #overlay mask onto image + path = output_folder / "{}-mask.jpg".format(file_id) + # overlay mask onto image save_m_a = x_a - (x_a * mask.repeat(1, 3, 1, 1)) + mask.repeat(1, 3, 1, 1) - vutils.save_image(save_m_a, path, padding=0, normalize=True) + vutils.save_image(save_m_a, str(path), padding=0, normalize=True) # Extract content and style - c_a = trainer.gen.encode(x_a, 1) + x_a_augment = torch.cat([x_a, mask], dim=1) + c_a = trainer.gen.encode(x_a_augment, 1) # Perform cross domain translation x_ab = trainer.gen.decode(c_a, mask, 2) @@ -144,7 +167,7 @@ outputs = (x_ab + 1) / 2.0 # Define output path - path = os.path.join(opts.output_folder, "{:03d}output.jpg".format(j)) + path = output_folder / "{}-output.jpg".format(file_id) # Save image - vutils.save_image(outputs.data, path, padding=0, normalize=True) + vutils.save_image(outputs.data, str(path), padding=0, normalize=True) diff --git a/scripts/train.py b/scripts/train.py index d8fdc75..bbe91be 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -40,7 +40,7 @@ "--config", type=str, default="configs/config256.yaml", help="Path to the config file.", ) parser.add_argument( - "--output_path", type=str, default=".", help="outputs path" + "--output_path", type=str, default="/network/tmp1/ccai/checkpoints/sun", help="outputs path" ) parser.add_argument("--resume", action="store_true") parser.add_argument("--trainer", type=str, default="MUNIT", help="MUNIT|UNIT") @@ -397,4 +397,3 @@ iterations += 1 if iterations >= max_iter: sys.exit("Finish training") - diff --git a/scripts/trainer.py b/scripts/trainer.py index 21f2bbd..b598090 100644 --- a/scripts/trainer.py +++ b/scripts/trainer.py @@ -25,6 +25,7 @@ import torch.nn as nn import os from PIL import Image +from torchvision.utils import save_image class MUNIT_Trainer(nn.Module): @@ -60,7 +61,7 @@ def __init__(self, hyperparameters): else: self.use_output_classifier_sr = False - self.gen = SpadeGen(hyperparameters["input_dim_a"], hyperparameters["gen"]) + self.gen = SpadeGen(hyperparameters["input_dim_a"] + 1, hyperparameters["gen"]) # Note: the "+1" is for the masks if hyperparameters["dis"]["type"] == "patchgan": @@ -138,13 +139,11 @@ def __init__(self, hyperparameters): self.criterionVGG = VGGLoss() # Load semantic segmentation model if needed - if "semantic_w" in hyperparameters.keys() and hyperparameters["semantic_w"] > 0: - self.segmentation_model = load_segmentation_model( - hyperparameters["semantic_ckpt_path"], 19 - ) - self.segmentation_model.eval() - for param in self.segmentation_model.parameters(): - param.requires_grad = False + # if "semantic_w" in hyperparameters.keys() and hyperparameters["semantic_w"] > 0: + self.segmentation_model = load_segmentation_model(hyperparameters["semantic_ckpt_path"], 19) + self.segmentation_model.eval() + for param in self.segmentation_model.parameters(): + param.requires_grad = False # Load domain classifier if needed if "domain_adv_w" in hyperparameters.keys() and hyperparameters["domain_adv_w"] > 0: @@ -268,14 +267,20 @@ def forward(self, x_a, x_b, m_a, m_b): torch.Tensor, torch.Tensor -- Translated version of x_a in domain B, Translated version of x_b in domain A """ self.eval() + m_a_seg = self.merge_seg_with_mask(x_a, m_a) + m_b_seg = self.merge_seg_with_mask(x_b, m_b) + + avg_mask_a = self.average_mask(x_a, m_a) + avg_mask_b = self.average_mask(x_b, m_b) + x_a_augment = torch.cat([x_a, m_a], dim=1) x_b_augment = torch.cat([x_b, m_b], dim=1) - c_a = self.gen.encode(x_a, 1) - c_b = self.gen.encode(x_b, 2) + c_a = self.gen.encode(x_a_augment, 1) + c_b = self.gen.encode(x_b_augment, 2) - x_ba = self.gen.decode(c_b, 1) - x_ab = self.gen.decode(c_a, 2) + x_ba = self.gen.decode(c_b, m_b_seg, 1) + x_ab = self.gen.decode(c_a, m_a_seg, 2) self.train() return x_ab, x_ba @@ -311,37 +316,51 @@ def gen_update( """ self.gen_opt.zero_grad() - # encode + mask_a_seg = self.merge_seg_with_mask(x_a, mask_a) + mask_b_seg = self.merge_seg_with_mask(x_b, mask_b) + + avg_mask_a = self.average_mask(x_a, mask_a) + avg_mask_b = self.average_mask(x_b, mask_b) + x_a_augment = torch.cat([x_a, mask_a], dim=1) x_b_augment = torch.cat([x_b, mask_b], dim=1) - c_a = self.gen.encode(x_a, 1) - c_b = self.gen.encode(x_b, 2) + # encode + c_a = self.gen.encode(x_a_augment, 1) + c_b = self.gen.encode(x_b_augment, 2) # decode (within domain) - x_a_recon = self.gen.decode(c_a, mask_a, 1) - x_b_recon = self.gen.decode(c_b, mask_b, 2) + x_a_recon = self.gen.decode(c_a, mask_a_seg, 1) + x_b_recon = self.gen.decode(c_b, mask_b_seg, 2) - x_ba = self.gen.decode(c_b, mask_b, 1) - x_ab = self.gen.decode(c_a, mask_a, 2) + x_ba = self.gen.decode(c_b, mask_b_seg, 1) + x_ab = self.gen.decode(c_a, mask_a_seg, 2) x_ba_augment = torch.cat([x_ba, mask_b], dim=1) x_ab_augment = torch.cat([x_ab, mask_a], dim=1) + # encode again - c_b_recon = self.gen.encode(x_ba, 1) - c_a_recon = self.gen.encode(x_ab, 2) + c_b_recon = self.gen.encode(x_ba_augment, 1) + c_a_recon = self.gen.encode(x_ab_augment, 2) # decode again (if needed) x_aba = ( - self.gen.decode(c_a_recon, mask_a, 1) if hyperparameters["recon_x_cyc_w"] > 0 else None + self.gen.decode(c_a_recon, mask_a_seg, 1) + if hyperparameters["recon_x_cyc_w"] > 0 + else None ) x_bab = ( - self.gen.decode(c_b_recon, mask_b, 2) if hyperparameters["recon_x_cyc_w"] > 0 else None + self.gen.decode(c_b_recon, mask_b_seg, 2) + if hyperparameters["recon_x_cyc_w"] > 0 + else None ) # reconstruction loss - self.loss_gen_recon_x_a = self.recon_criterion(x_a_recon, x_a) - self.loss_gen_recon_x_b = self.recon_criterion(x_b_recon, x_b) + # self.loss_gen_recon_x_a = self.recon_criterion(x_a_recon, x_a * (1.0 - mask_a) + avg_mask_a) + # self.loss_gen_recon_x_b = self.recon_criterion(x_b_recon, x_b * (1.0 - mask_b) + avg_mask_b) + + self.loss_gen_recon_x_a = self.recon_criterion_mask(x_a_recon, x_a, mask_a) + self.loss_gen_recon_x_b = self.recon_criterion_mask(x_b_recon, x_b, mask_b) # Contex preserving loss self.context_loss = self.recon_criterion_mask( @@ -407,17 +426,6 @@ def gen_update( self.loss_gen_vgg_b = ( self.compute_vgg_loss(x_ab, x_a, mask_a) if hyperparameters["vgg_w"] > 0 else 0 ) - """ self.loss_destruct_vgg_a = ( - torch.exp(-self.compute_vgg_loss(x_ba, x_b, 1.0 - mask_b) * 0.01) - if hyperparameters["vgg_w"] > 0 - else 0 - ) - self.loss_destruct_vgg_b = ( - torch.exp(-self.compute_vgg_loss(x_ab, x_a, 1.0 - mask_a) * 0.01) - if hyperparameters["vgg_w"] > 0 - else 0 - ) - """ # semantic-segmentation loss self.loss_sem_seg = ( @@ -495,8 +503,7 @@ def gen_update( if hyperparameters["vgg_w"] > 0: comet_exp.log_metric("loss_gen_vgg_a", self.loss_gen_vgg_a.cpu().detach()) comet_exp.log_metric("loss_gen_vgg_b", self.loss_gen_vgg_b.cpu().detach()) - # comet_exp.log_metric("loss_destruct_vgg_a", self.loss_destruct_vgg_a.cpu().detach()) - # comet_exp.log_metric("loss_destruct_vgg_b", self.loss_destruct_vgg_b.cpu().detach()) + if hyperparameters["semantic_w"] > 0: comet_exp.log_metric("loss_sem_seg", self.loss_sem_seg.cpu().detach()) if hyperparameters["context_w"] > 0: @@ -670,6 +677,57 @@ def compute_semantic_seg_loss(self, img1, img2, mask=None, ground_truth=None): loss = nn.CrossEntropyLoss()(output, target) return loss + def merge_seg_with_mask(self, img, mask): + """ + Compute semantic segmentation loss between two images on the unmasked region or in the entire image + Arguments: + img1 {torch.Tensor} -- Image from domain A after transform in tensor format + img2 {torch.Tensor} -- Image transformed + mask {torch.Tensor} -- Binary mask where we force the loss to be zero + ground_truth {torch.Tensor} -- If available palletized image of size (batch, h, w) + Returns: + torch.float -- Cross entropy loss on the unmasked region + """ + + # denorm + img_denorm = (img + 1) / 2.0 + + # norm for semantic seg network + input_transformed = seg_batch_transform(img_denorm) + + # compute labels from original image and logits from translated version + # target = ( + # self.segmentation_model(input_transformed1).max(1)[1] + # ) + # Infer x_ab or x_ba + output = self.segmentation_model(input_transformed) + max_value = output.size()[1] + max_value = mask_value = max_value + 1 # make masked value the largest class + output = output.argmax(1).unsqueeze(1) + + # Zero out masked values: + output = output * (1 - mask) + (mask * mask_value) + output_mask = output.to(torch.float) / max_value + + return output_mask + + def average_mask(self, img, mask): + """ + # Apply mask: + mask_region = img * mask + + # Get average + avg = torch.mean(mask_region, dim=(2, 3)) + avg = avg.unsqueeze(-1).unsqueeze(-1) + + masked_avg = mask * avg + """ + + # Return random noise + masked_avg = torch.normal(mean=0, std=1, size=mask.size(), device="cuda") + masked_avg = masked_avg * mask + return masked_avg + def sample(self, x_a, x_b, m_a, m_b): """ Infer the model on a batch of image @@ -683,7 +741,13 @@ def sample(self, x_a, x_b, m_a, m_b): Or if self.semantic_w is true: x_a, autoencode(x_a), Semantic segmentation x_a, x_ab_1,semantic segmentation x_ab_1, x_ab_2 """ + self.eval() + m_a_seg = self.merge_seg_with_mask(x_a, m_a) + m_b_seg = self.merge_seg_with_mask(x_b, m_b) + + avg_mask_a = self.average_mask(x_a, m_a) + avg_mask_b = self.average_mask(x_b, m_b) x_a_recon, x_b_recon, x_ba1, x_ba2, x_ab1, x_ab2 = [], [], [], [], [], [] @@ -691,16 +755,16 @@ def sample(self, x_a, x_b, m_a, m_b): x_b_augment = torch.cat([x_b, m_b], dim=1) for i in range(x_a.size(0)): - c_a = self.gen.encode(x_a[i].unsqueeze(0), 1) - c_b = self.gen.encode(x_b[i].unsqueeze(0), 2) + c_a = self.gen.encode(x_a_augment[i].unsqueeze(0), 1) + c_b = self.gen.encode(x_b_augment[i].unsqueeze(0), 2) - x_a_recon.append(self.gen.decode(c_a, m_a[i].unsqueeze(0), 1)) - x_b_recon.append(self.gen.decode(c_b, m_b[i].unsqueeze(0), 2)) + x_a_recon.append(self.gen.decode(c_a, m_a_seg[i].unsqueeze(0), 1)) + x_b_recon.append(self.gen.decode(c_b, m_b_seg[i].unsqueeze(0), 2)) - x_ba1.append(self.gen.decode(c_b, m_b[i].unsqueeze(0), 1)) # s_a1[i].unsqueeze(0))) - x_ba2.append(self.gen.decode(c_b, m_b[i].unsqueeze(0), 1)) # s_a2[i].unsqueeze(0))) - x_ab1.append(self.gen.decode(c_a, m_a[i].unsqueeze(0), 2)) # s_b1[i].unsqueeze(0))) - x_ab2.append(self.gen.decode(c_a, m_a[i].unsqueeze(0), 2)) # s_b2[i].unsqueeze(0))) + x_ba1.append(self.gen.decode(c_b, m_b_seg[i].unsqueeze(0), 1)) # s_a1[i].unsqueeze(0))) + x_ba2.append(self.gen.decode(c_b, m_b_seg[i].unsqueeze(0), 1)) # s_a2[i].unsqueeze(0))) + x_ab1.append(self.gen.decode(c_a, m_a_seg[i].unsqueeze(0), 2)) # s_b1[i].unsqueeze(0))) + x_ab2.append(self.gen.decode(c_a, m_a_seg[i].unsqueeze(0), 2)) # s_b2[i].unsqueeze(0))) x_a_recon, x_b_recon = torch.cat(x_a_recon), torch.cat(x_b_recon) x_ba1, x_ba2 = torch.cat(x_ba1), torch.cat(x_ba2) @@ -905,16 +969,25 @@ def dis_update(self, x_a, x_b, m_a, m_b, hyperparameters, comet_exp=None): Keyword Arguments: comet_exp {cometExperience} -- CometML object use to log all the loss and images (default: {None}) """ + self.dis_opt.zero_grad() + + m_a_seg = self.merge_seg_with_mask(x_a, m_a) + m_b_seg = self.merge_seg_with_mask(x_b, m_b) + x_a_augment = torch.cat([x_a, m_a], dim=1) x_b_augment = torch.cat([x_b, m_b], dim=1) + # Replace masked region with average + + avg_mask_a = self.average_mask(x_a, m_a) + avg_mask_b = self.average_mask(x_b, m_b) # encode - c_a = self.gen.encode(x_a, 1) - c_b = self.gen.encode(x_b, 2) + c_a = self.gen.encode(x_a_augment, 1) + c_b = self.gen.encode(x_b_augment, 2) # decode (cross domain) - x_ba = self.gen.decode(c_b, m_b, 1) - x_ab = self.gen.decode(c_a, m_a, 2) + x_ba = self.gen.decode(c_b, m_b_seg, 1) + x_ab = self.gen.decode(c_a, m_a_seg, 2) x_ba_augment = torch.cat([x_ba, m_b], dim=1) x_ab_augment = torch.cat([x_ab, m_a], dim=1) @@ -1076,6 +1149,8 @@ def resume(self, checkpoint_dir, hyperparameters): state_dict = torch.load(last_model_name) self.dis_a.load_state_dict(state_dict["a"]) self.dis_b.load_state_dict(state_dict["b"]) + self.dis_a_masked.load_state_dict(state_dict["a_masked"]) + self.dis_b_masked.load_state_dict(state_dict["b_masked"]) # Load optimizers state_dict = torch.load(os.path.join(checkpoint_dir, "optimizer.pt")) self.dis_opt.load_state_dict(state_dict["dis"]) @@ -1107,7 +1182,15 @@ def save(self, snapshot_dir, iterations): opt_name = os.path.join(snapshot_dir, "optimizer.pt") torch.save({"2": self.gen.state_dict()}, gen_name) - torch.save({"a": self.dis_a.state_dict(), "b": self.dis_b.state_dict()}, dis_name) + torch.save( + { + "a": self.dis_a.state_dict(), + "b": self.dis_b.state_dict(), + "a_masked": self.dis_a_masked.state_dict(), + "b_masked": self.dis_b_masked.state_dict(), + }, + dis_name, + ) if self.domain_classif_ab: torch.save({"d": self.domain_classifier.state_dict()}, domain_classifier_name) torch.save( @@ -1122,4 +1205,3 @@ def save(self, snapshot_dir, iterations): torch.save( {"gen": self.gen_opt.state_dict(), "dis": self.dis_opt.state_dict()}, opt_name, ) - diff --git a/scripts/utils.py b/scripts/utils.py index 3eb7c00..25cf170 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -225,7 +225,9 @@ def get_data_loader_list( transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ] transform_list = ( - [transforms.RandomCrop((height, width))] + transform_list if crop else transform_list + [transforms.RandomCrop((height, width))] + transform_list + if crop + else transform_list ) transform_list = ( [transforms.Resize((new_size, new_size))] + transform_list @@ -233,7 +235,9 @@ def get_data_loader_list( else transform_list ) transform_list = ( - [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list + [transforms.RandomHorizontalFlip()] + transform_list + if train + else transform_list ) transform = transforms.Compose(transform_list) dataset = ImageFilelist(root, file_list, transform=transform) @@ -314,7 +318,9 @@ def transform(self, image, mask): image = resize(image) to_tensor = transforms.ToTensor() # Random crop - i, j, h, w = transforms.RandomCrop.get_params(image, output_size=(self.height, self.width)) + i, j, h, w = transforms.RandomCrop.get_params( + image, output_size=(self.height, self.width) + ) image = F.crop(image, i, j, h, w) if type(mask) is not torch.Tensor: @@ -436,7 +442,9 @@ def __len__(self): return len(self.image_paths) -def get_fid_data_loader(file_list_a, file_list_b, batch_size, train, new_size=256, num_workers=4): +def get_fid_data_loader( + file_list_a, file_list_b, batch_size, train, new_size=256, num_workers=4 +): """ Masks and images lists-based data loader with transformations (horizontal flip, resizing, random crop, normalization are handled) @@ -518,7 +526,7 @@ def transform(self, image_a, image_b, mask, semantic_a, semantic_b): # print('dim image after resize',image.size) # Resize mask - #mask = mask.resize((image_b.width, image_b.height), Image.NEAREST) + # mask = mask.resize((image_b.width, image_b.height), Image.NEAREST) mask = resize(mask) semantic_a = semantic_a.resize((image_b.width, image_b.height), Image.NEAREST) semantic_b = semantic_b.resize((image_b.width, image_b.height), Image.NEAREST) @@ -627,7 +635,14 @@ def get_synthetic_data_loader( loader -- data loader with transformed dataset """ dataset = MyDatasetSynthetic( - file_list_a, file_list_b, mask_list, sem_list_a, sem_list_b, new_size, height, width, + file_list_a, + file_list_b, + mask_list, + sem_list_a, + sem_list_b, + new_size, + height, + width, ) loader = DataLoader( dataset=dataset, @@ -682,7 +697,14 @@ def get_data_loader_mask_and_im( def get_data_loader_folder( - input_folder, batch_size, train, new_size=None, height=256, width=256, num_workers=4, crop=True, + input_folder, + batch_size, + train, + new_size=None, + height=256, + width=256, + num_workers=4, + crop=True, ): """ Folder-based data loader with transformations @@ -711,7 +733,9 @@ def get_data_loader_folder( transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ] transform_list = ( - [transforms.RandomCrop((height, width))] + transform_list if crop else transform_list + [transforms.RandomCrop((height, width))] + transform_list + if crop + else transform_list ) transform_list = ( [transforms.Resize((new_size, new_size))] + transform_list @@ -719,7 +743,9 @@ def get_data_loader_folder( else transform_list ) transform_list = ( - [transforms.RandomHorizontalFlip()] + transform_list if train else transform_list + [transforms.RandomHorizontalFlip()] + transform_list + if train + else transform_list ) transform = transforms.Compose(transform_list) dataset = ImageFolder(input_folder, transform=transform) @@ -763,14 +789,18 @@ def __write_images(image_outputs, display_image_num, file_name): image_outputs = [ images.expand(-1, 3, -1, -1) for images in image_outputs ] # expand gray-scale images to 3 channels - image_tensor = torch.cat([images[:display_image_num] for images in image_outputs], 0) + image_tensor = torch.cat( + [images[:display_image_num] for images in image_outputs], 0 + ) image_grid = vutils.make_grid( image_tensor.data, nrow=display_image_num, padding=0, normalize=True ) vutils.save_image(image_grid, file_name, nrow=1) -def write_2images(image_outputs, display_image_num, image_directory, postfix, comet_exp=None): +def write_2images( + image_outputs, display_image_num, image_directory, postfix, comet_exp=None +): """Write images from both worlds a and b of the cycle A-B-A as jpg Arguments: image_outputs {Tensor list} -- list of images, the first half being outputs in B, @@ -859,7 +889,9 @@ def get_slerp_interp(nb_latents, nb_interp, z_dim): low = np.random.randn(z_dim) high = np.random.randn(z_dim) # low + np.random.randn(512) * 0.7 interp_vals = np.linspace(0, 1, num=nb_interp) - latent_interp = np.array([slerp(v, low, high) for v in interp_vals], dtype=np.float32) + latent_interp = np.array( + [slerp(v, low, high) for v in interp_vals], dtype=np.float32 + ) latent_interps = np.vstack((latent_interps, latent_interp)) return latent_interps[:, :, np.newaxis, np.newaxis] @@ -919,7 +951,10 @@ def __init__(self, num_classes=1000): # Load the pretrained weights, remove avg pool # layer and get the output stride of 8 resnet34_8s = resnet34( - fully_conv=True, pretrained=True, output_stride=8, remove_avg_pool_layer=True, + fully_conv=True, + pretrained=True, + output_stride=8, + remove_avg_pool_layer=True, ) # Randomly initialize the 1x1 Conv scoring layer @@ -1070,7 +1105,9 @@ def get_scheduler(optimizer, hyperparameters, iterations=-1): def weights_init(init_type="gaussian"): def init_fun(m): classname = m.__class__.__name__ - if (classname.find("Conv") == 0 or classname.find("Linear") == 0) and hasattr(m, "weight"): + if (classname.find("Conv") == 0 or classname.find("Linear") == 0) and hasattr( + m, "weight" + ): # print m.__class__.__name__ if init_type == "gaussian": init.normal_(m.weight.data, 0.0, 0.02) @@ -1279,7 +1316,9 @@ def __init__( self.stride = stride self.downsample = downsample if stride != 1 or inplanes != planes: - self.downsample = nn.Sequential(conv1x1(inplanes, planes, stride), norm_layer(planes)) + self.downsample = nn.Sequential( + conv1x1(inplanes, planes, stride), norm_layer(planes) + ) def forward(self, x): identity = x @@ -1408,12 +1447,11 @@ def p(d, prefix="", vals=[]): return dict(values_list) -def sorted_nicely( l ): - """ Sort the given iterable in the way that humans expect.""" - convert = lambda text: int(text) if text.isdigit() else text - alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] - return sorted(l, key = alphanum_key) - +def sorted_nicely(l): + """ Sort the given iterable in the way that humans expect.""" + convert = lambda text: int(text) if text.isdigit() else text + alphanum_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)] + return sorted(l, key=alphanum_key)